Unknown change

This commit is contained in:
2025-08-13 18:40:31 +08:00
parent 5e396796ca
commit c618528a0a
6 changed files with 996 additions and 528 deletions

View File

@@ -34,7 +34,7 @@ def run_crawler_view(request):
if not website_name:
messages.error(request, '请选择要爬取的网站')
return redirect('admin:core_article_changelist')
try:
# 根据网站名称确定要执行的爬虫命令
if website_name == 'crawl_xinhua':
@@ -46,14 +46,14 @@ def run_crawler_view(request):
else:
# 对于其他网站,使用通用爬虫命令
crawler_name = 'crawl_articles'
# 运行爬虫命令不传递website_name作为参数
call_command(crawler_name)
messages.success(request, f'成功执行爬虫: {crawler_name}')
except Exception as e:
messages.error(request, f'执行爬虫失败: {str(e)}')
return redirect('admin:core_article_changelist')
@@ -241,12 +241,12 @@ class ArticleAdmin(admin.ModelAdmin):
# 创建内存中的ZIP文件
zip_buffer = BytesIO()
with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
for article in queryset:
# 为每篇文章创建单独的文件夹
article_folder = f"article_{article.id}_{article.title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('*', '_').replace('?', '_').replace('"', '_').replace('<', '_').replace('>', '_').replace('|', '_')}"
# 创建Word文档
doc = Document()
doc.add_heading(article.title, 0)
@@ -281,7 +281,8 @@ class ArticleAdmin(admin.ModelAdmin):
image_stream = BytesIO(response.content)
doc.add_picture(image_stream, width=Inches(4.0))
# 将网络文件保存到ZIP
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(src)), response.content)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(src)),
response.content)
else:
# 本地图片
full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
@@ -310,7 +311,7 @@ class ArticleAdmin(admin.ModelAdmin):
full_path = os.path.join(settings.MEDIA_ROOT, media_file)
# 检查文件扩展名以确定处理方式
file_extension = os.path.splitext(media_file)[1].lower()
# 图片文件处理
if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
if os.path.exists(full_path):
@@ -325,7 +326,9 @@ class ArticleAdmin(admin.ModelAdmin):
image_stream = BytesIO(response.content)
doc.add_picture(image_stream, width=Inches(4.0))
# 将网络文件保存到ZIP
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content)
zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
else:
doc.add_paragraph(media_file)
# 视频文件处理
@@ -341,7 +344,9 @@ class ArticleAdmin(admin.ModelAdmin):
if media_file.startswith('http'):
# 将网络文件保存到ZIP
response = requests.get(media_file, timeout=10)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content)
zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
doc.add_paragraph(f"[视频文件: {media_file}]")
else:
doc.add_paragraph(media_file)
@@ -355,7 +360,9 @@ class ArticleAdmin(admin.ModelAdmin):
# 如果是URL格式的媒体文件
if media_file.startswith('http'):
response = requests.get(media_file, timeout=10)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content)
zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
doc.add_paragraph(f"[文件: {media_file}]")
else:
doc.add_paragraph(media_file)
@@ -366,7 +373,8 @@ class ArticleAdmin(admin.ModelAdmin):
doc_buffer = BytesIO()
doc.save(doc_buffer)
doc_buffer.seek(0)
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'), doc_buffer.read())
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'),
doc_buffer.read())
# 创建HttpResponse
zip_buffer.seek(0)
@@ -512,5 +520,4 @@ class DongfangyancaoArticleAdmin(admin.ModelAdmin):
export_as_json.short_description = "导出选中文章为JSON格式"
# 在各自的管理站点中注册模型