Support download

2025-09-23 15:01:36 +08:00
parent 45c005687d
commit f15b730dca
4 changed files with 227 additions and 64 deletions
--- a/crawler/views.py
+++ b/crawler/views.py
@@ -355,3 +355,80 @@ def download_crawled_content(request, content_id):
    response['Content-Disposition'] = f'attachment; filename="{filename}"'
    
    return response
+
+
+def download_selected_contents(request):
+    """Download the selected articles as a single zip archive.
+
+    The POST field ``selected_contents`` lists CrawledContent ids. Each article
+    gets its own folder with a generated .docx plus readable ``media/`` files.
+
+    Returns the zip as an attachment; 400 when nothing is selected, 404 when
+    no selected id matches an article, 405 for any method other than POST.
+    """
+    import logging  # local import: the file's import block is outside this hunk
+
+    if request.method != 'POST':
+        return HttpResponse("无效的请求方法", status=405)
+
+    selected_ids = request.POST.getlist('selected_contents')
+    if not selected_ids:
+        return HttpResponse("请至少选择一篇文章", status=400)
+
+    contents = CrawledContent.objects.filter(id__in=selected_ids)
+    if not contents.exists():
+        return HttpResponse("未找到选中的文章", status=404)
+
+    def safe_name(text):
+        # Applied to every title-derived path segment: drops separators and dots.
+        return "".join(c for c in text if c.isalnum() or c in ' -_').strip()
+
+    logger = logging.getLogger(__name__)
+    used_folders = set()
+    zip_buffer = BytesIO()
+    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+        for content in contents:
+            folder_name = safe_name(content.title[:30])
+            # Empty or repeated names would collide in the archive: add the id.
+            if not folder_name or folder_name in used_folders:
+                folder_name = f"{folder_name}_{content.id}".lstrip('_')
+            used_folders.add(folder_name)
+
+            doc = Document()
+            doc.add_heading(content.title, 0)
+            doc.add_paragraph(f'来源网站: {content.website.name} ({content.website.region})')
+            doc.add_paragraph(f'原始链接: {content.url}')
+            doc.add_paragraph(f'发布时间: {content.publish_date or "未知"}')
+            doc.add_paragraph(f'作者: {content.author or "未知"}')
+            doc.add_paragraph(f'匹配关键字: {content.keywords_matched}')
+            doc.add_paragraph(f'爬取时间: {content.created_at}')
+            doc.add_heading('正文', level=1)
+            # Blank-line-separated chunks become paragraphs; a missing body
+            # (None) is treated as empty instead of raising.
+            for paragraph in (content.content or '').split('\n\n'):
+                if paragraph.strip():
+                    doc.add_paragraph(paragraph.strip())
+
+            doc_buffer = BytesIO()
+            doc.save(doc_buffer)
+            doc_name = safe_name(content.title[:50]) or folder_name
+            zip_file.writestr(f"{folder_name}/{doc_name}.docx", doc_buffer.getvalue())
+
+            for media_file in content.media_files.all():
+                local_file = media_file.local_file
+                if not local_file:
+                    continue
+                try:
+                    if not default_storage.exists(local_file.name):
+                        continue
+                    # Context manager closes the storage handle after reading.
+                    with default_storage.open(local_file.name) as stored:
+                        file_content = stored.read()
+                    zip_file.writestr(f"{folder_name}/media/{os.path.basename(local_file.name)}", file_content)
+                except Exception:
+                    # Best effort: skip the failing file but leave a trace.
+                    logger.warning("Skipping media file %s", local_file.name, exc_info=True)
+
+    response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')
+    response['Content-Disposition'] = 'attachment; filename="selected_articles.zip"'
+    return response