Add Word (.docx) export action for articles in the admin

2025-08-11 23:14:56 +08:00
parent bfd1604872
commit b6bbb90703
2 changed files with 284 additions and 2 deletions
--- a/core/admin.py
+++ b/core/admin.py
@@ -34,7 +34,7 @@ class ArticleAdmin(admin.ModelAdmin):
    list_display = ('title', 'website', 'pub_date')
    search_fields = ('title', 'content')
    # 添加动作选项
-    actions = ['delete_selected_articles', 'delete_dongfangyancao_articles', 'export_as_csv', 'export_as_json']
+    actions = ['delete_selected_articles', 'delete_dongfangyancao_articles', 'export_as_csv', 'export_as_json', 'export_as_word']
    
    def delete_dongfangyancao_articles(self, request, queryset):
        """一键删除东方烟草报的所有文章"""
@@ -93,6 +93,109 @@ class ArticleAdmin(admin.ModelAdmin):
    
    export_as_json.short_description = "导出选中文章为JSON格式"

+    def export_as_word(self, request, queryset):
+        """Export the selected articles as a single Word (.docx) download.
+
+        Each article becomes a level-1 heading followed by its metadata, the
+        images referenced by its HTML content, the content as plain text and
+        any attached media files, then a page break.
+
+        Args:
+            request: The current admin request; used to report a missing
+                python-docx installation to the user.
+            queryset: The articles selected in the change list.
+
+        Returns:
+            An ``HttpResponse`` carrying ``articles.docx`` as an attachment,
+            or ``None`` (after an error message) if python-docx is missing.
+        """
+        try:
+            from docx import Document
+            from docx.shared import Inches
+        except ImportError:
+            self.message_user(request, "缺少python-docx库，请安装: pip install python-docx", messages.ERROR)
+            return
+
+        # Imported inside the action rather than at module level.
+        import logging
+        import os
+        from io import BytesIO
+
+        from bs4 import BeautifulSoup
+        from django.conf import settings
+        from django.http import HttpResponse
+
+        logger = logging.getLogger(__name__)
+        time_format = '%Y-%m-%d %H:%M:%S'
+        image_width = Inches(4.0)
+        doc = Document()
+        doc.add_heading('文章导出', 0)
+
+        def format_time(value):
+            """Format a datetime for the export, or 'N/A' when it is unset."""
+            return value.strftime(time_format) if value else 'N/A'
+
+        def add_image(source, relative_path, fallback_text):
+            """Embed one image, writing ``fallback_text`` if it cannot be added."""
+            try:
+                if source.startswith('http'):
+                    # NOTE(review): the URL is taken from stored article data;
+                    # confirm that fetching arbitrary hosts here is acceptable.
+                    import requests
+                    response = requests.get(source, timeout=10)
+                    # Do not feed an HTTP error page to python-docx.
+                    response.raise_for_status()
+                    doc.add_picture(BytesIO(response.content), width=image_width)
+                    return
+                full_path = os.path.join(settings.MEDIA_ROOT, relative_path)
+                if os.path.exists(full_path):
+                    doc.add_picture(full_path, width=image_width)
+                    return
+            except Exception:
+                # Best effort: one broken image must not abort the export.
+                logger.warning("Word export: cannot embed %r", source, exc_info=True)
+            doc.add_paragraph(fallback_text)
+
+        for article in queryset:
+            doc.add_heading(article.title, level=1)
+            doc.add_paragraph(f"网站: {article.website.name}")
+            doc.add_paragraph(f"URL: {article.url}")
+            doc.add_paragraph(f"发布时间: {format_time(article.pub_date)}")
+            doc.add_paragraph(f"创建时间: {format_time(article.created_at)}")
+
+            doc.add_heading('内容', level=2)
+            # An article without content must not crash the HTML parser.
+            soup = BeautifulSoup(article.content or '', 'html.parser')
+            # Embed images first, then drop the tags so get_text() sees text only.
+            for img in soup.find_all('img'):
+                src = img.get('src', '')
+                if src:
+                    add_image(src, src.lstrip('/'), f"[图片: {src}]")
+                img.decompose()
+            doc.add_paragraph(soup.get_text())
+
+            if article.media_files:
+                doc.add_heading('媒体文件', level=2)
+                # Entries may be local paths or URLs; anything that cannot
+                # be embedded is listed by name instead.
+                for media_file in article.media_files:
+                    add_image(media_file, media_file, media_file)
+
+            # Start the next article on a fresh page.
+            doc.add_page_break()
+
+        # Serialize in memory and hand the document back as a download.
+        buffer = BytesIO()
+        doc.save(buffer)
+        response = HttpResponse(
+            buffer.getvalue(),
+            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+        )
+        response['Content-Disposition'] = 'attachment; filename=articles.docx'
+        return response
+    
+    export_as_word.short_description = "导出选中文章为Word格式"
+
 # 为不同网站创建专门的文章管理类
 class NewsCnArticleAdmin(admin.ModelAdmin):
    list_display = ('title', 'pub_date')