Support export for Word

2025-08-11 23:14:56 +08:00
parent bfd1604872
commit b6bbb90703
2 changed files with 284 additions and 2 deletions

@@ -34,7 +34,7 @@ class ArticleAdmin(admin.ModelAdmin):
    list_display = ('title', 'website', 'pub_date')
    search_fields = ('title', 'content')
    # Admin action options
    actions = ['delete_selected_articles', 'delete_dongfangyancao_articles', 'export_as_csv', 'export_as_json', 'export_as_word']

    def delete_dongfangyancao_articles(self, request, queryset):
        """Delete all 东方烟草报 articles in one click"""
@@ -93,6 +93,109 @@ class ArticleAdmin(admin.ModelAdmin):
    export_as_json.short_description = "导出选中文章为JSON格式"
    def export_as_word(self, request, queryset):
        """Export the selected articles as a Word document"""
        try:
            from docx import Document
            from io import BytesIO
            from docx.shared import Inches
        except ImportError:
            self.message_user(request, "缺少python-docx库，请安装: pip install python-docx", messages.ERROR)
            return

        # Create the Word document
        doc = Document()
        doc.add_heading('文章导出', 0)

        for article in queryset:
            # Article title
            doc.add_heading(article.title, level=1)

            # Article metadata
            doc.add_paragraph(f"网站: {article.website.name}")
            doc.add_paragraph(f"URL: {article.url}")
            doc.add_paragraph(f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
            doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}")

            # Article content
            doc.add_heading('内容', level=2)

            # Lightweight HTML handling: strip tags and deal with images
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(article.content, 'html.parser')

            # Handle images embedded in the content
            for img in soup.find_all('img'):
                src = img.get('src', '')
                if src:
                    # Try to add the image to the document
                    try:
                        import os
                        from django.conf import settings
                        import requests
                        from io import BytesIO

                        # Build the full image path
                        if src.startswith('http'):
                            # Remote image
                            response = requests.get(src, timeout=10)
                            image_stream = BytesIO(response.content)
                            doc.add_picture(image_stream, width=Inches(4.0))
                        else:
                            # Local image
                            full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
                            if os.path.exists(full_path):
                                doc.add_picture(full_path, width=Inches(4.0))
                    except Exception:
                        # If the image cannot be added, fall back to its URL as text
                        doc.add_paragraph(f"[图片: {src}]")

                # Remove the original img tag
                img.decompose()

            content_text = soup.get_text()
            doc.add_paragraph(content_text)

            # Media file information
            if article.media_files:
                doc.add_heading('媒体文件', level=2)
                for media_file in article.media_files:
                    try:
                        import os
                        from django.conf import settings
                        from io import BytesIO
                        import requests

                        full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                        if os.path.exists(full_path):
                            # Add the image to the document
                            doc.add_picture(full_path, width=Inches(4.0))
                        else:
                            # Media file given as a URL
                            if media_file.startswith('http'):
                                response = requests.get(media_file, timeout=10)
                                image_stream = BytesIO(response.content)
                                doc.add_picture(image_stream, width=Inches(4.0))
                            else:
                                doc.add_paragraph(media_file)
                    except Exception:
                        doc.add_paragraph(media_file)

            # Page break between articles
            doc.add_page_break()

        # Save to an in-memory buffer
        buffer = BytesIO()
        doc.save(buffer)
        buffer.seek(0)

        # Build the HttpResponse
        from django.http import HttpResponse
        response = HttpResponse(buffer.getvalue(), content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
        response['Content-Disposition'] = 'attachment; filename=articles.docx'
        return response
    export_as_word.short_description = "导出选中文章为Word格式"

# Dedicated article admin classes for individual websites
class NewsCnArticleAdmin(admin.ModelAdmin):
    list_display = ('title', 'pub_date')
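
The action can also be exercised outside the admin UI by calling it directly on the ModelAdmin instance, which is handy for a quick smoke test. A minimal sketch, assuming python-docx is installed and using hypothetical import paths for the Article model and ArticleAdmin (adjust to the actual app name):

from django.contrib import admin
from django.test import RequestFactory

from articles.models import Article       # hypothetical module path
from articles.admin import ArticleAdmin   # hypothetical module path

# Fake a request; export_as_word only touches it on the ImportError branch.
request = RequestFactory().get('/admin/')
model_admin = ArticleAdmin(Article, admin.site)

# Returns an HttpResponse carrying the generated .docx.
response = model_admin.export_as_word(request, Article.objects.all()[:5])
print(response['Content-Disposition'])    # attachment; filename=articles.docx
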

@@ -78,8 +78,11 @@ class Command(BaseCommand):
            self.export_as_json(articles_data, output_path)
        elif format_type == 'csv':
            self.export_as_csv(articles_data, output_path)
        # Word export support
        elif format_type == 'docx':
            self.export_as_word(articles_data, output_path)
        else:
            self.stdout.write(self.style.ERROR('不支持的格式,仅支持 json、csv 或 docx'))
            return

        self.stdout.write(self.style.SUCCESS(f'成功导出 {len(articles_data)} 篇文章到 {output_path}'))
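
The new 'docx' branch is driven the same way as the existing json/csv paths. A minimal sketch of invoking it via call_command, assuming a hypothetical command name export_articles and option names format/output (match them to the actual command definition):

from django.core.management import call_command

# Hypothetical command and option names, for illustration only.
call_command('export_articles', format='docx', output='articles.docx')
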
@@ -103,6 +106,98 @@ class Command(BaseCommand):
                article_data['media_files'] = ';'.join(article_data['media_files']) if article_data['media_files'] else ''
                writer.writerow(article_data)
    # Word export
    def export_as_word(self, articles_data, output_path):
        try:
            from docx import Document
            from docx.shared import Inches
        except ImportError:
            self.stdout.write(self.style.ERROR('缺少python-docx库，请安装: pip install python-docx'))
            return

        # Create the Word document
        doc = Document()
        doc.add_heading('文章导出', 0)

        for article_data in articles_data:
            # Article title
            doc.add_heading(article_data['title'], level=1)

            # Article metadata
            doc.add_paragraph(f"网站: {article_data['website']}")
            doc.add_paragraph(f"URL: {article_data['url']}")
            doc.add_paragraph(f"发布时间: {article_data['pub_date']}")
            doc.add_paragraph(f"创建时间: {article_data['created_at']}")

            # Article content
            doc.add_heading('内容', level=2)

            # Lightweight HTML handling: strip tags
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(article_data['content'], 'html.parser')

            # Handle images embedded in the content
            for img in soup.find_all('img'):
                src = img.get('src', '')
                if src:
                    # Try to add the image to the document
                    try:
                        import os
                        from django.conf import settings
                        import requests
                        from io import BytesIO

                        # Build the full image path
                        if src.startswith('http'):
                            # Remote image
                            response = requests.get(src, timeout=10)
                            image_stream = BytesIO(response.content)
                            doc.add_picture(image_stream, width=Inches(4.0))
                        else:
                            # Local image
                            full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
                            if os.path.exists(full_path):
                                doc.add_picture(full_path, width=Inches(4.0))
                    except Exception:
                        # If the image cannot be added, fall back to its URL as text
                        doc.add_paragraph(f"[图片: {src}]")

                # Remove the original img tag
                img.decompose()

            content_text = soup.get_text()
            doc.add_paragraph(content_text)

            # Media file information
            if article_data['media_files']:
                doc.add_heading('媒体文件', level=2)
                for media_file in article_data['media_files']:
                    try:
                        import os
                        from django.conf import settings
                        from io import BytesIO
                        import requests

                        full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                        if os.path.exists(full_path):
                            # Add the image to the document
                            doc.add_picture(full_path, width=Inches(4.0))
                        else:
                            # Media file given as a URL
                            if media_file.startswith('http'):
                                response = requests.get(media_file, timeout=10)
                                image_stream = BytesIO(response.content)
                                doc.add_picture(image_stream, width=Inches(4.0))
                            else:
                                doc.add_paragraph(media_file)
                    except Exception:
                        doc.add_paragraph(media_file)

            # Page break between articles
            doc.add_page_break()

        # Save the document
        doc.save(output_path)
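
Note that doc.add_picture() raises UnrecognizedImageError when the downloaded bytes are not an image python-docx understands (for example an HTML error page), which is why each call above sits in a try/except. A small helper along these lines could make the remote-image branch more defensive; this is only a sketch, not part of the commit:

import requests
from io import BytesIO
from docx.shared import Inches

def add_remote_picture(doc, url, width=Inches(4.0), timeout=10):
    """Download an image and append it to the document; return True on success."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    content_type = response.headers.get('Content-Type', '')
    if not content_type.startswith('image/'):
        return False
    doc.add_picture(BytesIO(response.content), width=width)
    return True
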
    def export_with_media(self, articles_data, media_files, output_path, format_type):
        # Create the ZIP archive
        with zipfile.ZipFile(output_path, 'w') as zipf:
@@ -123,6 +218,90 @@ class Command(BaseCommand):
                    article_data['media_files'] = ';'.join(article_data['media_files']) if article_data['media_files'] else ''
                    writer.writerow(article_data)
                zipf.writestr(data_filename, csv_buffer.getvalue())
            # Word export support
            elif format_type == 'docx':
                # Build the Word document and store it inside the ZIP
                try:
                    from docx import Document
                    from docx.shared import Inches
                    from io import BytesIO

                    doc = Document()
                    doc.add_heading('文章导出', 0)

                    for article_data in articles_data:
                        doc.add_heading(article_data['title'], level=1)
                        doc.add_paragraph(f"网站: {article_data['website']}")
                        doc.add_paragraph(f"URL: {article_data['url']}")
                        doc.add_paragraph(f"发布时间: {article_data['pub_date']}")
                        doc.add_paragraph(f"创建时间: {article_data['created_at']}")
                        doc.add_heading('内容', level=2)

                        from bs4 import BeautifulSoup
                        soup = BeautifulSoup(article_data['content'], 'html.parser')

                        # Handle images embedded in the content
                        for img in soup.find_all('img'):
                            src = img.get('src', '')
                            if src:
                                # Try to add the image to the document
                                try:
                                    import os
                                    from django.conf import settings
                                    import requests

                                    # Build the full image path
                                    if src.startswith('http'):
                                        # Remote image
                                        response = requests.get(src, timeout=10)
                                        image_stream = BytesIO(response.content)
                                        doc.add_picture(image_stream, width=Inches(4.0))
                                    else:
                                        # Local image
                                        full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
                                        if os.path.exists(full_path):
                                            doc.add_picture(full_path, width=Inches(4.0))
                                except Exception:
                                    # If the image cannot be added, fall back to its URL as text
                                    doc.add_paragraph(f"[图片: {src}]")

                            # Remove the original img tag
                            img.decompose()

                        content_text = soup.get_text()
                        doc.add_paragraph(content_text)

                        if article_data['media_files']:
                            doc.add_heading('媒体文件', level=2)
                            for media_file in article_data['media_files']:
                                try:
                                    import os
                                    from django.conf import settings
                                    import requests  # needed below for URL media files

                                    full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                                    if os.path.exists(full_path):
                                        # Add the image to the document
                                        doc.add_picture(full_path, width=Inches(4.0))
                                    else:
                                        # Media file given as a URL
                                        if media_file.startswith('http'):
                                            response = requests.get(media_file, timeout=10)
                                            image_stream = BytesIO(response.content)
                                            doc.add_picture(image_stream, width=Inches(4.0))
                                        else:
                                            doc.add_paragraph(media_file)
                                except Exception:
                                    doc.add_paragraph(media_file)

                        doc.add_page_break()

                    # Save the document to memory, then write it into the ZIP
                    doc_buffer = BytesIO()
                    doc.save(doc_buffer)
                    doc_buffer.seek(0)
                    zipf.writestr(data_filename, doc_buffer.read())
                except ImportError:
                    zipf.writestr(data_filename, "错误：缺少python-docx库，无法生成Word文档")
            # Add the media files
            for media_path in media_files: