def export_as_word(self, request, queryset):
    """Export the selected articles as one Word (.docx) download.

    Admin action. For every article in ``queryset`` the document gets a
    heading with the title, four metadata paragraphs (website, URL,
    publish time, creation time), the article content with HTML tags
    removed, and the article's media files. A page break follows each
    article.

    Images are embedded on a best-effort basis: any image that cannot be
    embedded is replaced by a text placeholder so the export never fails
    because of a single broken picture.

    Args:
        request: The current admin request; used to report a missing
            python-docx dependency to the user.
        queryset: The articles selected in the admin change list.

    Returns:
        An ``HttpResponse`` carrying the .docx file as an attachment, or
        ``None`` (after flashing an error message) when python-docx is
        not installed.
    """
    try:
        from docx import Document
        from docx.shared import Inches
    except ImportError:
        self.message_user(request, "缺少python-docx库,请安装: pip install python-docx", messages.ERROR)
        return

    import os
    from io import BytesIO

    from bs4 import BeautifulSoup
    from django.conf import settings
    from django.http import HttpResponse

    image_width = Inches(4.0)

    def embed_image(source, *, strip_leading_slash=False):
        """Embed the image at ``source`` into ``doc``; return True on success.

        ``source`` is fetched over HTTP when it starts with ``http``,
        otherwise it is looked up below ``settings.MEDIA_ROOT``. Every
        failure (bad type, network error, HTTP error status, missing or
        unreadable file, data that is not an image) yields False so the
        caller can write a placeholder instead.
        """
        try:
            if source.startswith('http'):
                # Imported lazily so a missing ``requests`` only disables
                # remote images instead of breaking the whole export.
                import requests

                response = requests.get(source, timeout=10)
                # Do not hand an HTML error page to add_picture().
                response.raise_for_status()
                doc.add_picture(BytesIO(response.content), width=image_width)
                return True

            relative_path = source.lstrip('/') if strip_leading_slash else source
            full_path = os.path.join(settings.MEDIA_ROOT, relative_path)
            if not os.path.exists(full_path):
                return False
            doc.add_picture(full_path, width=image_width)
            return True
        except Exception:
            # Deliberate best effort: the caller falls back to plain text.
            return False

    doc = Document()
    doc.add_heading('文章导出', 0)

    for article in queryset:
        doc.add_heading(article.title, level=1)

        # Metadata block; pub_date is optional, created_at is formatted as-is.
        pub_date_text = article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'
        doc.add_paragraph(f"网站: {article.website.name}")
        doc.add_paragraph(f"URL: {article.url}")
        doc.add_paragraph(f"发布时间: {pub_date_text}")
        doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}")

        doc.add_heading('内容', level=2)
        # Treat missing content as empty instead of crashing the parser.
        soup = BeautifulSoup(article.content or '', 'html.parser')

        # Inline images are emitted before the text, then removed from the
        # parsed tree so only the plain text remains.
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if src and not embed_image(src, strip_leading_slash=True):
                doc.add_paragraph(f"[图片: {src}]")
            img.decompose()

        doc.add_paragraph(soup.get_text())

        if article.media_files:
            doc.add_heading('媒体文件', level=2)
            for media_file in article.media_files:
                if not embed_image(media_file):
                    # Not embeddable: list the reference as text.
                    doc.add_paragraph(media_file)

        doc.add_page_break()

    # Serialize in memory and return the bytes as a file download.
    buffer = BytesIO()
    doc.save(buffer)

    response = HttpResponse(
        buffer.getvalue(),
        content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    )
    response['Content-Disposition'] = 'attachment; filename=articles.docx'
    return response

export_as_word.short_description = "导出选中文章为Word格式"
def export_as_word(self, articles_data, output_path):
    """Write the given articles to a Word (.docx) file at ``output_path``.

    For every entry of ``articles_data`` the document gets a heading with
    the title, four metadata paragraphs (website, URL, publish time,
    creation time), the article content with HTML tags removed, and the
    article's media files. A page break follows each article.

    Images are embedded on a best-effort basis: any image that cannot be
    embedded is replaced by a text placeholder so the export never fails
    because of a single broken picture.

    Args:
        articles_data: Iterable of dicts with the keys ``title``,
            ``website``, ``url``, ``pub_date``, ``created_at``,
            ``content`` and ``media_files``.
        output_path: Destination passed to ``Document.save``.

    Returns:
        None. When python-docx is not installed an error is written to
        ``self.stdout`` and nothing is saved.
    """
    try:
        from docx import Document
        from docx.shared import Inches
    except ImportError:
        # NOTE(review): the caller appears to print its success message
        # after this method returns, so a missing dependency is followed
        # by a misleading "success" line - confirm and consider raising.
        self.stdout.write(self.style.ERROR('缺少python-docx库,请安装: pip install python-docx'))
        return

    import os
    from io import BytesIO

    from bs4 import BeautifulSoup
    from django.conf import settings

    image_width = Inches(4.0)

    def embed_image(source, *, strip_leading_slash=False):
        """Embed the image at ``source`` into ``doc``; return True on success.

        ``source`` is fetched over HTTP when it starts with ``http``,
        otherwise it is looked up below ``settings.MEDIA_ROOT``. Every
        failure (bad type, network error, HTTP error status, missing or
        unreadable file, data that is not an image) yields False so the
        caller can write a placeholder instead.
        """
        try:
            if source.startswith('http'):
                # Imported lazily so a missing ``requests`` only disables
                # remote images instead of breaking the whole export.
                import requests

                response = requests.get(source, timeout=10)
                # Do not hand an HTML error page to add_picture().
                response.raise_for_status()
                doc.add_picture(BytesIO(response.content), width=image_width)
                return True

            relative_path = source.lstrip('/') if strip_leading_slash else source
            full_path = os.path.join(settings.MEDIA_ROOT, relative_path)
            if not os.path.exists(full_path):
                return False
            doc.add_picture(full_path, width=image_width)
            return True
        except Exception:
            # Deliberate best effort: the caller falls back to plain text.
            return False

    doc = Document()
    doc.add_heading('文章导出', 0)

    for article_data in articles_data:
        doc.add_heading(article_data['title'], level=1)

        # Metadata block, written exactly as provided by the caller.
        doc.add_paragraph(f"网站: {article_data['website']}")
        doc.add_paragraph(f"URL: {article_data['url']}")
        doc.add_paragraph(f"发布时间: {article_data['pub_date']}")
        doc.add_paragraph(f"创建时间: {article_data['created_at']}")

        doc.add_heading('内容', level=2)
        # Treat missing content as empty instead of crashing the parser.
        soup = BeautifulSoup(article_data['content'] or '', 'html.parser')

        # Inline images are emitted before the text, then removed from the
        # parsed tree so only the plain text remains.
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if src and not embed_image(src, strip_leading_slash=True):
                doc.add_paragraph(f"[图片: {src}]")
            img.decompose()

        doc.add_paragraph(soup.get_text())

        if article_data['media_files']:
            doc.add_heading('媒体文件', level=2)
            for media_file in article_data['media_files']:
                if not embed_image(media_file):
                    # Not embeddable: list the reference as text.
                    doc.add_paragraph(media_file)

        doc.add_page_break()

    doc.save(output_path)
doc.add_heading('内容', level=2) + from bs4 import BeautifulSoup + soup = BeautifulSoup(article_data['content'], 'html.parser') + + # 处理内容中的图片 + for img in soup.find_all('img'): + src = img.get('src', '') + if src: + # 尝试添加图片到文档 + try: + import os + from django.conf import settings + import requests + + # 构建完整的图片路径 + if src.startswith('http'): + # 网络图片 + response = requests.get(src, timeout=10) + image_stream = BytesIO(response.content) + doc.add_picture(image_stream, width=Inches(4.0)) + else: + # 本地图片 + full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/')) + if os.path.exists(full_path): + doc.add_picture(full_path, width=Inches(4.0)) + except Exception as e: + # 如果添加图片失败,添加图片URL作为文本 + doc.add_paragraph(f"[图片: {src}]") + + # 移除原始img标签 + img.decompose() + + content_text = soup.get_text() + doc.add_paragraph(content_text) + + if article_data['media_files']: + doc.add_heading('媒体文件', level=2) + for media_file in article_data['media_files']: + try: + import os + from django.conf import settings + + full_path = os.path.join(settings.MEDIA_ROOT, media_file) + if os.path.exists(full_path): + # 添加图片到文档 + doc.add_picture(full_path, width=Inches(4.0)) + else: + # 如果是URL格式的媒体文件 + if media_file.startswith('http'): + response = requests.get(media_file, timeout=10) + image_stream = BytesIO(response.content) + doc.add_picture(image_stream, width=Inches(4.0)) + else: + doc.add_paragraph(media_file) + except Exception as e: + doc.add_paragraph(media_file) + + doc.add_page_break() + + # 将文档保存到内存中再写入ZIP + doc_buffer = BytesIO() + doc.save(doc_buffer) + doc_buffer.seek(0) + zipf.writestr(data_filename, doc_buffer.read()) + except ImportError: + zipf.writestr(data_filename, "错误:缺少python-docx库,无法生成Word文档") # 添加媒体文件 for media_path in media_files: