Support export for Word
This commit is contained in:
105
core/admin.py
105
core/admin.py
@@ -34,7 +34,7 @@ class ArticleAdmin(admin.ModelAdmin):
|
|||||||
list_display = ('title', 'website', 'pub_date')
|
list_display = ('title', 'website', 'pub_date')
|
||||||
search_fields = ('title', 'content')
|
search_fields = ('title', 'content')
|
||||||
# 添加动作选项
|
# 添加动作选项
|
||||||
actions = ['delete_selected_articles', 'delete_dongfangyancao_articles', 'export_as_csv', 'export_as_json']
|
actions = ['delete_selected_articles', 'delete_dongfangyancao_articles', 'export_as_csv', 'export_as_json', 'export_as_word']
|
||||||
|
|
||||||
def delete_dongfangyancao_articles(self, request, queryset):
|
def delete_dongfangyancao_articles(self, request, queryset):
|
||||||
"""一键删除东方烟草报的所有文章"""
|
"""一键删除东方烟草报的所有文章"""
|
||||||
@@ -93,6 +93,109 @@ class ArticleAdmin(admin.ModelAdmin):
|
|||||||
|
|
||||||
export_as_json.short_description = "导出选中文章为JSON格式"
|
export_as_json.short_description = "导出选中文章为JSON格式"
|
||||||
|
|
||||||
|
def export_as_word(self, request, queryset):
    """Admin action: export the selected articles as one Word (.docx) download.

    Each article gets a level-1 heading, its metadata lines, the plain text of
    its HTML content, and its images (inline ``<img>`` tags plus the entries
    in ``article.media_files``). Images that cannot be embedded are listed as
    text so the reader can still see what was referenced.

    Args:
        request: The current admin request; used only to report errors.
        queryset: The articles selected in the change list.

    Returns:
        An ``HttpResponse`` carrying the document as an attachment, or None
        (after reporting an error message) when python-docx or BeautifulSoup
        is not installed.
    """
    import os
    import re
    from io import BytesIO

    try:
        from docx import Document
        from docx.shared import Inches
    except ImportError:
        self.message_user(request, "缺少python-docx库,请安装: pip install python-docx", messages.ERROR)
        return
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        self.message_user(request, "缺少beautifulsoup4库,请安装: pip install beautifulsoup4", messages.ERROR)
        return

    # python-docx stores text through lxml, which rejects characters that are
    # not legal in XML 1.0; one stray control character in scraped content
    # would otherwise abort the whole export.
    illegal_xml_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\ufffe\uffff]')
    remote_prefixes = ('http://', 'https://')

    def xml_safe(value):
        """Return ``value`` as text with XML-illegal characters removed."""
        return illegal_xml_chars.sub('', '' if value is None else str(value))

    def stamp(moment):
        """Format a datetime for display, or 'N/A' when it is missing."""
        return moment.strftime('%Y-%m-%d %H:%M:%S') if moment else 'N/A'

    def local_media_path(relative):
        """Resolve ``relative`` under MEDIA_ROOT; None if missing or outside it."""
        from django.conf import settings

        root = os.path.realpath(str(settings.MEDIA_ROOT))
        candidate = os.path.realpath(os.path.join(root, relative))
        # Paths come from scraped HTML, so refuse '..' or absolute paths that
        # would read files outside the media directory.
        if os.path.commonpath([root, candidate]) != root:
            return None
        return candidate if os.path.isfile(candidate) else None

    def embed_image(reference):
        """Try to add the image at ``reference``; return True on success."""
        try:
            if reference.lower().startswith(remote_prefixes):
                import requests

                reply = requests.get(reference, timeout=10)
                # Do not hand an HTTP error page to python-docx as an image.
                reply.raise_for_status()
                doc.add_picture(BytesIO(reply.content), width=Inches(4.0))
                return True
            path = local_media_path(reference)
            if path is None:
                return False
            doc.add_picture(path, width=Inches(4.0))
            return True
        except Exception:
            # Deliberate best effort: network errors, unsupported image
            # formats, etc. must not abort the export. The caller writes a
            # text placeholder instead.
            return False

    # Build the Word document.
    doc = Document()
    doc.add_heading('文章导出', 0)

    for index, article in enumerate(queryset):
        # Separate articles with a page break, without leaving a blank page
        # after the last one.
        if index:
            doc.add_page_break()

        # Article title.
        doc.add_heading(xml_safe(article.title), level=1)

        # Article metadata.
        doc.add_paragraph(xml_safe(f"网站: {article.website.name}"))
        doc.add_paragraph(xml_safe(f"URL: {article.url}"))
        doc.add_paragraph(f"发布时间: {stamp(article.pub_date)}")
        doc.add_paragraph(f"创建时间: {stamp(article.created_at)}")

        # Article content: embed the images, then add the text with tags removed.
        doc.add_heading('内容', level=2)
        soup = BeautifulSoup(article.content or '', 'html.parser')

        for img in soup.find_all('img'):
            src = img.get('src', '')
            # Inline data: URIs are neither fetchable nor useful as a placeholder.
            if src and not src.startswith('data:'):
                is_remote = src.lower().startswith(remote_prefixes)
                if not embed_image(src if is_remote else src.lstrip('/')):
                    doc.add_paragraph(xml_safe(f"[图片: {src}]"))
            # Drop the tag so it does not leak into the extracted text.
            img.decompose()

        doc.add_paragraph(xml_safe(soup.get_text()))

        # Media files attached to the article.
        if article.media_files:
            doc.add_heading('媒体文件', level=2)
            for media_file in article.media_files:
                if not embed_image(str(media_file)):
                    doc.add_paragraph(xml_safe(media_file))

    # Serialise the document in memory and return it as a download.
    buffer = BytesIO()
    doc.save(buffer)

    from django.http import HttpResponse

    response = HttpResponse(
        buffer.getvalue(),
        content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    )
    response['Content-Disposition'] = 'attachment; filename=articles.docx'
    return response
|
||||||
|
|
||||||
|
export_as_word.short_description = "导出选中文章为Word格式"
|
||||||
|
|
||||||
# 为不同网站创建专门的文章管理类
|
# 为不同网站创建专门的文章管理类
|
||||||
class NewsCnArticleAdmin(admin.ModelAdmin):
|
class NewsCnArticleAdmin(admin.ModelAdmin):
|
||||||
list_display = ('title', 'pub_date')
|
list_display = ('title', 'pub_date')
|
||||||
|
|||||||
@@ -78,8 +78,11 @@ class Command(BaseCommand):
|
|||||||
self.export_as_json(articles_data, output_path)
|
self.export_as_json(articles_data, output_path)
|
||||||
elif format_type == 'csv':
|
elif format_type == 'csv':
|
||||||
self.export_as_csv(articles_data, output_path)
|
self.export_as_csv(articles_data, output_path)
|
||||||
|
# 添加Word格式导出支持
|
||||||
|
elif format_type == 'docx':
|
||||||
|
self.export_as_word(articles_data, output_path)
|
||||||
else:
|
else:
|
||||||
self.stdout.write(self.style.ERROR('不支持的格式,仅支持 json 或 csv'))
|
self.stdout.write(self.style.ERROR('不支持的格式,仅支持 json、csv 或 docx'))
|
||||||
return
|
return
|
||||||
|
|
||||||
self.stdout.write(self.style.SUCCESS(f'成功导出 {len(articles_data)} 篇文章到 {output_path}'))
|
self.stdout.write(self.style.SUCCESS(f'成功导出 {len(articles_data)} 篇文章到 {output_path}'))
|
||||||
@@ -103,6 +106,98 @@ class Command(BaseCommand):
|
|||||||
article_data['media_files'] = ';'.join(article_data['media_files']) if article_data['media_files'] else ''
|
article_data['media_files'] = ';'.join(article_data['media_files']) if article_data['media_files'] else ''
|
||||||
writer.writerow(article_data)
|
writer.writerow(article_data)
|
||||||
|
|
||||||
|
# 添加Word格式导出方法
|
||||||
|
def export_as_word(self, articles_data, output_path):
    """Export article dicts to a Word (.docx) document saved at ``output_path``.

    Each article gets a level-1 heading, its metadata lines, the plain text of
    its HTML content, and its images (inline ``<img>`` tags plus the entries
    in ``media_files``). Images that cannot be embedded are listed as text so
    the reader can still see what was referenced.

    Args:
        articles_data: Iterable of dicts with the keys ``title``, ``website``,
            ``url``, ``pub_date``, ``created_at``, ``content`` and
            ``media_files``.
        output_path: Destination passed to ``Document.save``.

    Returns:
        None. When python-docx or BeautifulSoup is not installed, an error is
        written to ``self.stdout`` and nothing is saved.
    """
    import os
    import re
    from io import BytesIO

    try:
        from docx import Document
        from docx.shared import Inches
    except ImportError:
        self.stdout.write(self.style.ERROR('缺少python-docx库,请安装: pip install python-docx'))
        return
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        self.stdout.write(self.style.ERROR('缺少beautifulsoup4库,请安装: pip install beautifulsoup4'))
        return

    # python-docx stores text through lxml, which rejects characters that are
    # not legal in XML 1.0; one stray control character in scraped content
    # would otherwise abort the whole export.
    illegal_xml_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\ufffe\uffff]')
    remote_prefixes = ('http://', 'https://')

    def xml_safe(value):
        """Return ``value`` as text with XML-illegal characters removed."""
        return illegal_xml_chars.sub('', '' if value is None else str(value))

    def local_media_path(relative):
        """Resolve ``relative`` under MEDIA_ROOT; None if missing or outside it."""
        from django.conf import settings

        root = os.path.realpath(str(settings.MEDIA_ROOT))
        candidate = os.path.realpath(os.path.join(root, relative))
        # Paths come from scraped HTML, so refuse '..' or absolute paths that
        # would read files outside the media directory.
        if os.path.commonpath([root, candidate]) != root:
            return None
        return candidate if os.path.isfile(candidate) else None

    def embed_image(reference):
        """Try to add the image at ``reference``; return True on success."""
        try:
            if reference.lower().startswith(remote_prefixes):
                import requests

                reply = requests.get(reference, timeout=10)
                # Do not hand an HTTP error page to python-docx as an image.
                reply.raise_for_status()
                doc.add_picture(BytesIO(reply.content), width=Inches(4.0))
                return True
            path = local_media_path(reference)
            if path is None:
                return False
            doc.add_picture(path, width=Inches(4.0))
            return True
        except Exception:
            # Deliberate best effort: network errors, unsupported image
            # formats, etc. must not abort the export. The caller writes a
            # text placeholder instead.
            return False

    # Build the Word document.
    doc = Document()
    doc.add_heading('文章导出', 0)

    for index, article_data in enumerate(articles_data):
        # Separate articles with a page break, without leaving a blank page
        # after the last one.
        if index:
            doc.add_page_break()

        # Article title.
        doc.add_heading(xml_safe(article_data['title']), level=1)

        # Article metadata.
        doc.add_paragraph(xml_safe(f"网站: {article_data['website']}"))
        doc.add_paragraph(xml_safe(f"URL: {article_data['url']}"))
        doc.add_paragraph(xml_safe(f"发布时间: {article_data['pub_date']}"))
        doc.add_paragraph(xml_safe(f"创建时间: {article_data['created_at']}"))

        # Article content: embed the images, then add the text with tags removed.
        doc.add_heading('内容', level=2)
        soup = BeautifulSoup(article_data['content'] or '', 'html.parser')

        for img in soup.find_all('img'):
            src = img.get('src', '')
            # Inline data: URIs are neither fetchable nor useful as a placeholder.
            if src and not src.startswith('data:'):
                is_remote = src.lower().startswith(remote_prefixes)
                if not embed_image(src if is_remote else src.lstrip('/')):
                    doc.add_paragraph(xml_safe(f"[图片: {src}]"))
            # Drop the tag so it does not leak into the extracted text.
            img.decompose()

        doc.add_paragraph(xml_safe(soup.get_text()))

        # Media files attached to the article.
        media_files = article_data['media_files']
        if isinstance(media_files, str):
            # Other export code in this file replaces this key with a
            # ';'-joined string; split it so it is not iterated per character.
            media_files = [name for name in media_files.split(';') if name]
        if media_files:
            doc.add_heading('媒体文件', level=2)
            for media_file in media_files:
                if not embed_image(str(media_file)):
                    doc.add_paragraph(xml_safe(media_file))

    # Save the document.
    doc.save(output_path)
|
||||||
|
|
||||||
def export_with_media(self, articles_data, media_files, output_path, format_type):
|
def export_with_media(self, articles_data, media_files, output_path, format_type):
|
||||||
# 创建ZIP文件
|
# 创建ZIP文件
|
||||||
with zipfile.ZipFile(output_path, 'w') as zipf:
|
with zipfile.ZipFile(output_path, 'w') as zipf:
|
||||||
@@ -123,6 +218,90 @@ class Command(BaseCommand):
|
|||||||
article_data['media_files'] = ';'.join(article_data['media_files']) if article_data['media_files'] else ''
|
article_data['media_files'] = ';'.join(article_data['media_files']) if article_data['media_files'] else ''
|
||||||
writer.writerow(article_data)
|
writer.writerow(article_data)
|
||||||
zipf.writestr(data_filename, csv_buffer.getvalue())
|
zipf.writestr(data_filename, csv_buffer.getvalue())
|
||||||
|
# 添加Word格式支持
|
||||||
|
elif format_type == 'docx':
|
||||||
|
# 创建Word文档并保存到ZIP
|
||||||
|
try:
|
||||||
|
from docx import Document
|
||||||
|
from docx.shared import Inches
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
doc = Document()
|
||||||
|
doc.add_heading('文章导出', 0)
|
||||||
|
|
||||||
|
for article_data in articles_data:
|
||||||
|
doc.add_heading(article_data['title'], level=1)
|
||||||
|
doc.add_paragraph(f"网站: {article_data['website']}")
|
||||||
|
doc.add_paragraph(f"URL: {article_data['url']}")
|
||||||
|
doc.add_paragraph(f"发布时间: {article_data['pub_date']}")
|
||||||
|
doc.add_paragraph(f"创建时间: {article_data['created_at']}")
|
||||||
|
|
||||||
|
doc.add_heading('内容', level=2)
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
soup = BeautifulSoup(article_data['content'], 'html.parser')
|
||||||
|
|
||||||
|
# 处理内容中的图片
|
||||||
|
for img in soup.find_all('img'):
|
||||||
|
src = img.get('src', '')
|
||||||
|
if src:
|
||||||
|
# 尝试添加图片到文档
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
from django.conf import settings
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# 构建完整的图片路径
|
||||||
|
if src.startswith('http'):
|
||||||
|
# 网络图片
|
||||||
|
response = requests.get(src, timeout=10)
|
||||||
|
image_stream = BytesIO(response.content)
|
||||||
|
doc.add_picture(image_stream, width=Inches(4.0))
|
||||||
|
else:
|
||||||
|
# 本地图片
|
||||||
|
full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
|
||||||
|
if os.path.exists(full_path):
|
||||||
|
doc.add_picture(full_path, width=Inches(4.0))
|
||||||
|
except Exception as e:
|
||||||
|
# 如果添加图片失败,添加图片URL作为文本
|
||||||
|
doc.add_paragraph(f"[图片: {src}]")
|
||||||
|
|
||||||
|
# 移除原始img标签
|
||||||
|
img.decompose()
|
||||||
|
|
||||||
|
content_text = soup.get_text()
|
||||||
|
doc.add_paragraph(content_text)
|
||||||
|
|
||||||
|
if article_data['media_files']:
|
||||||
|
doc.add_heading('媒体文件', level=2)
|
||||||
|
for media_file in article_data['media_files']:
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
full_path = os.path.join(settings.MEDIA_ROOT, media_file)
|
||||||
|
if os.path.exists(full_path):
|
||||||
|
# 添加图片到文档
|
||||||
|
doc.add_picture(full_path, width=Inches(4.0))
|
||||||
|
else:
|
||||||
|
# 如果是URL格式的媒体文件
|
||||||
|
if media_file.startswith('http'):
|
||||||
|
response = requests.get(media_file, timeout=10)
|
||||||
|
image_stream = BytesIO(response.content)
|
||||||
|
doc.add_picture(image_stream, width=Inches(4.0))
|
||||||
|
else:
|
||||||
|
doc.add_paragraph(media_file)
|
||||||
|
except Exception as e:
|
||||||
|
doc.add_paragraph(media_file)
|
||||||
|
|
||||||
|
doc.add_page_break()
|
||||||
|
|
||||||
|
# 将文档保存到内存中再写入ZIP
|
||||||
|
doc_buffer = BytesIO()
|
||||||
|
doc.save(doc_buffer)
|
||||||
|
doc_buffer.seek(0)
|
||||||
|
zipf.writestr(data_filename, doc_buffer.read())
|
||||||
|
except ImportError:
|
||||||
|
zipf.writestr(data_filename, "错误:缺少python-docx库,无法生成Word文档")
|
||||||
|
|
||||||
# 添加媒体文件
|
# 添加媒体文件
|
||||||
for media_path in media_files:
|
for media_path in media_files:
|
||||||
|
|||||||
Reference in New Issue
Block a user