Fix URL error and add file download support

This commit is contained in:
2025-09-23 14:45:27 +08:00
parent e51154bb29
commit 7a4045048e
7 changed files with 76 additions and 4 deletions

2
.gitignore vendored
View File

@@ -174,3 +174,5 @@ cython_debug/
# PyPI configuration file
.pypirc
media/

0
celerybeat-schedule.db Normal file
View File

View File

@@ -83,7 +83,7 @@ class CrawledContentAdmin(admin.ModelAdmin):
def preview_content(self, obj):
"""预览内容"""
if obj.is_local_saved:
url = reverse('admin:crawled_content_preview', args=[obj.id])
url = reverse('preview_crawled_content', args=[obj.id])
return format_html(
'<a href="{}" target="_blank" class="button">预览文章</a>',
url

View File

@@ -186,6 +186,9 @@
{% endif %}
</small>
<div>
<a href="{% url 'download_crawled_content' content.id %}" class="btn btn-sm btn-outline-primary" title="下载">
<i class="bi bi-download"></i>
</a>
{% for keyword in content.keywords_matched|split:"," %}
<span class="keyword-badge">{{ keyword|strip }}</span>
{% endfor %}

View File

@@ -5,4 +5,5 @@ urlpatterns = [
path('', views.dashboard, name='dashboard'),
path('search/', views.search_page, name='search'),
path('crawled-content/<int:content_id>/preview/', views.preview_crawled_content, name='preview_crawled_content'),
path('crawled-content/<int:content_id>/download/', views.download_crawled_content, name='download_crawled_content'),
]

View File

@@ -1,9 +1,10 @@
from django.shortcuts import render, get_object_or_404
from django.http import HttpResponse
from django.http import HttpResponse, Http404
from django.db.models import Q, Count
from django.conf import settings
from django.utils import timezone
from .models import Website, CrawlTask, CrawledContent, CrawlLog, SearchKeyword
from django.core.files.storage import default_storage
from .models import Website, CrawlTask, CrawledContent, CrawlLog, SearchKeyword, MediaFile
from rest_framework import viewsets, filters
from rest_framework.decorators import action
from rest_framework.response import Response
@@ -15,6 +16,12 @@ import json
from django.core.paginator import Paginator
from django.db.models.functions import TruncDate
from django.db.models import Count
import os
import tempfile
import zipfile
from io import BytesIO
from docx import Document
from django.core.files.base import ContentFile
def dashboard(request):
@@ -289,4 +296,62 @@ def preview_crawled_content(request, content_id):
</body>
</html>
"""
return HttpResponse(html_content, content_type='text/html; charset=utf-8')
return HttpResponse(html_content, content_type='text/html; charset=utf-8')
def _safe_download_stem(title, fallback, max_length=50):
    """Return *title* reduced to a name that is safe in a ZIP entry and an HTTP header.

    Path separators, quotes, control characters and other characters that are
    reserved on common file systems are replaced with ``_``. The result is
    truncated to *max_length* characters; *fallback* is returned when nothing
    usable remains (empty or missing title).
    """
    import re

    cleaned = re.sub(r'[\x00-\x1f\x7f<>:"/\\|?*]+', '_', title or '')
    cleaned = cleaned[:max_length].strip(' ._')
    return cleaned or fallback


def _attachment_disposition(stem, extension):
    """Build a Content-Disposition value that survives non-ASCII file names.

    ``filename=`` carries an ASCII-only fallback and ``filename*=`` carries
    the percent-encoded UTF-8 name (RFC 6266 / RFC 5987), so browsers show
    the real title instead of a mangled header.
    """
    from urllib.parse import quote

    ascii_stem = stem.encode('ascii', 'ignore').decode('ascii').strip(' ._') or 'download'
    encoded_name = quote(f'{stem}{extension}', safe='')
    return (
        f'attachment; filename="{ascii_stem}{extension}"; '
        f"filename*=UTF-8''{encoded_name}"
    )


def _render_content_docx(content):
    """Render the article's metadata and body into a Word document; return its bytes."""
    doc = Document()
    doc.add_heading(content.title or '', 0)

    # Metadata
    doc.add_paragraph(f'来源网站: {content.website.name} ({content.website.region})')
    doc.add_paragraph(f'原始链接: {content.url}')
    doc.add_paragraph(f'发布时间: {content.publish_date or "未知"}')
    doc.add_paragraph(f'作者: {content.author or "未知"}')
    doc.add_paragraph(f'匹配关键字: {content.keywords_matched}')
    doc.add_paragraph(f'爬取时间: {content.created_at}')

    # Body: one Word paragraph per blank-line-separated block of text.
    doc.add_heading('正文', level=1)
    for block in (content.content or '').split('\n\n'):
        text = block.strip()
        if text:
            doc.add_paragraph(text)

    doc_buffer = BytesIO()
    doc.save(doc_buffer)
    return doc_buffer.getvalue()


def download_crawled_content(request, content_id):
    """Download one crawled article as a ZIP holding a Word document and its media.

    Args:
        request: The incoming HTTP request (not otherwise inspected).
        content_id: Primary key of the ``CrawledContent`` to export.

    Returns:
        An ``HttpResponse`` with ``application/zip`` content, sent as an
        attachment named after the (sanitized, truncated) article title.

    Raises:
        Http404: If no ``CrawledContent`` with ``content_id`` exists.
    """
    import logging

    logger = logging.getLogger(__name__)
    content = get_object_or_404(CrawledContent, id=content_id)

    # The title is crawled from external sites, so it must not be trusted as
    # a file name: sanitize once and reuse for the .docx entry and the .zip.
    stem = _safe_download_stem(content.title, fallback=f'content_{content_id}')

    # The archive is assembled fully in memory.
    zip_buffer = BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        zip_file.writestr(f'{stem}.docx', _render_content_docx(content))

        # Track entry names so two media files with the same basename do not
        # produce duplicate (and mutually shadowing) ZIP entries.
        used_names = set()
        for media_file in content.media_files.all():
            try:
                local_file = media_file.local_file
                if not local_file or not default_storage.exists(local_file.name):
                    continue

                # Context manager closes the storage handle after reading.
                with default_storage.open(local_file.name) as handle:
                    file_content = handle.read()

                base_name = os.path.basename(local_file.name)
                root, ext = os.path.splitext(base_name)
                entry_name = base_name
                suffix = 1
                while entry_name in used_names:
                    entry_name = f'{root}_{suffix}{ext}'
                    suffix += 1
                used_names.add(entry_name)

                zip_file.writestr(f'media/{entry_name}', file_content)
            except Exception:
                # Best effort: one unreadable media file must not abort the
                # whole download, but the failure is logged, not swallowed.
                logger.warning(
                    'Skipping media file %r while exporting content %s',
                    getattr(media_file, 'pk', None),
                    content_id,
                    exc_info=True,
                )

    response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')
    response['Content-Disposition'] = _attachment_disposition(stem, '.zip')
    return response

View File

@@ -27,3 +27,4 @@ tzdata==2025.2
urllib3==2.5.0
vine==5.1.0
wcwidth==0.2.14
python-docx==1.2.0