Fix URL errors and add support for downloading files
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -174,3 +174,5 @@ cython_debug/
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
|
||||
media/
|
||||
|
||||
0
celerybeat-schedule.db
Normal file
0
celerybeat-schedule.db
Normal file
@@ -83,7 +83,7 @@ class CrawledContentAdmin(admin.ModelAdmin):
|
||||
def preview_content(self, obj):
|
||||
"""预览内容"""
|
||||
if obj.is_local_saved:
|
||||
url = reverse('admin:crawled_content_preview', args=[obj.id])
|
||||
url = reverse('preview_crawled_content', args=[obj.id])
|
||||
return format_html(
|
||||
'<a href="{}" target="_blank" class="button">预览文章</a>',
|
||||
url
|
||||
|
||||
@@ -186,6 +186,9 @@
|
||||
{% endif %}
|
||||
</small>
|
||||
<div>
|
||||
<a href="{% url 'download_crawled_content' content.id %}" class="btn btn-sm btn-outline-primary" title="下载">
|
||||
<i class="bi bi-download"></i>
|
||||
</a>
|
||||
{% for keyword in content.keywords_matched|split:"," %}
|
||||
<span class="keyword-badge">{{ keyword|strip }}</span>
|
||||
{% endfor %}
|
||||
|
||||
@@ -5,4 +5,5 @@ urlpatterns = [
|
||||
path('', views.dashboard, name='dashboard'),
|
||||
path('search/', views.search_page, name='search'),
|
||||
path('crawled-content/<int:content_id>/preview/', views.preview_crawled_content, name='preview_crawled_content'),
|
||||
path('crawled-content/<int:content_id>/download/', views.download_crawled_content, name='download_crawled_content'),
|
||||
]
|
||||
@@ -1,9 +1,10 @@
|
||||
from django.shortcuts import render, get_object_or_404
|
||||
from django.http import HttpResponse
|
||||
from django.http import HttpResponse, Http404
|
||||
from django.db.models import Q, Count
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
from .models import Website, CrawlTask, CrawledContent, CrawlLog, SearchKeyword
|
||||
from django.core.files.storage import default_storage
|
||||
from .models import Website, CrawlTask, CrawledContent, CrawlLog, SearchKeyword, MediaFile
|
||||
from rest_framework import viewsets, filters
|
||||
from rest_framework.decorators import action
|
||||
from rest_framework.response import Response
|
||||
@@ -15,6 +16,12 @@ import json
|
||||
from django.core.paginator import Paginator
|
||||
from django.db.models.functions import TruncDate
|
||||
from django.db.models import Count
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
from io import BytesIO
|
||||
from docx import Document
|
||||
from django.core.files.base import ContentFile
|
||||
|
||||
|
||||
def dashboard(request):
|
||||
@@ -289,4 +296,62 @@ def preview_crawled_content(request, content_id):
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
return HttpResponse(html_content, content_type='text/html; charset=utf-8')
|
||||
return HttpResponse(html_content, content_type='text/html; charset=utf-8')
|
||||
|
||||
|
||||
def download_crawled_content(request, content_id):
    """Download one crawled article as a zip archive.

    The archive holds a Word document (title, metadata and body text of the
    article) and, under ``media/``, every media file attached to the article
    that is present in the default storage backend.

    Args:
        request: The incoming HTTP request (not otherwise inspected).
        content_id: Primary key of the ``CrawledContent`` row to export.

    Returns:
        An ``HttpResponse`` of type ``application/zip`` sent as an attachment.

    Raises:
        Http404: Raised by ``get_object_or_404`` when no ``CrawledContent``
            row has the given id.
    """
    # Local imports keep this block self-contained; the module-level import
    # section is only partially visible from here.
    import logging
    from urllib.parse import quote

    logger = logging.getLogger(__name__)
    content = get_object_or_404(CrawledContent, id=content_id)

    # The raw title may contain path separators, quotes or control characters,
    # which would corrupt both the zip entry name and the HTTP header.
    base_name = _safe_archive_name(content.title, f'content_{content.id}')

    # Build the zip file entirely in memory.
    zip_buffer = BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        # Add the Word document to the archive.
        zip_file.writestr(f'{base_name}.docx', _build_docx_bytes(content))

        # Add the media files to the archive.
        used_names = set()
        for media_file in content.media_files.all():
            try:
                stored = media_file.local_file
                if not stored or not default_storage.exists(stored.name):
                    continue

                # Close the storage handle deterministically after reading.
                with default_storage.open(stored.name) as handle:
                    payload = handle.read()

                # Two media files may share a basename; keep both in the zip.
                entry_name = os.path.basename(stored.name)
                stem, ext = os.path.splitext(entry_name)
                suffix = 1
                while entry_name in used_names:
                    entry_name = f'{stem}_{suffix}{ext}'
                    suffix += 1
                used_names.add(entry_name)

                zip_file.writestr(f'media/{entry_name}', payload)
            except Exception:
                # Best effort: one unreadable file must not abort the whole
                # download, but the failure should be visible in the logs.
                logger.warning(
                    'Skipping media file %s of crawled content %s',
                    getattr(media_file, 'pk', None),
                    content_id,
                    exc_info=True,
                )

    # Prepare the response.
    response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')

    # Send an ASCII-only fallback plus an RFC 5987 ``filename*`` parameter so
    # that non-ASCII titles reach the browser intact.
    filename = f'{base_name}.zip'
    ascii_stem = base_name.encode('ascii', 'ignore').decode('ascii').strip(' ._')
    if not ascii_stem:
        ascii_stem = f'content_{content.id}'
    response['Content-Disposition'] = (
        'attachment; filename="{}.zip"; filename*=UTF-8\'\'{}'.format(
            ascii_stem, quote(filename, safe='')
        )
    )

    return response


def _safe_archive_name(title, fallback):
    """Return a base file name (no extension) that is safe in zips and headers."""
    import re

    # Replace path separators, characters reserved on common filesystems,
    # double quotes and control characters.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f\x7f]+', '_', title or '')
    cleaned = cleaned.strip(' ._')[:50].strip(' ._')
    return cleaned or fallback


def _build_docx_bytes(content):
    """Render the article's title, metadata and body into .docx bytes."""
    doc = Document()
    doc.add_heading(content.title, 0)

    # Metadata section.
    doc.add_paragraph(f'来源网站: {content.website.name} ({content.website.region})')
    doc.add_paragraph(f'原始链接: {content.url}')
    doc.add_paragraph(f'发布时间: {content.publish_date or "未知"}')
    doc.add_paragraph(f'作者: {content.author or "未知"}')
    doc.add_paragraph(f'匹配关键字: {content.keywords_matched}')
    doc.add_paragraph(f'爬取时间: {content.created_at}')

    # Body section: one Word paragraph per blank-line-separated chunk.
    doc.add_heading('正文', level=1)
    for paragraph in (content.content or '').split('\n\n'):
        text = paragraph.strip()
        if text:
            doc.add_paragraph(text)

    # Serialize the document to memory.
    doc_buffer = BytesIO()
    doc.save(doc_buffer)
    return doc_buffer.getvalue()
|
||||
|
||||
@@ -27,3 +27,4 @@ tzdata==2025.2
|
||||
urllib3==2.5.0
|
||||
vine==5.1.0
|
||||
wcwidth==0.2.14
|
||||
python-docx==1.2.0
|
||||
Reference in New Issue
Block a user