import csv
import json
import re
import threading
import uuid

from django.core.management import call_command
from django.core.paginator import Paginator
from django.http import HttpResponse, JsonResponse
from django.shortcuts import get_object_or_404, render
from django.utils import timezone
from django.views.decorators.csrf import csrf_exempt
from django.views.decorators.http import require_http_methods

from .models import Article, Website

# Global dict used to track the status of crawler tasks
crawler_tasks = {}
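# Note: this is a plain in-process dict; task state is not protected by a lock,
# is not shared across worker processes, and is lost on restart. If that matters
# for deployment, a shared store (Django cache or a database table) would be
# needed; that is an assumption about the setup, not something this module does.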


def article_list(request):
    # Fetch all enabled websites
    websites = Website.objects.filter(enabled=True)

    # Website selected for filtering
    selected_website = None
    # Changed: always start from all articles unless a specific filter is applied
    articles = Article.objects.all()

    website_id = request.GET.get('website')
    if website_id:
        try:
            selected_website = Website.objects.get(id=website_id)
            articles = articles.filter(website=selected_website)
        except Website.DoesNotExist:
            pass

    # Handle keyword search
    search_query = request.GET.get('q')
    if search_query:
        articles = articles.filter(title__icontains=search_query)

    # New: handle media-type filtering
    media_type = request.GET.get('media_type', 'all')
    if media_type == 'text_only':
        # Text-only articles (no media files)
        articles = articles.filter(media_files__isnull=True) | articles.filter(media_files=[])
    elif media_type == 'with_images':
        # Articles containing images
        articles = articles.filter(media_files__icontains='.jpg') | \
                   articles.filter(media_files__icontains='.jpeg') | \
                   articles.filter(media_files__icontains='.png') | \
                   articles.filter(media_files__icontains='.gif')
    elif media_type == 'with_videos':
        # Articles containing videos
        articles = articles.filter(media_files__icontains='.mp4') | \
                   articles.filter(media_files__icontains='.avi') | \
                   articles.filter(media_files__icontains='.mov') | \
                   articles.filter(media_files__icontains='.wmv') | \
                   articles.filter(media_files__icontains='.flv') | \
                   articles.filter(media_files__icontains='.webm')

    # Order by creation time, newest first
    articles = articles.order_by('-created_at')

    # Pagination: 40 articles per page
    paginator = Paginator(articles, 40)
    page_number = request.GET.get('page')
    page_obj = paginator.get_page(page_number)

    return render(request, 'core/article_list.html', {
        'page_obj': page_obj,
        'websites': websites,
        'selected_website': selected_website,
        'search_query': search_query
    })

def article_detail(request, article_id):
    # Return 404 if the article does not exist
    article = get_object_or_404(Article, id=article_id)
    return render(request, 'core/article_detail.html', {'article': article})

# Task-ID generation and status tracking
@require_http_methods(["POST"])
def run_crawler(request):
    """
    Trigger a crawler task from the front end.
    """
    try:
        # Name of the crawler to run
        crawler_name = request.POST.get('crawler_name', '')
        if not crawler_name:
            return JsonResponse({'status': 'error', 'message': '爬虫名称不能为空'})

        # Generate a task ID
        task_id = str(uuid.uuid4())

        # Record the article count before the task starts
        initial_count = Article.objects.count()

        # Register the task before the worker thread starts, so a status poll
        # issued immediately after this response can already find it
        crawler_tasks[task_id] = {
            'status': 'running',
            'message': '爬虫正在运行...',
            'start_time': timezone.now(),
            'initial_count': initial_count
        }

        # Run the crawler in a background thread
        def run_spider():
            try:
                # Invoke the management command matching the crawler name
                if crawler_name in ['crawl_xinhua', 'crawl_dongfangyancao']:
                    call_command(crawler_name)
                else:
                    # Otherwise fall back to the generic crawl_articles command
                    call_command('crawl_articles', crawler_name)

                # Count how many articles were added
                final_count = Article.objects.count()
                added_count = final_count - initial_count

                # Mark the task as completed
                crawler_tasks[task_id] = {
                    'status': 'completed',
                    'message': f'爬虫已完成,新增 {added_count} 篇文章',
                    'added_count': added_count,
                    'end_time': timezone.now()
                }
            except Exception as e:
                # Changed: friendlier error handling
                error_msg = str(e)
                if "UNIQUE constraint failed" in error_msg and "core_article.url" in error_msg:
                    error_msg = "检测到重复文章URL,已跳过重复项"
                else:
                    print(f"爬虫执行出错: {e}")

                # Count the articles actually added (even when errors occurred)
                final_count = Article.objects.count()
                added_count = final_count - initial_count

                # Mark the task as completed despite partial errors
                crawler_tasks[task_id] = {
                    'status': 'completed',
                    'message': f'爬虫已完成,新增 {added_count} 篇文章。{error_msg}',
                    'added_count': added_count,
                    'end_time': timezone.now(),
                    'error': error_msg
                }

        # Start the background thread that runs the crawler
        thread = threading.Thread(target=run_spider)
        thread.daemon = True
        thread.start()

        return JsonResponse({'status': 'success', 'message': f'爬虫 {crawler_name} 已启动', 'task_id': task_id})
    except Exception as e:
        return JsonResponse({'status': 'error', 'message': str(e)})


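# Usage sketch (an assumption, not part of the original code): the front end is
# expected to POST 'crawler_name' to run_crawler, keep the 'task_id' from the
# JSON response, and then poll crawler_status below with that 'task_id' until
# the returned status is 'completed'. The exact URL routes live in urls.py,
# which is not shown here.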
# View that checks crawler task status
@require_http_methods(["POST"])
def crawler_status(request):
    """
    Check the status of a crawler task.
    """
    try:
        task_id = request.POST.get('task_id', '')
        if not task_id:
            return JsonResponse({'status': 'error', 'message': '任务ID不能为空'})

        # Look up the task status
        task_info = crawler_tasks.get(task_id)
        if not task_info:
            return JsonResponse({'status': 'error', 'message': '未找到任务'})

        return JsonResponse(task_info)
    except Exception as e:
        return JsonResponse({'status': 'error', 'message': str(e)})


# New: view that pauses a crawler task
@require_http_methods(["POST"])
def pause_crawler(request):
    """
    Pause a crawler task.
    """
    try:
        task_id = request.POST.get('task_id', '')
        if not task_id:
            return JsonResponse({'status': 'error', 'message': '任务ID不能为空'})

        # Look up the task status
        task_info = crawler_tasks.get(task_id)
        if not task_info:
            return JsonResponse({'status': 'error', 'message': '未找到任务'})

        # In a real application, genuine pause logic would be implemented here;
        # for now we only update the task status to simulate pausing.
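        # One possible approach (a sketch under assumptions, not original code):
        # store a threading.Event per task, e.g.
        #     crawler_tasks[task_id]['pause_event'] = threading.Event()
        # have the crawl loop wait on that event between items, and have this
        # view set or clear it. The management commands would need to cooperate
        # with such a flag, so only the idea is noted here.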
        task_info['status'] = 'paused'
        task_info['message'] = '爬虫已暂停'

        return JsonResponse({
            'status': 'success',
            'message': '爬虫已暂停',
            'progress': 0  # Should return the actual progress here
        })
    except Exception as e:
        return JsonResponse({'status': 'error', 'message': str(e)})


# New: article export view
@csrf_exempt
@require_http_methods(["POST"])
def export_articles(request):
    try:
        # Parse the request payload
        data = json.loads(request.body)
        article_ids = data.get('article_ids', [])
        format_type = data.get('format', 'json')

        # Fetch the selected articles
        articles = Article.objects.filter(id__in=article_ids)

        if not articles.exists():
            return HttpResponse('没有选中文章', status=400)

        # Export in the requested format
        if format_type == 'json':
            # Prepare the JSON data
            articles_data = []
            for article in articles:
                articles_data.append({
                    'id': article.id,
                    'title': article.title,
                    'website': article.website.name,
                    'url': article.url,
                    'pub_date': article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else None,
                    'content': article.content,
                    'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                    'media_files': article.media_files
                })

            # Build the JSON response
            response = HttpResponse(
                json.dumps(articles_data, ensure_ascii=False, indent=2),
                content_type='application/json'
            )
            response['Content-Disposition'] = 'attachment; filename="articles.json"'
            return response

        elif format_type == 'csv':
            # Build the CSV response
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename="articles.csv"'

            # Create the CSV writer
            writer = csv.writer(response)
            writer.writerow(['ID', '标题', '网站', 'URL', '发布时间', '内容', '创建时间', '媒体文件'])

            # Write one row per article
            for article in articles:
                writer.writerow([
                    article.id,
                    article.title,
                    article.website.name,
                    article.url,
                    article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else '',
                    article.content,
                    article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                    ';'.join(article.media_files) if article.media_files else ''
                ])

            return response

        # New: support exporting as a ZIP archive
        elif format_type == 'zip':
            import zipfile
            from io import BytesIO
            from django.conf import settings
            import os

            # Build the ZIP file in memory
            zip_buffer = BytesIO()

            with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
                # Create a Word document for each article and add it to the ZIP
                for article in articles:
                    # Create a separate folder for each article, replacing
                    # characters that are unsafe in file names
                    safe_title = re.sub(r'[\\/:*?"<>|]', '_', article.title)
                    article_folder = f"article_{article.id}_{safe_title}"

                    # Collect the article data
                    article_data = {
                        'id': article.id,
                        'title': article.title,
                        'website': article.website.name,
                        'url': article.url,
                        'pub_date': article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else None,
                        'content': article.content,
                        'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                        'media_files': article.media_files
                    }

                    # Save the article as a Word document and add it to the ZIP
                    try:
                        from docx import Document
                        from docx.shared import Inches
                        from io import BytesIO
                        from bs4 import BeautifulSoup
                        import requests

                        # Create the Word document
                        doc = Document()
                        doc.add_heading(article.title, 0)

                        # Add the article metadata
                        doc.add_paragraph(f"网站: {article.website.name}")
                        doc.add_paragraph(f"URL: {article.url}")
                        doc.add_paragraph(
                            f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
                        doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}")

                        # Add the article body
                        doc.add_heading('内容', level=1)

                        # Parse the HTML content
                        soup = BeautifulSoup(article.content, 'html.parser')

                        # Handle images embedded in the content
                        for img in soup.find_all('img'):
                            src = img.get('src', '')
                            if src:
                                try:
                                    # Resolve the full image path
                                    if src.startswith('http'):
                                        # Remote image
                                        response = requests.get(src, timeout=10)
                                        image_stream = BytesIO(response.content)
                                        doc.add_picture(image_stream, width=Inches(4.0))
                                    else:
                                        # Local image
                                        full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
                                        if os.path.exists(full_path):
                                            doc.add_picture(full_path, width=Inches(4.0))
                                except Exception as e:
                                    # If the image cannot be added, fall back to its URL as text
                                    doc.add_paragraph(f"[图片: {src}]")

                                # Remove the original img tag
                                img.decompose()

                        content_text = soup.get_text()
                        doc.add_paragraph(content_text)

                        # Add media file information
                        if article.media_files:
                            doc.add_heading('媒体文件', level=1)
                            for media_file in article.media_files:
                                try:
                                    full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                                    if os.path.exists(full_path):
                                        # Decide how to handle the file by its extension
                                        file_extension = os.path.splitext(media_file)[1].lower()

                                        # Image files
                                        if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
                                            doc.add_picture(full_path, width=Inches(4.0))
                                        # Video files
                                        elif file_extension in ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm']:
                                            doc.add_paragraph(f"[视频文件: {media_file}]")
                                        # Other file types
                                        else:
                                            doc.add_paragraph(f"[文件: {media_file}]")
                                    else:
                                        # Media file given as a URL
                                        if media_file.startswith('http'):
                                            response = requests.get(media_file, timeout=10)
                                            file_extension = os.path.splitext(media_file)[1].lower()

                                            # Image files
                                            if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
                                                image_stream = BytesIO(response.content)
                                                doc.add_picture(image_stream, width=Inches(4.0))
                                            else:
                                                doc.add_paragraph(f"[文件: {media_file}]")
                                        else:
                                            doc.add_paragraph(media_file)
                                except Exception as e:
                                    doc.add_paragraph(media_file)

                        # Save the Word document to an in-memory buffer
                        doc_buffer = BytesIO()
                        doc.save(doc_buffer)
                        doc_buffer.seek(0)

                        # Add the Word document to the ZIP archive
                        zip_file.writestr(os.path.join(article_folder, f'{safe_title}.docx'),
                                          doc_buffer.read())

                    except ImportError:
                        # Fall back to JSON if python-docx is not installed
                        json_data = json.dumps(article_data, ensure_ascii=False, indent=2)
                        zip_file.writestr(os.path.join(article_folder, f'{safe_title}.json'),
                                          json_data)

                    # Add the media files to the ZIP archive
                    if article.media_files:
                        for media_file in article.media_files:
                            try:
                                full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                                if os.path.exists(full_path):
                                    # Add the local file to the ZIP
                                    zip_file.write(full_path, os.path.join(article_folder, 'media', media_file))
                                else:
                                    # Media file given as a URL
                                    if media_file.startswith('http'):
                                        import requests
                                        response = requests.get(media_file, timeout=10)
                                        zip_file.writestr(
                                            os.path.join(article_folder, 'media', os.path.basename(media_file)),
                                            response.content)
                            except Exception as e:
                                # If a media file cannot be added, continue with the remaining files
                                pass

            # Build the HTTP response
            zip_buffer.seek(0)
            response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')
            response['Content-Disposition'] = 'attachment; filename=articles_export.zip'
            return response

        else:
            return HttpResponse('不支持的格式', status=400)

    except Exception as e:
        return HttpResponse(f'导出失败: {str(e)}', status=500)


# New: view that exports articles filtered by media type
@csrf_exempt
@require_http_methods(["POST"])
def export_articles_by_type(request):
    try:
        # Parse the request payload
        data = json.loads(request.body)
        media_type = data.get('media_type', 'all')
        format_type = data.get('format', 'zip')

        # Filter articles by media type
        if media_type == 'text_only':
            # Text-only articles (no media files or an empty media list)
            articles = Article.objects.filter(media_files__isnull=True) | Article.objects.filter(media_files=[])
        elif media_type == 'with_images':
            # Articles containing images
            articles = Article.objects.filter(media_files__icontains='.jpg') | \
                       Article.objects.filter(media_files__icontains='.jpeg') | \
                       Article.objects.filter(media_files__icontains='.png') | \
                       Article.objects.filter(media_files__icontains='.gif')
        elif media_type == 'with_videos':
            # Articles containing videos
            articles = Article.objects.filter(media_files__icontains='.mp4') | \
                       Article.objects.filter(media_files__icontains='.avi') | \
                       Article.objects.filter(media_files__icontains='.mov') | \
                       Article.objects.filter(media_files__icontains='.wmv') | \
                       Article.objects.filter(media_files__icontains='.flv') | \
                       Article.objects.filter(media_files__icontains='.webm')
        else:
            # All articles
            articles = Article.objects.all()

        # Remove duplicates
        articles = articles.distinct()

        if not articles.exists():
            return HttpResponse('没有符合条件的文章', status=400)

        # Export as a ZIP archive
        if format_type == 'zip':
            import zipfile
            from io import BytesIO
            from django.conf import settings
            import os

            # Build the ZIP file in memory
            zip_buffer = BytesIO()

            with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
                # Create a Word document for each article and add it to the ZIP
                for article in articles:
                    # Create a separate folder for each article, replacing
                    # characters that are unsafe in file names
                    safe_title = re.sub(r'[\\/:*?"<>|]', '_', article.title)
                    article_folder = f"article_{article.id}_{safe_title}"

                    # Collect the article data
                    article_data = {
                        'id': article.id,
                        'title': article.title,
                        'website': article.website.name,
                        'url': article.url,
                        'pub_date': article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else None,
                        'content': article.content,
                        'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                        'media_files': article.media_files
                    }

                    # Save the article as a Word document and add it to the ZIP
                    try:
                        from docx import Document
                        from docx.shared import Inches
                        from io import BytesIO
                        from bs4 import BeautifulSoup
                        import requests

                        # Create the Word document
                        doc = Document()
                        doc.add_heading(article.title, 0)

                        # Add the article metadata
                        doc.add_paragraph(f"网站: {article.website.name}")
                        doc.add_paragraph(f"URL: {article.url}")
                        doc.add_paragraph(
                            f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
                        doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}")

                        # Add the article body
                        doc.add_heading('内容', level=1)

                        # Parse the HTML content
                        soup = BeautifulSoup(article.content, 'html.parser')

                        # Handle images embedded in the content
                        for img in soup.find_all('img'):
                            src = img.get('src', '')
                            if src:
                                try:
                                    # Resolve the full image path
                                    if src.startswith('http'):
                                        # Remote image
                                        response = requests.get(src, timeout=10)
                                        image_stream = BytesIO(response.content)
                                        doc.add_picture(image_stream, width=Inches(4.0))
                                    else:
                                        # Local image
                                        full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
                                        if os.path.exists(full_path):
                                            doc.add_picture(full_path, width=Inches(4.0))
                                except Exception as e:
                                    # If the image cannot be added, fall back to its URL as text
                                    doc.add_paragraph(f"[图片: {src}]")

                                # Remove the original img tag
                                img.decompose()

                        content_text = soup.get_text()
                        doc.add_paragraph(content_text)

                        # Add media file information
                        if article.media_files:
                            doc.add_heading('媒体文件', level=1)
                            for media_file in article.media_files:
                                try:
                                    full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                                    if os.path.exists(full_path):
                                        # Decide how to handle the file by its extension
                                        file_extension = os.path.splitext(media_file)[1].lower()

                                        # Image files
                                        if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
                                            doc.add_picture(full_path, width=Inches(4.0))
                                        # Video files
                                        elif file_extension in ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm']:
                                            doc.add_paragraph(f"[视频文件: {media_file}]")
                                        # Other file types
                                        else:
                                            doc.add_paragraph(f"[文件: {media_file}]")
                                    else:
                                        # Media file given as a URL
                                        if media_file.startswith('http'):
                                            response = requests.get(media_file, timeout=10)
                                            file_extension = os.path.splitext(media_file)[1].lower()

                                            # Image files
                                            if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
                                                image_stream = BytesIO(response.content)
                                                doc.add_picture(image_stream, width=Inches(4.0))
                                            else:
                                                doc.add_paragraph(f"[文件: {media_file}]")
                                        else:
                                            doc.add_paragraph(media_file)
                                except Exception as e:
                                    doc.add_paragraph(media_file)

                        # Save the Word document to an in-memory buffer
                        doc_buffer = BytesIO()
                        doc.save(doc_buffer)
                        doc_buffer.seek(0)

                        # Add the Word document to the ZIP archive
                        zip_file.writestr(os.path.join(article_folder, f'{safe_title}.docx'),
                                          doc_buffer.read())

                    except ImportError:
                        # Fall back to JSON if python-docx is not installed
                        json_data = json.dumps(article_data, ensure_ascii=False, indent=2)
                        zip_file.writestr(os.path.join(article_folder, f'{safe_title}.json'),
                                          json_data)

                    # Add the media files to the ZIP archive
                    if article.media_files:
                        for media_file in article.media_files:
                            try:
                                full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                                if os.path.exists(full_path):
                                    # Add the local file to the ZIP
                                    zip_file.write(full_path, os.path.join(article_folder, 'media', media_file))
                                else:
                                    # Media file given as a URL
                                    if media_file.startswith('http'):
                                        import requests
                                        response = requests.get(media_file, timeout=10)
                                        zip_file.writestr(
                                            os.path.join(article_folder, 'media', os.path.basename(media_file)),
                                            response.content)
                            except Exception as e:
                                # If a media file cannot be added, continue with the remaining files
                                pass

            # Build the HTTP response
            zip_buffer.seek(0)
            response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')
            response['Content-Disposition'] = f'attachment; filename=articles_{media_type}.zip'
            return response

        else:
            return HttpResponse('不支持的格式', status=400)

    except Exception as e:
        return HttpResponse(f'导出失败: {str(e)}', status=500)