Unknown change

This commit is contained in:
2025-08-13 18:40:31 +08:00
parent 5e396796ca
commit c618528a0a
6 changed files with 996 additions and 528 deletions

View File

@@ -34,7 +34,7 @@ def run_crawler_view(request):
if not website_name: if not website_name:
messages.error(request, '请选择要爬取的网站') messages.error(request, '请选择要爬取的网站')
return redirect('admin:core_article_changelist') return redirect('admin:core_article_changelist')
try: try:
# 根据网站名称确定要执行的爬虫命令 # 根据网站名称确定要执行的爬虫命令
if website_name == 'crawl_xinhua': if website_name == 'crawl_xinhua':
@@ -46,14 +46,14 @@ def run_crawler_view(request):
else: else:
# 对于其他网站,使用通用爬虫命令 # 对于其他网站,使用通用爬虫命令
crawler_name = 'crawl_articles' crawler_name = 'crawl_articles'
# 运行爬虫命令不传递website_name作为参数 # 运行爬虫命令不传递website_name作为参数
call_command(crawler_name) call_command(crawler_name)
messages.success(request, f'成功执行爬虫: {crawler_name}') messages.success(request, f'成功执行爬虫: {crawler_name}')
except Exception as e: except Exception as e:
messages.error(request, f'执行爬虫失败: {str(e)}') messages.error(request, f'执行爬虫失败: {str(e)}')
return redirect('admin:core_article_changelist') return redirect('admin:core_article_changelist')
@@ -241,12 +241,12 @@ class ArticleAdmin(admin.ModelAdmin):
# 创建内存中的ZIP文件 # 创建内存中的ZIP文件
zip_buffer = BytesIO() zip_buffer = BytesIO()
with zipfile.ZipFile(zip_buffer, 'w') as zip_file: with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
for article in queryset: for article in queryset:
# 为每篇文章创建单独的文件夹 # 为每篇文章创建单独的文件夹
article_folder = f"article_{article.id}_{article.title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('*', '_').replace('?', '_').replace('"', '_').replace('<', '_').replace('>', '_').replace('|', '_')}" article_folder = f"article_{article.id}_{article.title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('*', '_').replace('?', '_').replace('"', '_').replace('<', '_').replace('>', '_').replace('|', '_')}"
# 创建Word文档 # 创建Word文档
doc = Document() doc = Document()
doc.add_heading(article.title, 0) doc.add_heading(article.title, 0)
@@ -281,7 +281,8 @@ class ArticleAdmin(admin.ModelAdmin):
image_stream = BytesIO(response.content) image_stream = BytesIO(response.content)
doc.add_picture(image_stream, width=Inches(4.0)) doc.add_picture(image_stream, width=Inches(4.0))
# 将网络文件保存到ZIP # 将网络文件保存到ZIP
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(src)), response.content) zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(src)),
response.content)
else: else:
# 本地图片 # 本地图片
full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/')) full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
@@ -310,7 +311,7 @@ class ArticleAdmin(admin.ModelAdmin):
full_path = os.path.join(settings.MEDIA_ROOT, media_file) full_path = os.path.join(settings.MEDIA_ROOT, media_file)
# 检查文件扩展名以确定处理方式 # 检查文件扩展名以确定处理方式
file_extension = os.path.splitext(media_file)[1].lower() file_extension = os.path.splitext(media_file)[1].lower()
# 图片文件处理 # 图片文件处理
if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']: if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
if os.path.exists(full_path): if os.path.exists(full_path):
@@ -325,7 +326,9 @@ class ArticleAdmin(admin.ModelAdmin):
image_stream = BytesIO(response.content) image_stream = BytesIO(response.content)
doc.add_picture(image_stream, width=Inches(4.0)) doc.add_picture(image_stream, width=Inches(4.0))
# 将网络文件保存到ZIP # 将网络文件保存到ZIP
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content) zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
else: else:
doc.add_paragraph(media_file) doc.add_paragraph(media_file)
# 视频文件处理 # 视频文件处理
@@ -341,7 +344,9 @@ class ArticleAdmin(admin.ModelAdmin):
if media_file.startswith('http'): if media_file.startswith('http'):
# 将网络文件保存到ZIP # 将网络文件保存到ZIP
response = requests.get(media_file, timeout=10) response = requests.get(media_file, timeout=10)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content) zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
doc.add_paragraph(f"[视频文件: {media_file}]") doc.add_paragraph(f"[视频文件: {media_file}]")
else: else:
doc.add_paragraph(media_file) doc.add_paragraph(media_file)
@@ -355,7 +360,9 @@ class ArticleAdmin(admin.ModelAdmin):
# 如果是URL格式的媒体文件 # 如果是URL格式的媒体文件
if media_file.startswith('http'): if media_file.startswith('http'):
response = requests.get(media_file, timeout=10) response = requests.get(media_file, timeout=10)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content) zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
doc.add_paragraph(f"[文件: {media_file}]") doc.add_paragraph(f"[文件: {media_file}]")
else: else:
doc.add_paragraph(media_file) doc.add_paragraph(media_file)
@@ -366,7 +373,8 @@ class ArticleAdmin(admin.ModelAdmin):
doc_buffer = BytesIO() doc_buffer = BytesIO()
doc.save(doc_buffer) doc.save(doc_buffer)
doc_buffer.seek(0) doc_buffer.seek(0)
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'), doc_buffer.read()) zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'),
doc_buffer.read())
# 创建HttpResponse # 创建HttpResponse
zip_buffer.seek(0) zip_buffer.seek(0)
@@ -512,5 +520,4 @@ class DongfangyancaoArticleAdmin(admin.ModelAdmin):
export_as_json.short_description = "导出选中文章为JSON格式" export_as_json.short_description = "导出选中文章为JSON格式"
# 在各自的管理站点中注册模型 # 在各自的管理站点中注册模型

View File

@@ -13,7 +13,7 @@
<option value="crawl_dongfangyancao">东方烟草报</option> <option value="crawl_dongfangyancao">东方烟草报</option>
<option value="crawl_articles">通用爬虫</option> <option value="crawl_articles">通用爬虫</option>
</select> </select>
<input type="submit" value="执行爬虫" class="default" style="margin-left: 10px;" /> <input type="submit" value="执行爬虫" class="default" style="margin-left: 10px;"/>
</form> </form>
</div> </div>
{% endblock %} {% endblock %}

View File

@@ -2,24 +2,23 @@
<html lang="zh"> <html lang="zh">
<head> <head>
<meta charset="UTF-8"/> <meta charset="UTF-8"/>
<title>{{ article.title }}</title> <title>绿色课堂文章列表</title>
<style> <style>
body { body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6; line-height: 1.6;
color: #333; color: #333;
max-width: 1200px; /* 修改:同步调整页面最大宽度与列表页一致 */
margin: 0 auto; margin: 0 auto;
padding: 20px; padding: 20px;
background-color: #f8f9fa; background-color: #f0f8ff; /* 统一背景色调 */
} }
.article-container { .container {
background: white; background: white;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
padding: 30px; padding: 30px;
margin-bottom: 20px; margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); /* 添加轻微阴影 */
border-radius: 8px; /* 添加圆角 */
} }
h1 { h1 {
@@ -29,57 +28,765 @@
margin-top: 0; margin-top: 0;
} }
.meta { .filters {
color: #7f8c8d;
font-size: 0.9em;
margin-bottom: 20px; margin-bottom: 20px;
padding: 15px;
background-color: #e3f2fd; /* 统一滤镜背景色调 */
border-radius: 5px;
} }
hr { .filters a {
border: 0;
height: 1px;
background: #ecf0f1;
margin: 20px 0;
}
.content {
font-size: 16px;
}
.content img {
max-width: 100%;
height: auto;
border-radius: 4px;
margin: 10px 0;
}
.back-link {
display: inline-block; display: inline-block;
padding: 10px 20px; padding: 5px 10px;
margin: 0 5px 5px 0;
background-color: #bbdefb; /* 统一链接背景色调 */
color: #0d47a1;
text-decoration: none;
border-radius: 3px;
}
.filters a.active {
background-color: #3498db;
color: white;
}
ul {
list-style: none;
padding: 0;
}
li {
padding: 10px 0;
border-bottom: 1px solid #e0e0e0; /* 统一分隔线颜色 */
}
li:last-child {
border-bottom: none;
}
a {
color: #1976d2; /* 统一链接颜色 */
text-decoration: none;
}
a:hover {
color: #0d47a1; /* 统一悬停颜色 */
text-decoration: underline;
}
.meta {
color: #78909c; /* 统一元数据颜色 */
font-size: 0.9em;
}
.pagination {
margin-top: 30px;
text-align: center;
padding: 20px 0;
}
.pagination a {
display: inline-block;
padding: 8px 16px;
background-color: #3498db; background-color: #3498db;
color: white; color: white;
text-decoration: none; text-decoration: none;
border-radius: 4px; border-radius: 4px;
transition: background-color 0.3s; margin: 0 2px; /* 修改:调整页码间距 */
} }
.back-link:hover { .pagination a:hover {
background-color: #2980b9; background-color: #2980b9;
} }
.pagination span {
margin: 0 10px;
color: #7f8c8d;
}
/* 新增:当前页码样式 */
.pagination .current {
background-color: #2980b9;
cursor: default;
}
/* 新增:省略号样式 */
.pagination .ellipsis {
display: inline-block;
padding: 8px 4px;
color: #7f8c8d;
}
/* 新增:搜索框样式 */
.search-form {
margin-bottom: 20px;
padding: 15px;
background-color: #e3f2fd; /* 统一搜索框背景色调 */
border-radius: 5px;
}
.search-form input[type="text"] {
padding: 8px 12px;
border: 1px solid #bbdefb; /* 统一边框颜色 */
border-radius: 4px;
width: 300px;
margin-right: 10px;
background-color: #fff;
}
.search-form input[type="submit"] {
padding: 8px 16px;
background-color: #3498db;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
.search-form input[type="submit"]:hover {
background-color: #2980b9;
}
.search-info {
color: #78909c; /* 统一搜索信息颜色 */
font-size: 0.9em;
margin-bottom: 10px;
}
/* 新增:左侧筛选栏样式 */
.content-wrapper {
display: flex;
gap: 20px;
}
.sidebar {
flex: 0 0 200px;
background-color: #e3f2fd; /* 统一边栏背景色调 */
border-radius: 5px;
padding: 15px;
}
.main-content {
flex: 1;
}
.sidebar .filters {
margin-bottom: 20px;
padding: 0;
background-color: transparent;
}
.sidebar .filters strong {
display: block;
margin-bottom: 10px;
color: #2c3e50;
}
.sidebar .filters a {
display: block;
padding: 8px 10px;
margin: 0 0 5px 0;
background-color: #bbdefb; /* 统一边栏链接背景色调 */
color: #0d47a1;
text-decoration: none;
border-radius: 3px;
}
.sidebar .filters a.active {
background-color: #3498db;
color: white;
}
/* 新增:导出功能样式 */
.export-section {
margin-bottom: 20px;
padding: 15px;
background-color: #e8f5e9; /* 统一导出区域背景色调 */
border-radius: 5px;
text-align: center;
}
.export-btn {
padding: 10px 20px;
background-color: #4caf50; /* 统一按钮背景色调 */
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
margin: 0 5px;
}
.export-btn:hover {
background-color: #388e3c; /* 统一按钮悬停色调 */
}
.export-btn:disabled {
background-color: #9e9e9e; /* 统一禁用按钮色调 */
cursor: not-allowed;
}
.article-checkbox {
margin-right: 10px;
}
/* 新增:爬虫控制按钮样式 */
.crawler-control {
margin-bottom: 20px;
padding: 15px;
background-color: #fff3e0; /* 统一爬虫控制区域背景色调 */
border-radius: 5px;
text-align: center;
}
.crawler-btn {
padding: 10px 20px;
background-color: #ff9800; /* 统一爬虫按钮背景色调 */
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
margin: 0 5px;
}
.crawler-btn:hover {
background-color: #f57c00; /* 统一爬虫按钮悬停色调 */
}
.crawler-btn:disabled {
background-color: #9e9e9e; /* 统一禁用爬虫按钮色调 */
cursor: not-allowed;
}
.crawler-result {
margin-top: 10px;
padding: 10px;
border-radius: 4px;
display: none;
}
.crawler-result.success {
background-color: #e8f5e9;
color: #2e7d32;
}
.crawler-result.error {
background-color: #ffebee;
color: #c62828;
}
/* 新增:进度条样式 */
.progress-container {
margin-top: 10px;
display: none;
}
.progress-bar {
width: 100%;
height: 20px;
background-color: #e0e0e0;
border-radius: 10px;
overflow: hidden;
}
.progress-fill {
height: 100%;
background-color: #4caf50;
width: 0%;
transition: width 0.3s ease;
}
.progress-text {
margin-top: 5px;
font-size: 14px;
color: #666;
}
</style> </style>
</head> </head>
<body> <body>
<div class="article-container"> <div class="container">
<h1>{{ article.title }}</h1> <h1>绿色课堂文章列表</h1>
<div class="meta">
<p>发布时间: {{ article.pub_date|date:"Y-m-d H:i" }}</p> <!-- 新增:搜索表单 -->
<div class="search-form">
<form method="get">
<input type="text" name="q" placeholder="输入关键词搜索文章..." value="{{ search_query }}">
{% if selected_website %}
<input type="hidden" name="website" value="{{ selected_website.id }}">
{% endif %}
<input type="submit" value="搜索">
</form>
</div> </div>
<hr/>
<div class="content"> <div class="content-wrapper">
{{ article.content|safe }} <!-- 左侧筛选栏 -->
<div class="sidebar">
<div class="filters">
<strong>按网站筛选:</strong>
<a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}"
{% if not selected_website %}class="active" {% endif %}>全部</a>
{% for website in websites %}
<a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}"
{% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
{% endfor %}
</div>
</div>
<!-- 主内容区域 -->
<div class="main-content">
<!-- 新增:搜索结果信息 -->
{% if search_query %}
<div class="search-info">
搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章
<a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a>
</div>
{% endif %}
<!-- 新增:导出功能 -->
<div class="export-section">
<button id="selectAllBtn" class="export-btn">全选</button>
<button id="deselectAllBtn" class="export-btn">取消全选</button>
<button id="exportJsonBtn" class="export-btn" disabled>导出为JSON</button>
<button id="exportCsvBtn" class="export-btn" disabled>导出为CSV</button>
<!-- 新增:导出为ZIP包按钮 -->
<button id="exportZipBtn" class="export-btn" disabled>导出为ZIP包</button>
</div>
<ul>
{% for article in page_obj %}
<li>
<input type="checkbox" class="article-checkbox" value="{{ article.id }}"
id="article_{{ article.id }}">
<a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
<div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
</li>
{% empty %}
<li>暂无文章</li>
{% endfor %}
</ul>
<div class="pagination">
{% if page_obj.has_previous %}
{% if selected_website %}
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo;
首页</a>
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
{% endif %}
{% endif %}
<span>第 {{ page_obj.number }} 页,共 {{ page_obj.paginator.num_pages }} 页</span>
<!-- 修改:优化页码显示逻辑 -->
{% with page_obj.paginator as paginator %}
{% for num in paginator.page_range %}
{% if page_obj.number == num %}
<a href="#" class="current">{{ num }}</a>
{% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
{% if selected_website %}
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
{% endif %}
{% elif num == 1 or num == paginator.num_pages %}
{% if selected_website %}
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
{% endif %}
{% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
<span class="ellipsis">...</span>
{% endif %}
{% endfor %}
{% endwith %}
{% if page_obj.has_next %}
{% if selected_website %}
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页
&raquo;</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页
&raquo;</a>
{% endif %}
{% endif %}
</div>
</div>
</div> </div>
<hr/>
<p><a href="{% url 'article_list' %}" class="back-link">← 返回列表</a></p>
</div> </div>
<script>
// Id of the crawler task currently being tracked, and the handle of the
// timer that polls its status.
let currentTaskId = null;
let statusCheckInterval = null;

// Page elements used by the crawler controls. getElementById returns null
// for any element that is not rendered on the current page.
const runCrawlerBtn = document.getElementById('runCrawlerBtn');
const runDongfangyancaoCrawlerBtn = document.getElementById('runDongfangyancaoCrawlerBtn');
const pauseCrawlerBtn = document.getElementById('pauseCrawlerBtn');
const progressContainer = document.getElementById('crawlerProgress');
const progressFill = document.getElementById('progressFill');
const progressText = document.getElementById('progressText');
const resultDiv = document.getElementById('crawlerResult');

// Bind the crawler buttons. Every binding is guarded: calling
// addEventListener on a missing element throws a TypeError, which would
// abort the remainder of this script (including the export handlers below).
if (runCrawlerBtn) {
    runCrawlerBtn.addEventListener('click', function () {
        runCrawler('www.news.cn', 'crawl_xinhua');
    });
}
if (runDongfangyancaoCrawlerBtn) {
    runDongfangyancaoCrawlerBtn.addEventListener('click', function () {
        runCrawler('东方烟草报', 'crawl_dongfangyancao');
    });
}

// Pause button: only acts while a task id is being tracked.
if (pauseCrawlerBtn) {
    pauseCrawlerBtn.addEventListener('click', function () {
        if (currentTaskId) {
            pauseCrawler(currentTaskId);
        }
    });
}
/**
 * Ask the server to start a crawler and, on success, begin polling its status.
 *
 * @param {string} websiteName - Label of the target site; it is not sent to the server.
 * @param {string} crawlerName - Crawler command name, posted as `crawler_name`.
 */
function runCrawler(websiteName, crawlerName) {
    // Shared failure path: show the message, re-enable the start buttons
    // and hide the progress area.
    const fail = (message) => {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result error';
        resultDiv.textContent = message;
        runCrawlerBtn.disabled = false;
        runDongfangyancaoCrawlerBtn.disabled = false;
        progressContainer.style.display = 'none';
    };

    // Disable the start buttons so the crawler cannot be launched twice.
    runCrawlerBtn.disabled = true;
    runDongfangyancaoCrawlerBtn.disabled = true;
    resultDiv.style.display = 'none';

    // Reveal the progress area with an initial message.
    progressContainer.style.display = 'block';
    updateProgress(0, '爬虫启动中...');

    fetch('{% url "run_crawler" %}', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        // Percent-encode the value so characters such as '&' or '=' cannot
        // corrupt the form-encoded body.
        body: 'crawler_name=' + encodeURIComponent(crawlerName)
    })
        .then(response => response.json())
        .then(data => {
            if (data.status === 'success') {
                currentTaskId = data.task_id;
                pauseCrawlerBtn.disabled = false;
                // Start polling the status of the new task.
                checkCrawlerStatus(currentTaskId);
            } else {
                fail(data.message);
            }
        })
        .catch(error => {
            fail('请求失败: ' + error);
        });
}
/**
 * Poll the server once per second for the status of a crawler task and
 * update the progress bar, result box and buttons accordingly.
 *
 * Polling stops when the task reports `completed`, `paused` or `error`, or
 * when a status request fails.
 *
 * @param {string|number} taskId - Task identifier, posted as `task_id`.
 */
function checkCrawlerStatus(taskId) {
    // Stop any poll loop started by an earlier call.
    if (statusCheckInterval) {
        clearInterval(statusCheckInterval);
    }

    // Requests are fired every second regardless of whether the previous one
    // has answered, so responses can still arrive after this loop reached a
    // terminal state or was replaced by a newer loop. Such responses are
    // ignored so terminal handling runs at most once.
    let settled = false;
    let timer = null;
    const isStale = () => settled || statusCheckInterval !== timer;
    const stopPolling = () => {
        settled = true;
        clearInterval(timer);
    };

    // Re-enable the start buttons and disable the pause button.
    const restoreButtons = () => {
        runCrawlerBtn.disabled = false;
        runDongfangyancaoCrawlerBtn.disabled = false;
        pauseCrawlerBtn.disabled = true;
    };

    // Show a message in the result box; `kind` is 'success' or 'error'.
    const showResult = (kind, message) => {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result ' + kind;
        resultDiv.textContent = message;
    };

    timer = setInterval(() => {
        fetch('{% url "crawler_status" %}', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-CSRFToken': '{{ csrf_token }}'
            },
            body: 'task_id=' + encodeURIComponent(taskId)
        })
            .then(response => response.json())
            .then(data => {
                if (isStale()) {
                    return;
                }
                if (data.status === 'running') {
                    // Simulated progress: one percent per elapsed second,
                    // capped at 90 until the server reports completion.
                    const elapsedTime = new Date() - new Date(data.start_time);
                    const progress = Math.min(90, Math.floor(elapsedTime / 1000));
                    updateProgress(progress, data.message);
                } else if (data.status === 'completed') {
                    stopPolling();
                    updateProgress(100, data.message);
                    restoreButtons();
                    showResult('success', data.message);
                    // Hide the progress bar after 3 s. The reload below is
                    // scheduled earlier (2 s), so this normally never shows.
                    setTimeout(() => {
                        progressContainer.style.display = 'none';
                    }, 3000);
                    // Reload the page so newly crawled articles are listed.
                    setTimeout(() => {
                        location.reload();
                    }, 2000);
                } else if (data.status === 'paused') {
                    stopPolling();
                    updateProgress(data.progress || 0, '爬虫已暂停');
                    restoreButtons();
                    showResult('success', '爬虫已暂停');
                } else if (data.status === 'error') {
                    stopPolling();
                    progressContainer.style.display = 'none';
                    showResult('error', data.message);
                    restoreButtons();
                }
            })
            .catch(error => {
                if (isStale()) {
                    return;
                }
                stopPolling();
                progressContainer.style.display = 'none';
                showResult('error', '检查状态失败: ' + error);
                restoreButtons();
            });
    }, 1000);
    statusCheckInterval = timer;
}
/**
 * Set the progress bar fill width and the caption shown under it.
 *
 * @param {number} percent - Fill width, applied as a CSS percentage.
 * @param {string} text - Caption to display.
 */
function updateProgress(percent, text) {
    const width = `${percent}%`;
    progressFill.style.width = width;
    progressText.textContent = text;
}
/**
 * Ask the server to pause a running crawler task.
 *
 * On success the pause button is disabled and the progress caption switches
 * to a "pausing" message; otherwise the error is shown in the result box.
 *
 * @param {string|number} taskId - Task identifier, posted as `task_id`.
 */
function pauseCrawler(taskId) {
    // Shared error display for both a server-side refusal and a failed request.
    const showError = (message) => {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result error';
        resultDiv.textContent = message;
    };

    fetch('{% url "pause_crawler" %}', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        // Percent-encode the value so it cannot corrupt the form-encoded body.
        body: 'task_id=' + encodeURIComponent(taskId)
    })
        .then(response => response.json())
        .then(data => {
            if (data.status === 'success') {
                pauseCrawlerBtn.disabled = true;
                updateProgress(data.progress || 0, '正在暂停...');
            } else {
                showError(data.message);
            }
        })
        .catch(error => {
            showError('暂停请求失败: ' + error);
        });
}
// 导出功能相关JavaScript
const checkboxes = document.querySelectorAll('.article-checkbox');
const exportJsonBtn = document.getElementById('exportJsonBtn');
const exportCsvBtn = document.getElementById('exportCsvBtn');
const selectAllBtn = document.getElementById('selectAllBtn');
const deselectAllBtn = document.getElementById('deselectAllBtn');
// 新增:获取ZIP导出按钮元素
const exportZipBtn = document.getElementById('exportZipBtn');
/**
 * Enable the JSON, CSV and ZIP export buttons only while at least one
 * article checkbox is ticked.
 */
function updateExportButtons() {
    const nothingSelected =
        document.querySelectorAll('.article-checkbox:checked').length === 0;
    for (const button of [exportJsonBtn, exportCsvBtn, exportZipBtn]) {
        button.disabled = nothingSelected;
    }
}
// Refresh the export buttons whenever an article checkbox is toggled.
for (const box of checkboxes) {
    box.addEventListener('change', updateExportButtons);
}

// Tick or untick every article checkbox, then refresh the export buttons.
const setAllChecked = (state) => {
    for (const box of checkboxes) {
        box.checked = state;
    }
    updateExportButtons();
};

// "Select all" and "deselect all" buttons.
selectAllBtn.addEventListener('click', () => setAllChecked(true));
deselectAllBtn.addEventListener('click', () => setAllChecked(false));
/**
 * Export the currently ticked articles in the given format and trigger a
 * browser download of the server's response.
 *
 * @param {string} format - Export format sent to the server ('json', 'csv' or 'zip').
 * @param {string} filename - File name suggested for the download.
 */
function exportSelectedArticles(format, filename) {
    const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
        .map(checkbox => checkbox.value);

    fetch('{% url "export_articles" %}', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        body: JSON.stringify({
            article_ids: selectedArticles,
            format: format
        })
    })
        .then(response => {
            if (response.ok) {
                return response.blob();
            }
            throw new Error('导出失败');
        })
        .then(blob => {
            // Download the blob through a temporary link that is clicked
            // programmatically and then removed again.
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.href = url;
            a.download = filename;
            document.body.appendChild(a);
            a.click();
            window.URL.revokeObjectURL(url);
            document.body.removeChild(a);
        })
        .catch(error => {
            alert('导出失败: ' + error);
        });
}

// The three export buttons differ only in format and download file name.
exportJsonBtn.addEventListener('click', () => {
    exportSelectedArticles('json', 'articles.json');
});
exportCsvBtn.addEventListener('click', () => {
    exportSelectedArticles('csv', 'articles.csv');
});
exportZipBtn.addEventListener('click', () => {
    exportSelectedArticles('zip', 'articles.zip');
});
// 初始化导出按钮状态
updateExportButtons();
</script>
</body> </body>
</html> </html>

View File

@@ -17,7 +17,7 @@
background: white; background: white;
padding: 30px; padding: 30px;
margin-bottom: 20px; margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.05); /* 添加轻微阴影 */ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); /* 添加轻微阴影 */
border-radius: 8px; /* 添加圆角 */ border-radius: 8px; /* 添加圆角 */
} }
@@ -116,7 +116,7 @@
padding: 8px 4px; padding: 8px 4px;
color: #7f8c8d; color: #7f8c8d;
} }
/* 新增:搜索框样式 */ /* 新增:搜索框样式 */
.search-form { .search-form {
margin-bottom: 20px; margin-bottom: 20px;
@@ -124,7 +124,7 @@
background-color: #e3f2fd; /* 统一搜索框背景色调 */ background-color: #e3f2fd; /* 统一搜索框背景色调 */
border-radius: 5px; border-radius: 5px;
} }
.search-form input[type="text"] { .search-form input[type="text"] {
padding: 8px 12px; padding: 8px 12px;
border: 1px solid #bbdefb; /* 统一边框颜色 */ border: 1px solid #bbdefb; /* 统一边框颜色 */
@@ -133,7 +133,7 @@
margin-right: 10px; margin-right: 10px;
background-color: #fff; background-color: #fff;
} }
.search-form input[type="submit"] { .search-form input[type="submit"] {
padding: 8px 16px; padding: 8px 16px;
background-color: #3498db; background-color: #3498db;
@@ -142,46 +142,46 @@
border-radius: 4px; border-radius: 4px;
cursor: pointer; cursor: pointer;
} }
.search-form input[type="submit"]:hover { .search-form input[type="submit"]:hover {
background-color: #2980b9; background-color: #2980b9;
} }
.search-info { .search-info {
color: #78909c; /* 统一搜索信息颜色 */ color: #78909c; /* 统一搜索信息颜色 */
font-size: 0.9em; font-size: 0.9em;
margin-bottom: 10px; margin-bottom: 10px;
} }
/* 新增:左侧筛选栏样式 */ /* 新增:左侧筛选栏样式 */
.content-wrapper { .content-wrapper {
display: flex; display: flex;
gap: 20px; gap: 20px;
} }
.sidebar { .sidebar {
flex: 0 0 200px; flex: 0 0 200px;
background-color: #e3f2fd; /* 统一边栏背景色调 */ background-color: #e3f2fd; /* 统一边栏背景色调 */
border-radius: 5px; border-radius: 5px;
padding: 15px; padding: 15px;
} }
.main-content { .main-content {
flex: 1; flex: 1;
} }
.sidebar .filters { .sidebar .filters {
margin-bottom: 20px; margin-bottom: 20px;
padding: 0; padding: 0;
background-color: transparent; background-color: transparent;
} }
.sidebar .filters strong { .sidebar .filters strong {
display: block; display: block;
margin-bottom: 10px; margin-bottom: 10px;
color: #2c3e50; color: #2c3e50;
} }
.sidebar .filters a { .sidebar .filters a {
display: block; display: block;
padding: 8px 10px; padding: 8px 10px;
@@ -191,12 +191,12 @@
text-decoration: none; text-decoration: none;
border-radius: 3px; border-radius: 3px;
} }
.sidebar .filters a.active { .sidebar .filters a.active {
background-color: #3498db; background-color: #3498db;
color: white; color: white;
} }
/* 新增:导出功能样式 */ /* 新增:导出功能样式 */
.export-section { .export-section {
margin-bottom: 20px; margin-bottom: 20px;
@@ -205,7 +205,7 @@
border-radius: 5px; border-radius: 5px;
text-align: center; text-align: center;
} }
.export-btn { .export-btn {
padding: 10px 20px; padding: 10px 20px;
background-color: #4caf50; /* 统一按钮背景色调 */ background-color: #4caf50; /* 统一按钮背景色调 */
@@ -216,118 +216,31 @@
font-size: 16px; font-size: 16px;
margin: 0 5px; margin: 0 5px;
} }
.export-btn:hover { .export-btn:hover {
background-color: #388e3c; /* 统一按钮悬停色调 */ background-color: #388e3c; /* 统一按钮悬停色调 */
} }
.export-btn:disabled { .export-btn:disabled {
background-color: #9e9e9e; /* 统一禁用按钮色调 */ background-color: #9e9e9e; /* 统一禁用按钮色调 */
cursor: not-allowed; cursor: not-allowed;
} }
.article-checkbox { .article-checkbox {
margin-right: 10px; margin-right: 10px;
} }
/* 新增:爬虫控制按钮样式 */
.crawler-control {
margin-bottom: 20px;
padding: 15px;
background-color: #fff3e0; /* 统一爬虫控制区域背景色调 */
border-radius: 5px;
text-align: center;
}
.crawler-btn {
padding: 10px 20px;
background-color: #ff9800; /* 统一爬虫按钮背景色调 */
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
margin: 0 5px;
}
.crawler-btn:hover {
background-color: #f57c00; /* 统一爬虫按钮悬停色调 */
}
.crawler-btn:disabled {
background-color: #9e9e9e; /* 统一禁用爬虫按钮色调 */
cursor: not-allowed;
}
.crawler-result {
margin-top: 10px;
padding: 10px;
border-radius: 4px;
display: none;
}
.crawler-result.success {
background-color: #e8f5e9;
color: #2e7d32;
}
.crawler-result.error {
background-color: #ffebee;
color: #c62828;
}
/* 新增:进度条样式 */
.progress-container {
margin-top: 10px;
display: none;
}
.progress-bar {
width: 100%;
height: 20px;
background-color: #e0e0e0;
border-radius: 10px;
overflow: hidden;
}
.progress-fill {
height: 100%;
background-color: #4caf50;
width: 0%;
transition: width 0.3s ease;
}
.progress-text {
margin-top: 5px;
font-size: 14px;
color: #666;
}
</style> </style>
</head> </head>
<body> <body>
<div class="container"> <div class="container">
<h1>绿色课堂文章列表</h1> <h1>绿色课堂文章列表</h1>
<!-- 新增:爬虫控制按钮 -->
<div class="crawler-control">
<button id="runCrawlerBtn" class="crawler-btn" data-website="www.news.cn">执行新华网爬虫</button>
<button id="runDongfangyancaoCrawlerBtn" class="crawler-btn" data-website="东方烟草报">执行东方烟草报爬虫</button>
<button id="pauseCrawlerBtn" class="crawler-btn" disabled>暂停爬虫</button>
<div id="crawlerProgress" class="progress-container">
<div class="progress-bar">
<div class="progress-fill" id="progressFill"></div>
</div>
<div class="progress-text" id="progressText">准备中...</div>
</div>
<div id="crawlerResult" class="crawler-result"></div>
</div>
<!-- 新增:搜索表单 --> <!-- 新增:搜索表单 -->
<div class="search-form"> <div class="search-form">
<form method="get"> <form method="get">
<input type="text" name="q" placeholder="输入关键词搜索文章..." value="{{ search_query }}"> <input type="text" name="q" placeholder="输入关键词搜索文章..." value="{{ search_query }}">
{% if selected_website %} {% if selected_website %}
<input type="hidden" name="website" value="{{ selected_website.id }}"> <input type="hidden" name="website" value="{{ selected_website.id }}">
{% endif %} {% endif %}
<input type="submit" value="搜索"> <input type="submit" value="搜索">
</form> </form>
@@ -338,23 +251,25 @@
<div class="sidebar"> <div class="sidebar">
<div class="filters"> <div class="filters">
<strong>按网站筛选:</strong> <strong>按网站筛选:</strong>
<a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}" {% if not selected_website %}class="active" {% endif %}>全部</a> <a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}"
{% if not selected_website %}class="active" {% endif %}>全部</a>
{% for website in websites %} {% for website in websites %}
<a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}" {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a> <a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}"
{% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
{% endfor %} {% endfor %}
</div> </div>
</div> </div>
<!-- 主内容区域 --> <!-- 主内容区域 -->
<div class="main-content"> <div class="main-content">
<!-- 新增:搜索结果信息 --> <!-- 新增:搜索结果信息 -->
{% if search_query %} {% if search_query %}
<div class="search-info"> <div class="search-info">
搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章 搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章
<a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a> <a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a>
</div> </div>
{% endif %} {% endif %}
<!-- 新增:导出功能 --> <!-- 新增:导出功能 -->
<div class="export-section"> <div class="export-section">
<button id="selectAllBtn" class="export-btn">全选</button> <button id="selectAllBtn" class="export-btn">全选</button>
@@ -367,60 +282,70 @@
<ul> <ul>
{% for article in page_obj %} {% for article in page_obj %}
<li> <li>
<input type="checkbox" class="article-checkbox" value="{{ article.id }}" id="article_{{ article.id }}"> <input type="checkbox" class="article-checkbox" value="{{ article.id }}"
<a href="{% url 'article_detail' article.id %}">{{ article.title }}</a> id="article_{{ article.id }}">
<div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div> <a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
</li> <div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
{% empty %} </li>
<li>暂无文章</li> {% empty %}
<li>暂无文章</li>
{% endfor %} {% endfor %}
</ul> </ul>
<div class="pagination"> <div class="pagination">
{% if page_obj.has_previous %} {% if page_obj.has_previous %}
{% if selected_website %} {% if selected_website %}
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo; 首页</a> <a href="?website=
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a> {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo;
{% else %} 首页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a> <a href="?website=
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a> {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
{% endif %} {% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
{% endif %}
{% endif %} {% endif %}
<span>第 {{ page_obj.number }} 页,共 {{ page_obj.paginator.num_pages }} 页</span> <span>第 {{ page_obj.number }} 页,共 {{ page_obj.paginator.num_pages }} 页</span>
<!-- 修改:优化页码显示逻辑 --> <!-- 修改:优化页码显示逻辑 -->
{% with page_obj.paginator as paginator %} {% with page_obj.paginator as paginator %}
{% for num in paginator.page_range %} {% for num in paginator.page_range %}
{% if page_obj.number == num %} {% if page_obj.number == num %}
<a href="#" class="current">{{ num }}</a> <a href="#" class="current">{{ num }}</a>
{% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %} {% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
{% if selected_website %} {% if selected_website %}
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a> <a href="?website=
{% else %} {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a> {% else %}
{% endif %} <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
{% elif num == 1 or num == paginator.num_pages %} {% endif %}
{% if selected_website %} {% elif num == 1 or num == paginator.num_pages %}
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a> {% if selected_website %}
{% else %} <a href="?website=
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a> {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
{% endif %} {% else %}
{% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %} <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
<span class="ellipsis">...</span> {% endif %}
{% endif %} {% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
{% endfor %} <span class="ellipsis">...</span>
{% endif %}
{% endfor %}
{% endwith %} {% endwith %}
{% if page_obj.has_next %} {% if page_obj.has_next %}
{% if selected_website %} {% if selected_website %}
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a> <a href="?website=
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a> {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
{% else %} <a href="?website=
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a> {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a> &raquo;</a>
{% endif %} {% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页
&raquo;</a>
{% endif %}
{% endif %} {% endif %}
</div> </div>
</div> </div>
@@ -428,213 +353,6 @@
</div> </div>
<script> <script>
// ID of the task currently being tracked and the handle of the status-polling timer
let currentTaskId = null;
let statusCheckInterval = null;

// Look up the DOM elements used by the crawler controls once
const runCrawlerBtn = document.getElementById('runCrawlerBtn');
const runDongfangyancaoCrawlerBtn = document.getElementById('runDongfangyancaoCrawlerBtn');
const pauseCrawlerBtn = document.getElementById('pauseCrawlerBtn');
const progressContainer = document.getElementById('crawlerProgress');
const progressFill = document.getElementById('progressFill');
const progressText = document.getElementById('progressText');
const resultDiv = document.getElementById('crawlerResult');

// Each start button launches its own crawler command
runCrawlerBtn.addEventListener('click', () => {
    runCrawler('www.news.cn', 'crawl_xinhua');
});
runDongfangyancaoCrawlerBtn.addEventListener('click', () => {
    runCrawler('东方烟草报', 'crawl_dongfangyancao');
});

// The pause button only acts when a task ID is being tracked
pauseCrawlerBtn.addEventListener('click', () => {
    if (!currentTaskId) {
        return;
    }
    pauseCrawler(currentTaskId);
});
// Start a crawler on the server and, on success, begin polling its status.
//   websiteName - label of the target site (kept for callers; not sent to the server)
//   crawlerName - command name posted to the server as the `crawler_name` form field
function runCrawler(websiteName, crawlerName) {
    // Show an error message and return the controls to their idle state
    function showStartError(message) {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result error';
        resultDiv.textContent = message;
        runCrawlerBtn.disabled = false;
        runDongfangyancaoCrawlerBtn.disabled = false;
        progressContainer.style.display = 'none';
    }

    // Disable both start buttons so the crawler cannot be launched twice
    runCrawlerBtn.disabled = true;
    runDongfangyancaoCrawlerBtn.disabled = true;
    resultDiv.style.display = 'none';

    // Reveal the progress area
    progressContainer.style.display = 'block';
    updateProgress(0, '爬虫启动中...');

    // POST the crawler name as a form-encoded body
    fetch('{% url "run_crawler" %}', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        // Encode the value so characters such as & or = cannot corrupt the form body
        body: 'crawler_name=' + encodeURIComponent(crawlerName)
    })
        // A non-JSON body (e.g. an HTML error page) is reported by HTTP status
        // instead of an opaque JSON parse error
        .then(response => response.json().catch(() => {
            throw new Error('HTTP ' + response.status);
        }))
        .then(data => {
            if (data.status === 'success') {
                currentTaskId = data.task_id;
                pauseCrawlerBtn.disabled = false;
                // Start polling the task status
                checkCrawlerStatus(currentTaskId);
            } else {
                showStartError(data.message);
            }
        })
        .catch(error => {
            showStartError('请求失败: ' + error);
        });
}
// Poll the crawler-status endpoint once per second for the given task and update
// the progress bar, result box and buttons according to the reported status.
//   taskId - task identifier posted to the server as the `task_id` form field
function checkCrawlerStatus(taskId) {
    // Cancel any poll loop that is still running
    if (statusCheckInterval) {
        clearInterval(statusCheckInterval);
    }

    // Stop this poll loop and return the buttons to their idle state
    function stopPolling() {
        clearInterval(pollHandle);
        if (statusCheckInterval === pollHandle) {
            statusCheckInterval = null;
        }
        runCrawlerBtn.disabled = false;
        runDongfangyancaoCrawlerBtn.disabled = false;
        pauseCrawlerBtn.disabled = true;
    }

    // Show a message in the result box; kind is 'success' or 'error'
    function showResult(kind, message) {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result ' + kind;
        resultDiv.textContent = message;
    }

    const pollHandle = setInterval(() => {
        fetch('{% url "crawler_status" %}', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-CSRFToken': '{{ csrf_token }}'
            },
            body: 'task_id=' + encodeURIComponent(taskId)
        })
            .then(response => response.json())
            .then(data => {
                // A request may still be in flight when the loop is stopped or replaced;
                // ignore its response so the UI is not updated (or reloaded) twice
                if (statusCheckInterval !== pollHandle) {
                    return;
                }

                if (data.status === 'running') {
                    // Simulated progress: one percent per elapsed second, capped at 90
                    const elapsedSeconds = Math.floor((new Date() - new Date(data.start_time)) / 1000);
                    // An unparseable start_time gives NaN and clock skew can give a negative value
                    const progress = Number.isFinite(elapsedSeconds)
                        ? Math.max(0, Math.min(90, elapsedSeconds))
                        : 0;
                    updateProgress(progress, data.message);
                } else if (data.status === 'completed') {
                    stopPolling();
                    updateProgress(100, data.message);
                    showResult('success', data.message);

                    // Hide the progress bar after 3 seconds
                    setTimeout(() => {
                        progressContainer.style.display = 'none';
                    }, 3000);

                    // Reload the page so newly crawled articles are listed
                    setTimeout(() => {
                        location.reload();
                    }, 2000);
                } else if (data.status === 'paused') {
                    stopPolling();
                    updateProgress(data.progress || 0, '爬虫已暂停');
                    showResult('success', '爬虫已暂停');
                } else if (data.status === 'error') {
                    stopPolling();
                    progressContainer.style.display = 'none';
                    showResult('error', data.message);
                }
                // Any other status: keep polling
            })
            .catch(error => {
                if (statusCheckInterval !== pollHandle) {
                    return;
                }
                stopPolling();
                progressContainer.style.display = 'none';
                showResult('error', '检查状态失败: ' + error);
            });
    }, 1000);

    // Publish the handle so a later call (or this loop itself) can cancel it
    statusCheckInterval = pollHandle;
}
// Render progress: `text` becomes the status label and `percent` the width of the filled bar
function updateProgress(percent, text) {
    progressText.textContent = text;
    progressFill.style.width = `${percent}%`;
}
// Ask the server to pause the given crawler task and reflect the outcome in the UI.
//   taskId - task identifier posted to the server as the `task_id` form field
function pauseCrawler(taskId) {
    // Show an error message in the result box
    function showPauseError(message) {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result error';
        resultDiv.textContent = message;
    }

    fetch('{% url "pause_crawler" %}', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        // Encode the value so it cannot corrupt the form body
        body: 'task_id=' + encodeURIComponent(taskId)
    })
        // A non-JSON body (e.g. an HTML error page) is reported by HTTP status
        // instead of an opaque JSON parse error
        .then(response => response.json().catch(() => {
            throw new Error('HTTP ' + response.status);
        }))
        .then(data => {
            if (data.status === 'success') {
                // Pause accepted: disable the button and update the progress label
                pauseCrawlerBtn.disabled = true;
                updateProgress(data.progress || 0, '正在暂停...');
            } else {
                showPauseError(data.message);
            }
        })
        .catch(error => {
            showPauseError('暂停请求失败: ' + error);
        });
}
// 导出功能相关JavaScript // 导出功能相关JavaScript
const checkboxes = document.querySelectorAll('.article-checkbox'); const checkboxes = document.querySelectorAll('.article-checkbox');
const exportJsonBtn = document.getElementById('exportJsonBtn'); const exportJsonBtn = document.getElementById('exportJsonBtn');
@@ -643,7 +361,7 @@
const deselectAllBtn = document.getElementById('deselectAllBtn'); const deselectAllBtn = document.getElementById('deselectAllBtn');
// 新增:获取ZIP导出按钮元素 // 新增:获取ZIP导出按钮元素
const exportZipBtn = document.getElementById('exportZipBtn'); const exportZipBtn = document.getElementById('exportZipBtn');
// 更新导出按钮状态 // 更新导出按钮状态
function updateExportButtons() { function updateExportButtons() {
const selectedCount = document.querySelectorAll('.article-checkbox:checked').length; const selectedCount = document.querySelectorAll('.article-checkbox:checked').length;
@@ -651,12 +369,12 @@
exportCsvBtn.disabled = selectedCount === 0; exportCsvBtn.disabled = selectedCount === 0;
exportZipBtn.disabled = selectedCount === 0; // 新增:更新ZIP导出按钮状态 exportZipBtn.disabled = selectedCount === 0; // 新增:更新ZIP导出按钮状态
} }
// 为所有复选框添加事件监听器 // 为所有复选框添加事件监听器
checkboxes.forEach(checkbox => { checkboxes.forEach(checkbox => {
checkbox.addEventListener('change', updateExportButtons); checkbox.addEventListener('change', updateExportButtons);
}); });
// 全选功能 // 全选功能
selectAllBtn.addEventListener('click', () => { selectAllBtn.addEventListener('click', () => {
checkboxes.forEach(checkbox => { checkboxes.forEach(checkbox => {
@@ -664,7 +382,7 @@
}); });
updateExportButtons(); updateExportButtons();
}); });
// 取消全选功能 // 取消全选功能
deselectAllBtn.addEventListener('click', () => { deselectAllBtn.addEventListener('click', () => {
checkboxes.forEach(checkbox => { checkboxes.forEach(checkbox => {
@@ -672,12 +390,12 @@
}); });
updateExportButtons(); updateExportButtons();
}); });
// 导出为JSON功能 // 导出为JSON功能
exportJsonBtn.addEventListener('click', () => { exportJsonBtn.addEventListener('click', () => {
const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked')) const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
.map(checkbox => checkbox.value); .map(checkbox => checkbox.value);
// 发送POST请求导出文章 // 发送POST请求导出文章
fetch('{% url "export_articles" %}', { fetch('{% url "export_articles" %}', {
method: 'POST', method: 'POST',
@@ -690,32 +408,32 @@
format: 'json' format: 'json'
}) })
}) })
.then(response => { .then(response => {
if (response.ok) { if (response.ok) {
return response.blob(); return response.blob();
} }
throw new Error('导出失败'); throw new Error('导出失败');
}) })
.then(blob => { .then(blob => {
const url = window.URL.createObjectURL(blob); const url = window.URL.createObjectURL(blob);
const a = document.createElement('a'); const a = document.createElement('a');
a.href = url; a.href = url;
a.download = 'articles.json'; a.download = 'articles.json';
document.body.appendChild(a); document.body.appendChild(a);
a.click(); a.click();
window.URL.revokeObjectURL(url); window.URL.revokeObjectURL(url);
document.body.removeChild(a); document.body.removeChild(a);
}) })
.catch(error => { .catch(error => {
alert('导出失败: ' + error); alert('导出失败: ' + error);
}); });
}); });
// 导出为CSV功能 // 导出为CSV功能
exportCsvBtn.addEventListener('click', () => { exportCsvBtn.addEventListener('click', () => {
const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked')) const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
.map(checkbox => checkbox.value); .map(checkbox => checkbox.value);
// 发送POST请求导出文章 // 发送POST请求导出文章
fetch('{% url "export_articles" %}', { fetch('{% url "export_articles" %}', {
method: 'POST', method: 'POST',
@@ -728,32 +446,32 @@
format: 'csv' format: 'csv'
}) })
}) })
.then(response => { .then(response => {
if (response.ok) { if (response.ok) {
return response.blob(); return response.blob();
} }
throw new Error('导出失败'); throw new Error('导出失败');
}) })
.then(blob => { .then(blob => {
const url = window.URL.createObjectURL(blob); const url = window.URL.createObjectURL(blob);
const a = document.createElement('a'); const a = document.createElement('a');
a.href = url; a.href = url;
a.download = 'articles.csv'; a.download = 'articles.csv';
document.body.appendChild(a); document.body.appendChild(a);
a.click(); a.click();
window.URL.revokeObjectURL(url); window.URL.revokeObjectURL(url);
document.body.removeChild(a); document.body.removeChild(a);
}) })
.catch(error => { .catch(error => {
alert('导出失败: ' + error); alert('导出失败: ' + error);
}); });
}); });
// 新增:导出为ZIP包功能 // 新增:导出为ZIP包功能
exportZipBtn.addEventListener('click', () => { exportZipBtn.addEventListener('click', () => {
const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked')) const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
.map(checkbox => checkbox.value); .map(checkbox => checkbox.value);
// 发送POST请求导出文章为ZIP包 // 发送POST请求导出文章为ZIP包
fetch('{% url "export_articles" %}', { fetch('{% url "export_articles" %}', {
method: 'POST', method: 'POST',
@@ -766,27 +484,27 @@
format: 'zip' // 指定导出格式为ZIP format: 'zip' // 指定导出格式为ZIP
}) })
}) })
.then(response => { .then(response => {
if (response.ok) { if (response.ok) {
return response.blob(); return response.blob();
} }
throw new Error('导出失败'); throw new Error('导出失败');
}) })
.then(blob => { .then(blob => {
const url = window.URL.createObjectURL(blob); const url = window.URL.createObjectURL(blob);
const a = document.createElement('a'); const a = document.createElement('a');
a.href = url; a.href = url;
a.download = 'articles.zip'; a.download = 'articles.zip';
document.body.appendChild(a); document.body.appendChild(a);
a.click(); a.click();
window.URL.revokeObjectURL(url); window.URL.revokeObjectURL(url);
document.body.removeChild(a); document.body.removeChild(a);
}) })
.catch(error => { .catch(error => {
alert('导出失败: ' + error); alert('导出失败: ' + error);
}); });
}); });
// 初始化导出按钮状态 // 初始化导出按钮状态
updateExportButtons(); updateExportButtons();
</script> </script>

View File

@@ -9,7 +9,9 @@ urlpatterns = [
path('run-crawler/', views.run_crawler, name='run_crawler'), path('run-crawler/', views.run_crawler, name='run_crawler'),
# 新增:检查爬虫状态的路由 # 新增:检查爬虫状态的路由
path('crawler-status/', views.crawler_status, name='crawler_status'), path('crawler-status/', views.crawler_status, name='crawler_status'),
# 新增:暂停爬虫的路由
path('pause-crawler/', views.pause_crawler, name='pause_crawler'),
# 添加导出文章的路由 # 添加导出文章的路由
path('export-articles/', views.export_articles, name='export_articles'), path('export-articles/', views.export_articles, name='export_articles'),
# 添加自定义管理后台的路由 # 添加自定义管理后台的路由
] ]

View File

@@ -12,7 +12,6 @@ import csv
from django.views.decorators.csrf import csrf_exempt from django.views.decorators.csrf import csrf_exempt
from django.utils import timezone from django.utils import timezone
# 用于跟踪爬虫任务状态的全局字典 # 用于跟踪爬虫任务状态的全局字典
crawler_tasks = {} crawler_tasks = {}
@@ -73,7 +72,7 @@ def run_crawler(request):
# 生成任务ID # 生成任务ID
task_id = str(uuid.uuid4()) task_id = str(uuid.uuid4())
# 记录任务开始前的文章数量 # 记录任务开始前的文章数量
initial_count = Article.objects.count() initial_count = Article.objects.count()
@@ -87,18 +86,18 @@ def run_crawler(request):
'start_time': timezone.now(), 'start_time': timezone.now(),
'initial_count': initial_count 'initial_count': initial_count
} }
# 根据爬虫名称调用相应的命令 # 根据爬虫名称调用相应的命令
if crawler_name in ['crawl_xinhua', 'crawl_dongfangyancao']: if crawler_name in ['crawl_xinhua', 'crawl_dongfangyancao']:
call_command(crawler_name) call_command(crawler_name)
else: else:
# 如果是通用爬虫命令使用crawl_articles # 如果是通用爬虫命令使用crawl_articles
call_command('crawl_articles', crawler_name) call_command('crawl_articles', crawler_name)
# 计算新增文章数量 # 计算新增文章数量
final_count = Article.objects.count() final_count = Article.objects.count()
added_count = final_count - initial_count added_count = final_count - initial_count
# 更新任务状态为完成 # 更新任务状态为完成
crawler_tasks[task_id] = { crawler_tasks[task_id] = {
'status': 'completed', 'status': 'completed',
@@ -113,11 +112,11 @@ def run_crawler(request):
error_msg = "检测到重复文章URL已跳过重复项" error_msg = "检测到重复文章URL已跳过重复项"
else: else:
print(f"爬虫执行出错: {e}") print(f"爬虫执行出错: {e}")
# 计算实际新增文章数量(即使有错误也统计) # 计算实际新增文章数量(即使有错误也统计)
final_count = Article.objects.count() final_count = Article.objects.count()
added_count = final_count - initial_count added_count = final_count - initial_count
# 更新任务状态为完成(即使有部分错误) # 更新任务状态为完成(即使有部分错误)
crawler_tasks[task_id] = { crawler_tasks[task_id] = {
'status': 'completed', 'status': 'completed',
@@ -147,17 +146,47 @@ def crawler_status(request):
task_id = request.POST.get('task_id', '') task_id = request.POST.get('task_id', '')
if not task_id: if not task_id:
return JsonResponse({'status': 'error', 'message': '任务ID不能为空'}) return JsonResponse({'status': 'error', 'message': '任务ID不能为空'})
# 获取任务状态 # 获取任务状态
task_info = crawler_tasks.get(task_id) task_info = crawler_tasks.get(task_id)
if not task_info: if not task_info:
return JsonResponse({'status': 'error', 'message': '未找到任务'}) return JsonResponse({'status': 'error', 'message': '未找到任务'})
return JsonResponse(task_info) return JsonResponse(task_info)
except Exception as e: except Exception as e:
return JsonResponse({'status': 'error', 'message': str(e)}) return JsonResponse({'status': 'error', 'message': str(e)})
# Added: view that pauses a crawler task
@require_http_methods(["POST"])
def pause_crawler(request):
    """
    Mark a tracked crawler task as paused.

    Reads ``task_id`` from the POST form data and looks the task up in the
    module-level ``crawler_tasks`` dict. A task that has already finished is
    left untouched so its final status and message are not overwritten.

    Returns a JsonResponse whose ``status`` is ``'success'`` (with ``message``
    and ``progress``) when the task was marked paused, or ``'error'`` with a
    ``message`` when the ID is missing, the task is unknown, the task has
    already finished, or an unexpected exception occurs.
    """
    try:
        task_id = request.POST.get('task_id', '')
        if not task_id:
            return JsonResponse({'status': 'error', 'message': '任务ID不能为空'})

        # Look up the tracked task
        task_info = crawler_tasks.get(task_id)
        if not task_info:
            return JsonResponse({'status': 'error', 'message': '未找到任务'})

        # A finished task must keep its final state; flipping it to 'paused'
        # would discard the result that the status endpoint reports.
        current_status = task_info.get('status')
        if current_status in ('completed', 'error'):
            return JsonResponse({
                'status': 'error',
                'message': f'任务当前状态为 {current_status},无法暂停'
            })

        # NOTE: this does not stop the crawler itself; it only updates the
        # tracked status to simulate pausing.
        task_info['status'] = 'paused'
        task_info['message'] = '爬虫已暂停'

        return JsonResponse({
            'status': 'success',
            'message': '爬虫已暂停',
            # Report the tracked progress when the task records one, else 0
            'progress': task_info.get('progress', 0)
        })
    except Exception as e:
        return JsonResponse({'status': 'error', 'message': str(e)})
# 新增:文章导出视图 # 新增:文章导出视图
@csrf_exempt @csrf_exempt
@require_http_methods(["POST"]) @require_http_methods(["POST"])
@@ -167,13 +196,13 @@ def export_articles(request):
data = json.loads(request.body) data = json.loads(request.body)
article_ids = data.get('article_ids', []) article_ids = data.get('article_ids', [])
format_type = data.get('format', 'json') format_type = data.get('format', 'json')
# 获取选中的文章 # 获取选中的文章
articles = Article.objects.filter(id__in=article_ids) articles = Article.objects.filter(id__in=article_ids)
if not articles.exists(): if not articles.exists():
return HttpResponse('没有选中文章', status=400) return HttpResponse('没有选中文章', status=400)
# 根据格式类型导出 # 根据格式类型导出
if format_type == 'json': if format_type == 'json':
# 准备JSON数据 # 准备JSON数据
@@ -189,7 +218,7 @@ def export_articles(request):
'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'), 'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
'media_files': article.media_files 'media_files': article.media_files
}) })
# 创建JSON响应 # 创建JSON响应
response = HttpResponse( response = HttpResponse(
json.dumps(articles_data, ensure_ascii=False, indent=2), json.dumps(articles_data, ensure_ascii=False, indent=2),
@@ -197,16 +226,16 @@ def export_articles(request):
) )
response['Content-Disposition'] = 'attachment; filename="articles.json"' response['Content-Disposition'] = 'attachment; filename="articles.json"'
return response return response
elif format_type == 'csv': elif format_type == 'csv':
# 创建CSV响应 # 创建CSV响应
response = HttpResponse(content_type='text/csv') response = HttpResponse(content_type='text/csv')
response['Content-Disposition'] = 'attachment; filename="articles.csv"' response['Content-Disposition'] = 'attachment; filename="articles.csv"'
# 创建CSV写入器 # 创建CSV写入器
writer = csv.writer(response) writer = csv.writer(response)
writer.writerow(['ID', '标题', '网站', 'URL', '发布时间', '内容', '创建时间', '媒体文件']) writer.writerow(['ID', '标题', '网站', 'URL', '发布时间', '内容', '创建时间', '媒体文件'])
# 写入文章数据 # 写入文章数据
for article in articles: for article in articles:
writer.writerow([ writer.writerow([
@@ -219,25 +248,25 @@ def export_articles(request):
article.created_at.strftime('%Y-%m-%d %H:%M:%S'), article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
';'.join(article.media_files) if article.media_files else '' ';'.join(article.media_files) if article.media_files else ''
]) ])
return response return response
# 新增:支持ZIP格式导出 # 新增:支持ZIP格式导出
elif format_type == 'zip': elif format_type == 'zip':
import zipfile import zipfile
from io import BytesIO from io import BytesIO
from django.conf import settings from django.conf import settings
import os import os
# 创建内存中的ZIP文件 # 创建内存中的ZIP文件
zip_buffer = BytesIO() zip_buffer = BytesIO()
with zipfile.ZipFile(zip_buffer, 'w') as zip_file: with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
# 为每篇文章创建Word文档并添加到ZIP文件中 # 为每篇文章创建Word文档并添加到ZIP文件中
for article in articles: for article in articles:
# 为每篇文章创建单独的文件夹 # 为每篇文章创建单独的文件夹
article_folder = f"article_{article.id}_{article.title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('*', '_').replace('?', '_').replace('"', '_').replace('<', '_').replace('>', '_').replace('|', '_')}" article_folder = f"article_{article.id}_{article.title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('*', '_').replace('?', '_').replace('"', '_').replace('<', '_').replace('>', '_').replace('|', '_')}"
# 创建文章数据 # 创建文章数据
article_data = { article_data = {
'id': article.id, 'id': article.id,
@@ -249,7 +278,7 @@ def export_articles(request):
'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'), 'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
'media_files': article.media_files 'media_files': article.media_files
} }
# 将文章数据保存为Word文件并添加到ZIP # 将文章数据保存为Word文件并添加到ZIP
try: try:
from docx import Document from docx import Document
@@ -257,23 +286,24 @@ def export_articles(request):
from io import BytesIO from io import BytesIO
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import requests import requests
# 创建Word文档 # 创建Word文档
doc = Document() doc = Document()
doc.add_heading(article.title, 0) doc.add_heading(article.title, 0)
# 添加文章元数据 # 添加文章元数据
doc.add_paragraph(f"网站: {article.website.name}") doc.add_paragraph(f"网站: {article.website.name}")
doc.add_paragraph(f"URL: {article.url}") doc.add_paragraph(f"URL: {article.url}")
doc.add_paragraph(f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}") doc.add_paragraph(
f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}") doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}")
# 添加文章内容 # 添加文章内容
doc.add_heading('内容', level=1) doc.add_heading('内容', level=1)
# 处理HTML内容 # 处理HTML内容
soup = BeautifulSoup(article.content, 'html.parser') soup = BeautifulSoup(article.content, 'html.parser')
# 处理内容中的图片 # 处理内容中的图片
for img in soup.find_all('img'): for img in soup.find_all('img'):
src = img.get('src', '') src = img.get('src', '')
@@ -293,13 +323,13 @@ def export_articles(request):
except Exception as e: except Exception as e:
# 如果添加图片失败添加图片URL作为文本 # 如果添加图片失败添加图片URL作为文本
doc.add_paragraph(f"[图片: {src}]") doc.add_paragraph(f"[图片: {src}]")
# 移除原始img标签 # 移除原始img标签
img.decompose() img.decompose()
content_text = soup.get_text() content_text = soup.get_text()
doc.add_paragraph(content_text) doc.add_paragraph(content_text)
# 添加媒体文件信息 # 添加媒体文件信息
if article.media_files: if article.media_files:
doc.add_heading('媒体文件', level=1) doc.add_heading('媒体文件', level=1)
@@ -309,7 +339,7 @@ def export_articles(request):
if os.path.exists(full_path): if os.path.exists(full_path):
# 检查文件扩展名以确定处理方式 # 检查文件扩展名以确定处理方式
file_extension = os.path.splitext(media_file)[1].lower() file_extension = os.path.splitext(media_file)[1].lower()
# 图片文件处理 # 图片文件处理
if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']: if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
doc.add_picture(full_path, width=Inches(4.0)) doc.add_picture(full_path, width=Inches(4.0))
@@ -324,7 +354,7 @@ def export_articles(request):
if media_file.startswith('http'): if media_file.startswith('http'):
response = requests.get(media_file, timeout=10) response = requests.get(media_file, timeout=10)
file_extension = os.path.splitext(media_file)[1].lower() file_extension = os.path.splitext(media_file)[1].lower()
# 图片文件处理 # 图片文件处理
if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']: if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
image_stream = BytesIO(response.content) image_stream = BytesIO(response.content)
@@ -335,20 +365,22 @@ def export_articles(request):
doc.add_paragraph(media_file) doc.add_paragraph(media_file)
except Exception as e: except Exception as e:
doc.add_paragraph(media_file) doc.add_paragraph(media_file)
# 保存Word文档到内存 # 保存Word文档到内存
doc_buffer = BytesIO() doc_buffer = BytesIO()
doc.save(doc_buffer) doc.save(doc_buffer)
doc_buffer.seek(0) doc_buffer.seek(0)
# 将Word文档添加到ZIP包 # 将Word文档添加到ZIP包
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'), doc_buffer.read()) zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'),
doc_buffer.read())
except ImportError: except ImportError:
# 如果没有安装python-docx库回退到JSON格式 # 如果没有安装python-docx库回退到JSON格式
json_data = json.dumps(article_data, ensure_ascii=False, indent=2) json_data = json.dumps(article_data, ensure_ascii=False, indent=2)
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.json'), json_data) zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.json'),
json_data)
# 添加媒体文件到ZIP包 # 添加媒体文件到ZIP包
if article.media_files: if article.media_files:
for media_file in article.media_files: for media_file in article.media_files:
@@ -362,19 +394,21 @@ def export_articles(request):
if media_file.startswith('http'): if media_file.startswith('http'):
import requests import requests
response = requests.get(media_file, timeout=10) response = requests.get(media_file, timeout=10)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content) zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
except Exception as e: except Exception as e:
# 如果添加媒体文件失败,继续处理其他文件 # 如果添加媒体文件失败,继续处理其他文件
pass pass
# 创建HttpResponse # 创建HttpResponse
zip_buffer.seek(0) zip_buffer.seek(0)
response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip') response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename=articles_export.zip' response['Content-Disposition'] = 'attachment; filename=articles_export.zip'
return response return response
else: else:
return HttpResponse('不支持的格式', status=400) return HttpResponse('不支持的格式', status=400)
except Exception as e: except Exception as e:
return HttpResponse(f'导出失败: {str(e)}', status=500) return HttpResponse(f'导出失败: {str(e)}', status=500)