Unknown change

This commit is contained in:
2025-08-13 18:40:31 +08:00
parent 5e396796ca
commit c618528a0a
6 changed files with 996 additions and 528 deletions

View File

@@ -34,7 +34,7 @@ def run_crawler_view(request):
if not website_name:
messages.error(request, '请选择要爬取的网站')
return redirect('admin:core_article_changelist')
try:
# 根据网站名称确定要执行的爬虫命令
if website_name == 'crawl_xinhua':
@@ -46,14 +46,14 @@ def run_crawler_view(request):
else:
# 对于其他网站,使用通用爬虫命令
crawler_name = 'crawl_articles'
# 运行爬虫命令不传递website_name作为参数
call_command(crawler_name)
messages.success(request, f'成功执行爬虫: {crawler_name}')
except Exception as e:
messages.error(request, f'执行爬虫失败: {str(e)}')
return redirect('admin:core_article_changelist')
@@ -241,12 +241,12 @@ class ArticleAdmin(admin.ModelAdmin):
# 创建内存中的ZIP文件
zip_buffer = BytesIO()
with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
for article in queryset:
# 为每篇文章创建单独的文件夹
article_folder = f"article_{article.id}_{article.title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('*', '_').replace('?', '_').replace('"', '_').replace('<', '_').replace('>', '_').replace('|', '_')}"
# 创建Word文档
doc = Document()
doc.add_heading(article.title, 0)
@@ -281,7 +281,8 @@ class ArticleAdmin(admin.ModelAdmin):
image_stream = BytesIO(response.content)
doc.add_picture(image_stream, width=Inches(4.0))
# 将网络文件保存到ZIP
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(src)), response.content)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(src)),
response.content)
else:
# 本地图片
full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
@@ -310,7 +311,7 @@ class ArticleAdmin(admin.ModelAdmin):
full_path = os.path.join(settings.MEDIA_ROOT, media_file)
# 检查文件扩展名以确定处理方式
file_extension = os.path.splitext(media_file)[1].lower()
# 图片文件处理
if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
if os.path.exists(full_path):
@@ -325,7 +326,9 @@ class ArticleAdmin(admin.ModelAdmin):
image_stream = BytesIO(response.content)
doc.add_picture(image_stream, width=Inches(4.0))
# 将网络文件保存到ZIP
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content)
zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
else:
doc.add_paragraph(media_file)
# 视频文件处理
@@ -341,7 +344,9 @@ class ArticleAdmin(admin.ModelAdmin):
if media_file.startswith('http'):
# 将网络文件保存到ZIP
response = requests.get(media_file, timeout=10)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content)
zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
doc.add_paragraph(f"[视频文件: {media_file}]")
else:
doc.add_paragraph(media_file)
@@ -355,7 +360,9 @@ class ArticleAdmin(admin.ModelAdmin):
# 如果是URL格式的媒体文件
if media_file.startswith('http'):
response = requests.get(media_file, timeout=10)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content)
zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
doc.add_paragraph(f"[文件: {media_file}]")
else:
doc.add_paragraph(media_file)
@@ -366,7 +373,8 @@ class ArticleAdmin(admin.ModelAdmin):
doc_buffer = BytesIO()
doc.save(doc_buffer)
doc_buffer.seek(0)
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'), doc_buffer.read())
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'),
doc_buffer.read())
# 创建HttpResponse
zip_buffer.seek(0)
@@ -512,5 +520,4 @@ class DongfangyancaoArticleAdmin(admin.ModelAdmin):
export_as_json.short_description = "导出选中文章为JSON格式"
# 在各自的管理站点中注册模型

View File

@@ -13,7 +13,7 @@
<option value="crawl_dongfangyancao">东方烟草报</option>
<option value="crawl_articles">通用爬虫</option>
</select>
<input type="submit" value="执行爬虫" class="default" style="margin-left: 10px;" />
<input type="submit" value="执行爬虫" class="default" style="margin-left: 10px;"/>
</form>
</div>
{% endblock %}

View File

@@ -2,24 +2,23 @@
<html lang="zh">
<head>
<meta charset="UTF-8"/>
<title>{{ article.title }}</title>
<title>绿色课堂文章列表</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6;
color: #333;
max-width: 1200px; /* 修改:同步调整页面最大宽度与列表页一致 */
margin: 0 auto;
padding: 20px;
background-color: #f8f9fa;
background-color: #f0f8ff; /* 统一背景色调 */
}
.article-container {
.container {
background: white;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
padding: 30px;
margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); /* 添加轻微阴影 */
border-radius: 8px; /* 添加圆角 */
}
h1 {
@@ -29,57 +28,765 @@
margin-top: 0;
}
.meta {
color: #7f8c8d;
font-size: 0.9em;
.filters {
margin-bottom: 20px;
padding: 15px;
background-color: #e3f2fd; /* 统一滤镜背景色调 */
border-radius: 5px;
}
hr {
border: 0;
height: 1px;
background: #ecf0f1;
margin: 20px 0;
}
.content {
font-size: 16px;
}
.content img {
max-width: 100%;
height: auto;
border-radius: 4px;
margin: 10px 0;
}
.back-link {
.filters a {
display: inline-block;
padding: 10px 20px;
padding: 5px 10px;
margin: 0 5px 5px 0;
background-color: #bbdefb; /* 统一链接背景色调 */
color: #0d47a1;
text-decoration: none;
border-radius: 3px;
}
.filters a.active {
background-color: #3498db;
color: white;
}
ul {
list-style: none;
padding: 0;
}
li {
padding: 10px 0;
border-bottom: 1px solid #e0e0e0; /* 统一分隔线颜色 */
}
li:last-child {
border-bottom: none;
}
a {
color: #1976d2; /* 统一链接颜色 */
text-decoration: none;
}
a:hover {
color: #0d47a1; /* 统一悬停颜色 */
text-decoration: underline;
}
.meta {
color: #78909c; /* 统一元数据颜色 */
font-size: 0.9em;
}
.pagination {
margin-top: 30px;
text-align: center;
padding: 20px 0;
}
.pagination a {
display: inline-block;
padding: 8px 16px;
background-color: #3498db;
color: white;
text-decoration: none;
border-radius: 4px;
transition: background-color 0.3s;
margin: 0 2px; /* 修改:调整页码间距 */
}
.back-link:hover {
.pagination a:hover {
background-color: #2980b9;
}
.pagination span {
margin: 0 10px;
color: #7f8c8d;
}
/* 新增:当前页码样式 */
.pagination .current {
background-color: #2980b9;
cursor: default;
}
/* 新增:省略号样式 */
.pagination .ellipsis {
display: inline-block;
padding: 8px 4px;
color: #7f8c8d;
}
/* 新增:搜索框样式 */
.search-form {
margin-bottom: 20px;
padding: 15px;
background-color: #e3f2fd; /* 统一搜索框背景色调 */
border-radius: 5px;
}
.search-form input[type="text"] {
padding: 8px 12px;
border: 1px solid #bbdefb; /* 统一边框颜色 */
border-radius: 4px;
width: 300px;
margin-right: 10px;
background-color: #fff;
}
.search-form input[type="submit"] {
padding: 8px 16px;
background-color: #3498db;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
.search-form input[type="submit"]:hover {
background-color: #2980b9;
}
.search-info {
color: #78909c; /* 统一搜索信息颜色 */
font-size: 0.9em;
margin-bottom: 10px;
}
/* 新增:左侧筛选栏样式 */
.content-wrapper {
display: flex;
gap: 20px;
}
.sidebar {
flex: 0 0 200px;
background-color: #e3f2fd; /* 统一边栏背景色调 */
border-radius: 5px;
padding: 15px;
}
.main-content {
flex: 1;
}
.sidebar .filters {
margin-bottom: 20px;
padding: 0;
background-color: transparent;
}
.sidebar .filters strong {
display: block;
margin-bottom: 10px;
color: #2c3e50;
}
.sidebar .filters a {
display: block;
padding: 8px 10px;
margin: 0 0 5px 0;
background-color: #bbdefb; /* 统一边栏链接背景色调 */
color: #0d47a1;
text-decoration: none;
border-radius: 3px;
}
.sidebar .filters a.active {
background-color: #3498db;
color: white;
}
/* 新增:导出功能样式 */
.export-section {
margin-bottom: 20px;
padding: 15px;
background-color: #e8f5e9; /* 统一导出区域背景色调 */
border-radius: 5px;
text-align: center;
}
.export-btn {
padding: 10px 20px;
background-color: #4caf50; /* 统一按钮背景色调 */
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
margin: 0 5px;
}
.export-btn:hover {
background-color: #388e3c; /* 统一按钮悬停色调 */
}
.export-btn:disabled {
background-color: #9e9e9e; /* 统一禁用按钮色调 */
cursor: not-allowed;
}
.article-checkbox {
margin-right: 10px;
}
/* 新增:爬虫控制按钮样式 */
.crawler-control {
margin-bottom: 20px;
padding: 15px;
background-color: #fff3e0; /* 统一爬虫控制区域背景色调 */
border-radius: 5px;
text-align: center;
}
.crawler-btn {
padding: 10px 20px;
background-color: #ff9800; /* 统一爬虫按钮背景色调 */
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
margin: 0 5px;
}
.crawler-btn:hover {
background-color: #f57c00; /* 统一爬虫按钮悬停色调 */
}
.crawler-btn:disabled {
background-color: #9e9e9e; /* 统一禁用爬虫按钮色调 */
cursor: not-allowed;
}
.crawler-result {
margin-top: 10px;
padding: 10px;
border-radius: 4px;
display: none;
}
.crawler-result.success {
background-color: #e8f5e9;
color: #2e7d32;
}
.crawler-result.error {
background-color: #ffebee;
color: #c62828;
}
/* 新增:进度条样式 */
.progress-container {
margin-top: 10px;
display: none;
}
.progress-bar {
width: 100%;
height: 20px;
background-color: #e0e0e0;
border-radius: 10px;
overflow: hidden;
}
.progress-fill {
height: 100%;
background-color: #4caf50;
width: 0%;
transition: width 0.3s ease;
}
.progress-text {
margin-top: 5px;
font-size: 14px;
color: #666;
}
</style>
</head>
<body>
<div class="article-container">
<h1>{{ article.title }}</h1>
<div class="meta">
<p>发布时间: {{ article.pub_date|date:"Y-m-d H:i" }}</p>
<div class="container">
<h1>绿色课堂文章列表</h1>
<!-- 新增:搜索表单 -->
<div class="search-form">
<form method="get">
<input type="text" name="q" placeholder="输入关键词搜索文章..." value="{{ search_query }}">
{% if selected_website %}
<input type="hidden" name="website" value="{{ selected_website.id }}">
{% endif %}
<input type="submit" value="搜索">
</form>
</div>
<hr/>
<div class="content">
{{ article.content|safe }}
<div class="content-wrapper">
<!-- 左侧筛选栏 -->
<div class="sidebar">
<div class="filters">
<strong>按网站筛选:</strong>
<a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}"
{% if not selected_website %}class="active" {% endif %}>全部</a>
{% for website in websites %}
<a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}"
{% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
{% endfor %}
</div>
</div>
<!-- 主内容区域 -->
<div class="main-content">
<!-- 新增:搜索结果信息 -->
{% if search_query %}
<div class="search-info">
搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章
<a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a>
</div>
{% endif %}
<!-- 新增:导出功能 -->
<div class="export-section">
<button id="selectAllBtn" class="export-btn">全选</button>
<button id="deselectAllBtn" class="export-btn">取消全选</button>
<button id="exportJsonBtn" class="export-btn" disabled>导出为JSON</button>
<button id="exportCsvBtn" class="export-btn" disabled>导出为CSV</button>
<!-- 新增:导出为ZIP包按钮 -->
<button id="exportZipBtn" class="export-btn" disabled>导出为ZIP包</button>
</div>
<ul>
{% for article in page_obj %}
<li>
<input type="checkbox" class="article-checkbox" value="{{ article.id }}"
id="article_{{ article.id }}">
<a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
<div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
</li>
{% empty %}
<li>暂无文章</li>
{% endfor %}
</ul>
<!-- Pagination bar: first/previous links, a windowed list of page numbers, next/last links.
     Every href is kept on a single line so no whitespace leaks into the query string, and
     search_query is passed through |urlencode so characters such as & # + or spaces
     cannot break the generated links. -->
<div class="pagination">
    {% if page_obj.has_previous %}
        {% if selected_website %}
            <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query|urlencode }}{% endif %}&page=1">&laquo; 首页</a>
            <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query|urlencode }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
        {% else %}
            <a href="?{% if search_query %}q={{ search_query|urlencode }}&{% endif %}page=1">&laquo; 首页</a>
            <a href="?{% if search_query %}q={{ search_query|urlencode }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
        {% endif %}
    {% endif %}
    <span>第 {{ page_obj.number }} 页,共 {{ page_obj.paginator.num_pages }} 页</span>
    {# Page-number window: the current page, its two neighbours on each side, the first and last page, and an ellipsis exactly three pages away. #}
    {% with page_obj.paginator as paginator %}
        {% for num in paginator.page_range %}
            {% if page_obj.number == num %}
                <a href="#" class="current">{{ num }}</a>
            {% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
                {% if selected_website %}
                    <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query|urlencode }}{% endif %}&page={{ num }}">{{ num }}</a>
                {% else %}
                    <a href="?{% if search_query %}q={{ search_query|urlencode }}&{% endif %}page={{ num }}">{{ num }}</a>
                {% endif %}
            {% elif num == 1 or num == paginator.num_pages %}
                {% if selected_website %}
                    <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query|urlencode }}{% endif %}&page={{ num }}">{{ num }}</a>
                {% else %}
                    <a href="?{% if search_query %}q={{ search_query|urlencode }}&{% endif %}page={{ num }}">{{ num }}</a>
                {% endif %}
            {% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
                <span class="ellipsis">...</span>
            {% endif %}
        {% endfor %}
    {% endwith %}
    {% if page_obj.has_next %}
        {% if selected_website %}
            <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query|urlencode }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
            <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query|urlencode }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
        {% else %}
            <a href="?{% if search_query %}q={{ search_query|urlencode }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
            <a href="?{% if search_query %}q={{ search_query|urlencode }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
        {% endif %}
    {% endif %}
</div>
</div>
</div>
<hr/>
<p><a href="{% url 'article_list' %}" class="back-link">← 返回列表</a></p>
</div>
<script>
// Id of the crawler task started from this page, and the handle of the
// interval that polls its status.
let currentTaskId = null;
let statusCheckInterval = null;
// Crawler control elements. getElementById returns null for an id that is
// not present in the rendered page, so each of these may be null.
const runCrawlerBtn = document.getElementById('runCrawlerBtn');
const runDongfangyancaoCrawlerBtn = document.getElementById('runDongfangyancaoCrawlerBtn');
const pauseCrawlerBtn = document.getElementById('pauseCrawlerBtn');
const progressContainer = document.getElementById('crawlerProgress');
const progressFill = document.getElementById('progressFill');
const progressText = document.getElementById('progressText');
const resultDiv = document.getElementById('crawlerResult');
// Bind each handler only when its button exists. An unguarded
// addEventListener on null throws a TypeError, which would abort the rest
// of this script and leave the export buttons further down unbound.
if (runCrawlerBtn) {
    runCrawlerBtn.addEventListener('click', function () {
        runCrawler('www.news.cn', 'crawl_xinhua');
    });
}
if (runDongfangyancaoCrawlerBtn) {
    runDongfangyancaoCrawlerBtn.addEventListener('click', function () {
        runCrawler('东方烟草报', 'crawl_dongfangyancao');
    });
}
// Pause button: only meaningful once a task id has been recorded.
if (pauseCrawlerBtn) {
    pauseCrawlerBtn.addEventListener('click', function () {
        if (currentTaskId) {
            pauseCrawler(currentTaskId);
        }
    });
}
// 运行爬虫函数
/**
 * Start a crawler on the server and begin polling its status.
 *
 * @param {string} websiteName - Label of the target site (not read by this function).
 * @param {string} crawlerName - Value sent to the server as `crawler_name`.
 */
function runCrawler(websiteName, crawlerName) {
    // Common failure path: show the message, re-enable both start buttons
    // and hide the progress area.
    const reportFailure = (text) => {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result error';
        resultDiv.textContent = text;
        runCrawlerBtn.disabled = false;
        runDongfangyancaoCrawlerBtn.disabled = false;
        progressContainer.style.display = 'none';
    };

    // Lock both start buttons so a second click cannot launch another run.
    runCrawlerBtn.disabled = true;
    runDongfangyancaoCrawlerBtn.disabled = true;
    resultDiv.style.display = 'none';

    // Reveal the progress area in its initial state.
    progressContainer.style.display = 'block';
    updateProgress(0, '爬虫启动中...');

    const requestOptions = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        body: 'crawler_name=' + crawlerName
    };

    fetch('{% url "run_crawler" %}', requestOptions)
        .then((response) => response.json())
        .then((data) => {
            if (data.status !== 'success') {
                reportFailure(data.message);
                return;
            }
            // Remember the task, allow pausing, and start polling.
            currentTaskId = data.task_id;
            pauseCrawlerBtn.disabled = false;
            checkCrawlerStatus(currentTaskId);
        })
        .catch((error) => {
            reportFailure('请求失败: ' + error);
        });
}
// 检查爬虫状态的函数
/**
 * Poll the crawler status endpoint once per second for the given task and
 * update the progress bar, result box and buttons according to the
 * reported status ('running', 'completed', 'paused' or 'error').
 *
 * @param {*} taskId - Task identifier sent to the server as `task_id`.
 */
function checkCrawlerStatus(taskId) {
// Stop any poller left over from a previous run
if (statusCheckInterval) {
clearInterval(statusCheckInterval);
}
// Start a new poller that fires every 1000 ms
// NOTE(review): setInterval does not wait for the previous fetch to settle,
// so requests can overlap if the server takes longer than a second — confirm
// whether that is acceptable.
statusCheckInterval = setInterval(() => {
fetch('{% url "crawler_status" %}', {
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
'X-CSRFToken': '{{ csrf_token }}'
},
body: 'task_id=' + taskId
})
.then(response => response.json())
.then(data => {
if (data.status === 'running') {
// Simulated progress: whole seconds elapsed since data.start_time, capped at 90
const elapsedTime = new Date() - new Date(data.start_time);
const progress = Math.min(90, Math.floor(elapsedTime / 1000));
updateProgress(progress, data.message);
} else if (data.status === 'completed') {
// Finished: stop polling and show the final message at 100%
clearInterval(statusCheckInterval);
updateProgress(100, data.message);
// Re-enable the start buttons, disable pause
runCrawlerBtn.disabled = false;
runDongfangyancaoCrawlerBtn.disabled = false;
pauseCrawlerBtn.disabled = true;
// Show the success message
resultDiv.style.display = 'block';
resultDiv.className = 'crawler-result success';
resultDiv.textContent = data.message;
// Hide the progress bar after 3 seconds
// NOTE(review): the reload scheduled below fires after 2 seconds, so this
// 3-second timer presumably never gets to run — confirm the intended order.
setTimeout(() => {
progressContainer.style.display = 'none';
}, 3000);
// Reload the page so newly crawled articles are listed
setTimeout(() => {
location.reload();
}, 2000);
} else if (data.status === 'paused') {
// Paused: stop polling and freeze the bar at the reported progress (0 if absent)
clearInterval(statusCheckInterval);
updateProgress(data.progress || 0, '爬虫已暂停');
// Re-enable the start buttons, disable pause
runCrawlerBtn.disabled = false;
runDongfangyancaoCrawlerBtn.disabled = false;
pauseCrawlerBtn.disabled = true;
// Show the paused notice using the success styling
resultDiv.style.display = 'block';
resultDiv.className = 'crawler-result success';
resultDiv.textContent = '爬虫已暂停';
} else if (data.status === 'error') {
// Crawler failed: stop polling, hide the bar and show the server message
clearInterval(statusCheckInterval);
progressContainer.style.display = 'none';
resultDiv.style.display = 'block';
resultDiv.className = 'crawler-result error';
resultDiv.textContent = data.message;
// Re-enable the start buttons, disable pause
runCrawlerBtn.disabled = false;
runDongfangyancaoCrawlerBtn.disabled = false;
pauseCrawlerBtn.disabled = true;
}
})
.catch(error => {
// Request or JSON parsing failed (or a handler above threw): stop polling and report it
clearInterval(statusCheckInterval);
progressContainer.style.display = 'none';
resultDiv.style.display = 'block';
resultDiv.className = 'crawler-result error';
resultDiv.textContent = '检查状态失败: ' + error;
// Re-enable the start buttons, disable pause
runCrawlerBtn.disabled = false;
runDongfangyancaoCrawlerBtn.disabled = false;
pauseCrawlerBtn.disabled = true;
});
}, 1000);
}
// 更新进度条函数
/**
 * Set the fill width of the progress bar and the caption shown beneath it.
 *
 * @param {number} percent - Fill width, written as `<percent>%`.
 * @param {string} text - Caption text.
 */
function updateProgress(percent, text) {
    const fillWidth = percent + '%';
    progressFill.style.width = fillWidth;
    progressText.textContent = text;
}
// 暂停爬虫函数
/**
 * Ask the server to pause the given crawler task.
 *
 * @param {*} taskId - Task identifier sent to the server as `task_id`.
 */
function pauseCrawler(taskId) {
    // Shared error display for a rejected request and for a failed one.
    const showError = (text) => {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result error';
        resultDiv.textContent = text;
    };

    const requestOptions = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        body: 'task_id=' + taskId
    };

    fetch('{% url "pause_crawler" %}', requestOptions)
        .then((response) => response.json())
        .then((data) => {
            if (data.status !== 'success') {
                showError(data.message);
                return;
            }
            // Pause accepted: block further pause clicks and update the caption.
            pauseCrawlerBtn.disabled = true;
            updateProgress(data.progress || 0, '正在暂停...');
        })
        .catch((error) => {
            showError('暂停请求失败: ' + error);
        });
}
// 导出功能相关JavaScript
const checkboxes = document.querySelectorAll('.article-checkbox');
const exportJsonBtn = document.getElementById('exportJsonBtn');
const exportCsvBtn = document.getElementById('exportCsvBtn');
const selectAllBtn = document.getElementById('selectAllBtn');
const deselectAllBtn = document.getElementById('deselectAllBtn');
// 新增:获取ZIP导出按钮元素
const exportZipBtn = document.getElementById('exportZipBtn');
// 更新导出按钮状态
/**
 * Enable the JSON, CSV and ZIP export buttons when at least one article
 * checkbox is checked, and disable them otherwise.
 */
function updateExportButtons() {
    const nothingSelected =
        document.querySelectorAll('.article-checkbox:checked').length === 0;
    [exportJsonBtn, exportCsvBtn, exportZipBtn].forEach((button) => {
        button.disabled = nothingSelected;
    });
}
// Refresh the export buttons whenever any article checkbox changes.
for (const box of checkboxes) {
    box.addEventListener('change', updateExportButtons);
}
// "Select all": tick every article checkbox, then refresh the buttons.
selectAllBtn.addEventListener('click', function () {
    for (const box of checkboxes) {
        box.checked = true;
    }
    updateExportButtons();
});
// "Deselect all": clear every article checkbox, then refresh the buttons.
deselectAllBtn.addEventListener('click', function () {
    for (const box of checkboxes) {
        box.checked = false;
    }
    updateExportButtons();
});
/**
 * POST the ids of the checked articles to the export endpoint and save the
 * response body as a file download.
 *
 * The JSON, CSV and ZIP buttons previously carried three copies of this
 * logic that differed only in the two parameters below.
 *
 * @param {string} format - Export format sent to the server ('json', 'csv' or 'zip').
 * @param {string} filename - File name suggested to the browser for the download.
 */
function exportSelectedArticles(format, filename) {
    const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
        .map(checkbox => checkbox.value);
    fetch('{% url "export_articles" %}', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        body: JSON.stringify({
            article_ids: selectedArticles,
            format: format
        })
    })
        .then(response => {
            // Any non-2xx response is treated as a failed export.
            if (response.ok) {
                return response.blob();
            }
            throw new Error('导出失败');
        })
        .then(blob => {
            // Trigger the download through a temporary anchor pointing at an
            // object URL, then release the URL and remove the anchor.
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.href = url;
            a.download = filename;
            document.body.appendChild(a);
            a.click();
            window.URL.revokeObjectURL(url);
            document.body.removeChild(a);
        })
        .catch(error => {
            alert('导出失败: ' + error);
        });
}
// Export as JSON
exportJsonBtn.addEventListener('click', () => {
    exportSelectedArticles('json', 'articles.json');
});
// Export as CSV
exportCsvBtn.addEventListener('click', () => {
    exportSelectedArticles('csv', 'articles.csv');
});
// Export as ZIP archive
exportZipBtn.addEventListener('click', () => {
    exportSelectedArticles('zip', 'articles.zip');
});
// 初始化导出按钮状态
updateExportButtons();
</script>
</body>
</html>

View File

@@ -17,7 +17,7 @@
background: white;
padding: 30px;
margin-bottom: 20px;
box-shadow: 0 2px 5px rgba(0,0,0,0.05); /* 添加轻微阴影 */
box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); /* 添加轻微阴影 */
border-radius: 8px; /* 添加圆角 */
}
@@ -116,7 +116,7 @@
padding: 8px 4px;
color: #7f8c8d;
}
/* 新增:搜索框样式 */
.search-form {
margin-bottom: 20px;
@@ -124,7 +124,7 @@
background-color: #e3f2fd; /* 统一搜索框背景色调 */
border-radius: 5px;
}
.search-form input[type="text"] {
padding: 8px 12px;
border: 1px solid #bbdefb; /* 统一边框颜色 */
@@ -133,7 +133,7 @@
margin-right: 10px;
background-color: #fff;
}
.search-form input[type="submit"] {
padding: 8px 16px;
background-color: #3498db;
@@ -142,46 +142,46 @@
border-radius: 4px;
cursor: pointer;
}
.search-form input[type="submit"]:hover {
background-color: #2980b9;
}
.search-info {
color: #78909c; /* 统一搜索信息颜色 */
font-size: 0.9em;
margin-bottom: 10px;
}
/* 新增:左侧筛选栏样式 */
.content-wrapper {
display: flex;
gap: 20px;
}
.sidebar {
flex: 0 0 200px;
background-color: #e3f2fd; /* 统一边栏背景色调 */
border-radius: 5px;
padding: 15px;
}
.main-content {
flex: 1;
}
.sidebar .filters {
margin-bottom: 20px;
padding: 0;
background-color: transparent;
}
.sidebar .filters strong {
display: block;
margin-bottom: 10px;
color: #2c3e50;
}
.sidebar .filters a {
display: block;
padding: 8px 10px;
@@ -191,12 +191,12 @@
text-decoration: none;
border-radius: 3px;
}
.sidebar .filters a.active {
background-color: #3498db;
color: white;
}
/* 新增:导出功能样式 */
.export-section {
margin-bottom: 20px;
@@ -205,7 +205,7 @@
border-radius: 5px;
text-align: center;
}
.export-btn {
padding: 10px 20px;
background-color: #4caf50; /* 统一按钮背景色调 */
@@ -216,118 +216,31 @@
font-size: 16px;
margin: 0 5px;
}
.export-btn:hover {
background-color: #388e3c; /* 统一按钮悬停色调 */
}
.export-btn:disabled {
background-color: #9e9e9e; /* 统一禁用按钮色调 */
cursor: not-allowed;
}
.article-checkbox {
margin-right: 10px;
}
/* 新增:爬虫控制按钮样式 */
.crawler-control {
margin-bottom: 20px;
padding: 15px;
background-color: #fff3e0; /* 统一爬虫控制区域背景色调 */
border-radius: 5px;
text-align: center;
}
.crawler-btn {
padding: 10px 20px;
background-color: #ff9800; /* 统一爬虫按钮背景色调 */
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
margin: 0 5px;
}
.crawler-btn:hover {
background-color: #f57c00; /* 统一爬虫按钮悬停色调 */
}
.crawler-btn:disabled {
background-color: #9e9e9e; /* 统一禁用爬虫按钮色调 */
cursor: not-allowed;
}
.crawler-result {
margin-top: 10px;
padding: 10px;
border-radius: 4px;
display: none;
}
.crawler-result.success {
background-color: #e8f5e9;
color: #2e7d32;
}
.crawler-result.error {
background-color: #ffebee;
color: #c62828;
}
/* 新增:进度条样式 */
.progress-container {
margin-top: 10px;
display: none;
}
.progress-bar {
width: 100%;
height: 20px;
background-color: #e0e0e0;
border-radius: 10px;
overflow: hidden;
}
.progress-fill {
height: 100%;
background-color: #4caf50;
width: 0%;
transition: width 0.3s ease;
}
.progress-text {
margin-top: 5px;
font-size: 14px;
color: #666;
}
</style>
</head>
<body>
<div class="container">
<h1>绿色课堂文章列表</h1>
<!-- 新增:爬虫控制按钮 -->
<div class="crawler-control">
<button id="runCrawlerBtn" class="crawler-btn" data-website="www.news.cn">执行新华网爬虫</button>
<button id="runDongfangyancaoCrawlerBtn" class="crawler-btn" data-website="东方烟草报">执行东方烟草报爬虫</button>
<button id="pauseCrawlerBtn" class="crawler-btn" disabled>暂停爬虫</button>
<div id="crawlerProgress" class="progress-container">
<div class="progress-bar">
<div class="progress-fill" id="progressFill"></div>
</div>
<div class="progress-text" id="progressText">准备中...</div>
</div>
<div id="crawlerResult" class="crawler-result"></div>
</div>
<!-- 新增:搜索表单 -->
<div class="search-form">
<form method="get">
<input type="text" name="q" placeholder="输入关键词搜索文章..." value="{{ search_query }}">
{% if selected_website %}
<input type="hidden" name="website" value="{{ selected_website.id }}">
<input type="hidden" name="website" value="{{ selected_website.id }}">
{% endif %}
<input type="submit" value="搜索">
</form>
@@ -338,23 +251,25 @@
<div class="sidebar">
<div class="filters">
<strong>按网站筛选:</strong>
<a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}" {% if not selected_website %}class="active" {% endif %}>全部</a>
<a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}"
{% if not selected_website %}class="active" {% endif %}>全部</a>
{% for website in websites %}
<a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}" {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
<a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}"
{% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
{% endfor %}
</div>
</div>
<!-- 主内容区域 -->
<div class="main-content">
<!-- 新增:搜索结果信息 -->
{% if search_query %}
<div class="search-info">
搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章
<a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a>
</div>
<div class="search-info">
搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章
<a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a>
</div>
{% endif %}
<!-- 新增:导出功能 -->
<div class="export-section">
<button id="selectAllBtn" class="export-btn">全选</button>
@@ -367,60 +282,70 @@
<ul>
{% for article in page_obj %}
<li>
<input type="checkbox" class="article-checkbox" value="{{ article.id }}" id="article_{{ article.id }}">
<a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
<div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
</li>
{% empty %}
<li>暂无文章</li>
<li>
<input type="checkbox" class="article-checkbox" value="{{ article.id }}"
id="article_{{ article.id }}">
<a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
<div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
</li>
{% empty %}
<li>暂无文章</li>
{% endfor %}
</ul>
<div class="pagination">
{% if page_obj.has_previous %}
{% if selected_website %}
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo; 首页</a>
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
{% endif %}
{% if selected_website %}
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo;
首页</a>
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
{% endif %}
{% endif %}
<span>第 {{ page_obj.number }} 页,共 {{ page_obj.paginator.num_pages }} 页</span>
<!-- 修改:优化页码显示逻辑 -->
{% with page_obj.paginator as paginator %}
{% for num in paginator.page_range %}
{% if page_obj.number == num %}
<a href="#" class="current">{{ num }}</a>
{% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
{% if selected_website %}
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
{% endif %}
{% elif num == 1 or num == paginator.num_pages %}
{% if selected_website %}
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
{% endif %}
{% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
<span class="ellipsis">...</span>
{% endif %}
{% endfor %}
{% for num in paginator.page_range %}
{% if page_obj.number == num %}
<a href="#" class="current">{{ num }}</a>
{% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
{% if selected_website %}
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
{% endif %}
{% elif num == 1 or num == paginator.num_pages %}
{% if selected_website %}
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
{% endif %}
{% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
<span class="ellipsis">...</span>
{% endif %}
{% endfor %}
{% endwith %}
{% if page_obj.has_next %}
{% if selected_website %}
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
{% endif %}
{% if selected_website %}
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
<a href="?website=
{{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页
&raquo;</a>
{% else %}
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页
&raquo;</a>
{% endif %}
{% endif %}
</div>
</div>
@@ -428,213 +353,6 @@
</div>
<script>
// Shared state: the ID of the task started most recently by runCrawler, and the
// setInterval handle used by checkCrawlerStatus to poll that task's status.
let currentTaskId = null;
let statusCheckInterval = null;
// Look up the page elements the crawler controls operate on.
const runCrawlerBtn = document.getElementById('runCrawlerBtn');
const runDongfangyancaoCrawlerBtn = document.getElementById('runDongfangyancaoCrawlerBtn');
const pauseCrawlerBtn = document.getElementById('pauseCrawlerBtn');
const progressContainer = document.getElementById('crawlerProgress');
const progressFill = document.getElementById('progressFill');
const progressText = document.getElementById('progressText');
const resultDiv = document.getElementById('crawlerResult');
// Bind the crawler start buttons; each passes a site label and the crawler command name.
runCrawlerBtn.addEventListener('click', function() {
runCrawler('www.news.cn', 'crawl_xinhua');
});
runDongfangyancaoCrawlerBtn.addEventListener('click', function() {
runCrawler('东方烟草报', 'crawl_dongfangyancao');
});
// Pause button: only acts when a task ID has been recorded.
pauseCrawlerBtn.addEventListener('click', function() {
if (currentTaskId) {
pauseCrawler(currentTaskId);
}
});
/**
 * Ask the server to start a crawler, then begin polling its status.
 *
 * The request URL and CSRF token are Django template tags that are filled in
 * when the page is rendered.
 *
 * @param {string} websiteName - Label of the target site; currently unused by this function.
 * @param {string} crawlerName - Value posted as the `crawler_name` form field.
 */
function runCrawler(websiteName, crawlerName) {
    // Single failure path: show the message, re-enable the start buttons,
    // and hide the progress area.
    const showFailure = (message) => {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result error';
        resultDiv.textContent = message;
        runCrawlerBtn.disabled = false;
        runDongfangyancaoCrawlerBtn.disabled = false;
        progressContainer.style.display = 'none';
    };

    // Disable the start buttons so a second click cannot submit a duplicate request.
    runCrawlerBtn.disabled = true;
    runDongfangyancaoCrawlerBtn.disabled = true;
    resultDiv.style.display = 'none';

    // Reveal the progress area with an initial "starting" message.
    progressContainer.style.display = 'block';
    updateProgress(0, '爬虫启动中...');

    fetch('{% url "run_crawler" %}', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        // Percent-encode the value: characters such as "&", "=", "+" or
        // non-ASCII text would otherwise corrupt the form-encoded body.
        body: 'crawler_name=' + encodeURIComponent(crawlerName)
    })
        .then(response => response.json())
        .then(data => {
            if (data.status === 'success') {
                // Remember the task so the pause button can target it, then start polling.
                currentTaskId = data.task_id;
                pauseCrawlerBtn.disabled = false;
                checkCrawlerStatus(currentTaskId);
            } else {
                showFailure(data.message);
            }
        })
        .catch(error => {
            showFailure('请求失败: ' + error);
        });
}
/**
 * Poll the crawler status endpoint once per second for the given task and
 * update the progress bar, result box and buttons from each response.
 *
 * Polling stops when the reported status is 'completed', 'paused' or 'error',
 * or when a request or its handling fails.
 *
 * @param {string} taskId - Task ID posted as the `task_id` form field.
 */
function checkCrawlerStatus(taskId) {
    // Small UI helpers shared by the status branches below.
    const stopPolling = () => clearInterval(statusCheckInterval);
    const showResult = (cssClass, text) => {
        resultDiv.style.display = 'block';
        resultDiv.className = cssClass;
        resultDiv.textContent = text;
    };
    const unlockButtons = () => {
        runCrawlerBtn.disabled = false;
        runDongfangyancaoCrawlerBtn.disabled = false;
        pauseCrawlerBtn.disabled = true;
    };

    const handleStatus = (data) => {
        switch (data.status) {
            case 'running': {
                // Simulated progress: one percent per elapsed second, capped at 90.
                const elapsedMs = new Date() - new Date(data.start_time);
                const percent = Math.min(90, Math.floor(elapsedMs / 1000));
                updateProgress(percent, data.message);
                break;
            }
            case 'completed':
                stopPolling();
                updateProgress(100, data.message);
                unlockButtons();
                showResult('crawler-result success', data.message);
                // Hide the progress bar after 3 seconds.
                setTimeout(() => {
                    progressContainer.style.display = 'none';
                }, 3000);
                // Reload the page after 2 seconds so newly crawled articles appear.
                setTimeout(() => {
                    location.reload();
                }, 2000);
                break;
            case 'paused':
                stopPolling();
                updateProgress(data.progress || 0, '爬虫已暂停');
                unlockButtons();
                showResult('crawler-result success', '爬虫已暂停');
                break;
            case 'error':
                stopPolling();
                progressContainer.style.display = 'none';
                showResult('crawler-result error', data.message);
                unlockButtons();
                break;
        }
    };

    const handleFailure = (error) => {
        stopPolling();
        progressContainer.style.display = 'none';
        showResult('crawler-result error', '检查状态失败: ' + error);
        unlockButtons();
    };

    // Drop any poller left over from an earlier task before starting a new one.
    if (statusCheckInterval) {
        clearInterval(statusCheckInterval);
    }

    statusCheckInterval = setInterval(() => {
        fetch('{% url "crawler_status" %}', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-CSRFToken': '{{ csrf_token }}'
            },
            body: 'task_id=' + taskId
        })
            .then(response => response.json())
            .then(handleStatus)
            .catch(handleFailure);
    }, 1000);
}
/**
 * Set the progress bar fill width and its caption.
 *
 * @param {number} percent - Fill width, applied as a CSS percentage.
 * @param {string} text - Caption shown in the progress text element.
 */
function updateProgress(percent, text) {
    progressFill.style.width = `${percent}%`;
    progressText.textContent = text;
}
/**
 * Ask the server to pause the given crawler task.
 *
 * On success the pause button is disabled and the progress caption changes to
 * a "pausing" message; any failure is written to the result box.
 *
 * @param {string} taskId - Task ID posted as the `task_id` form field.
 */
function pauseCrawler(taskId) {
    // Write an error message into the result box.
    const reportError = (text) => {
        resultDiv.style.display = 'block';
        resultDiv.className = 'crawler-result error';
        resultDiv.textContent = text;
    };

    const requestOptions = {
        method: 'POST',
        headers: {
            'Content-Type': 'application/x-www-form-urlencoded',
            'X-CSRFToken': '{{ csrf_token }}'
        },
        body: 'task_id=' + taskId
    };

    fetch('{% url "pause_crawler" %}', requestOptions)
        .then(response => response.json())
        .then(data => {
            if (data.status !== 'success') {
                reportError(data.message);
                return;
            }
            // Pause accepted: lock the pause button and update the caption.
            pauseCrawlerBtn.disabled = true;
            updateProgress(data.progress || 0, '正在暂停...');
        })
        .catch(error => {
            reportError('暂停请求失败: ' + error);
        });
}
// 导出功能相关JavaScript
const checkboxes = document.querySelectorAll('.article-checkbox');
const exportJsonBtn = document.getElementById('exportJsonBtn');
@@ -643,7 +361,7 @@
const deselectAllBtn = document.getElementById('deselectAllBtn');
// 新增:获取ZIP导出按钮元素
const exportZipBtn = document.getElementById('exportZipBtn');
// 更新导出按钮状态
function updateExportButtons() {
const selectedCount = document.querySelectorAll('.article-checkbox:checked').length;
@@ -651,12 +369,12 @@
exportCsvBtn.disabled = selectedCount === 0;
exportZipBtn.disabled = selectedCount === 0; // 新增:更新ZIP导出按钮状态
}
// 为所有复选框添加事件监听器
checkboxes.forEach(checkbox => {
checkbox.addEventListener('change', updateExportButtons);
});
// 全选功能
selectAllBtn.addEventListener('click', () => {
checkboxes.forEach(checkbox => {
@@ -664,7 +382,7 @@
});
updateExportButtons();
});
// 取消全选功能
deselectAllBtn.addEventListener('click', () => {
checkboxes.forEach(checkbox => {
@@ -672,12 +390,12 @@
});
updateExportButtons();
});
// 导出为JSON功能
exportJsonBtn.addEventListener('click', () => {
const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
.map(checkbox => checkbox.value);
// 发送POST请求导出文章
fetch('{% url "export_articles" %}', {
method: 'POST',
@@ -690,32 +408,32 @@
format: 'json'
})
})
.then(response => {
if (response.ok) {
return response.blob();
}
throw new Error('导出失败');
})
.then(blob => {
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'articles.json';
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
})
.catch(error => {
alert('导出失败: ' + error);
});
.then(response => {
if (response.ok) {
return response.blob();
}
throw new Error('导出失败');
})
.then(blob => {
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'articles.json';
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
})
.catch(error => {
alert('导出失败: ' + error);
});
});
// 导出为CSV功能
exportCsvBtn.addEventListener('click', () => {
const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
.map(checkbox => checkbox.value);
// 发送POST请求导出文章
fetch('{% url "export_articles" %}', {
method: 'POST',
@@ -728,32 +446,32 @@
format: 'csv'
})
})
.then(response => {
if (response.ok) {
return response.blob();
}
throw new Error('导出失败');
})
.then(blob => {
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'articles.csv';
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
})
.catch(error => {
alert('导出失败: ' + error);
});
.then(response => {
if (response.ok) {
return response.blob();
}
throw new Error('导出失败');
})
.then(blob => {
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'articles.csv';
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
})
.catch(error => {
alert('导出失败: ' + error);
});
});
// 新增:导出为ZIP包功能
exportZipBtn.addEventListener('click', () => {
const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
.map(checkbox => checkbox.value);
// 发送POST请求导出文章为ZIP包
fetch('{% url "export_articles" %}', {
method: 'POST',
@@ -766,27 +484,27 @@
format: 'zip' // 指定导出格式为ZIP
})
})
.then(response => {
if (response.ok) {
return response.blob();
}
throw new Error('导出失败');
})
.then(blob => {
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'articles.zip';
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
})
.catch(error => {
alert('导出失败: ' + error);
});
.then(response => {
if (response.ok) {
return response.blob();
}
throw new Error('导出失败');
})
.then(blob => {
const url = window.URL.createObjectURL(blob);
const a = document.createElement('a');
a.href = url;
a.download = 'articles.zip';
document.body.appendChild(a);
a.click();
window.URL.revokeObjectURL(url);
document.body.removeChild(a);
})
.catch(error => {
alert('导出失败: ' + error);
});
});
// 初始化导出按钮状态
updateExportButtons();
</script>

View File

@@ -9,7 +9,9 @@ urlpatterns = [
path('run-crawler/', views.run_crawler, name='run_crawler'),
# 新增:检查爬虫状态的路由
path('crawler-status/', views.crawler_status, name='crawler_status'),
# 新增:暂停爬虫的路由
path('pause-crawler/', views.pause_crawler, name='pause_crawler'),
# 添加导出文章的路由
path('export-articles/', views.export_articles, name='export_articles'),
# 添加自定义管理后台的路由
]
]

View File

@@ -12,7 +12,6 @@ import csv
from django.views.decorators.csrf import csrf_exempt
from django.utils import timezone
# 用于跟踪爬虫任务状态的全局字典
crawler_tasks = {}
@@ -73,7 +72,7 @@ def run_crawler(request):
# 生成任务ID
task_id = str(uuid.uuid4())
# 记录任务开始前的文章数量
initial_count = Article.objects.count()
@@ -87,18 +86,18 @@ def run_crawler(request):
'start_time': timezone.now(),
'initial_count': initial_count
}
# 根据爬虫名称调用相应的命令
if crawler_name in ['crawl_xinhua', 'crawl_dongfangyancao']:
call_command(crawler_name)
else:
# 如果是通用爬虫命令使用crawl_articles
call_command('crawl_articles', crawler_name)
# 计算新增文章数量
final_count = Article.objects.count()
added_count = final_count - initial_count
# 更新任务状态为完成
crawler_tasks[task_id] = {
'status': 'completed',
@@ -113,11 +112,11 @@ def run_crawler(request):
error_msg = "检测到重复文章URL已跳过重复项"
else:
print(f"爬虫执行出错: {e}")
# 计算实际新增文章数量(即使有错误也统计)
final_count = Article.objects.count()
added_count = final_count - initial_count
# 更新任务状态为完成(即使有部分错误)
crawler_tasks[task_id] = {
'status': 'completed',
@@ -147,17 +146,47 @@ def crawler_status(request):
task_id = request.POST.get('task_id', '')
if not task_id:
return JsonResponse({'status': 'error', 'message': '任务ID不能为空'})
# 获取任务状态
task_info = crawler_tasks.get(task_id)
if not task_info:
return JsonResponse({'status': 'error', 'message': '未找到任务'})
return JsonResponse(task_info)
except Exception as e:
return JsonResponse({'status': 'error', 'message': str(e)})
# 新增:暂停爬虫的视图
@require_http_methods(["POST"])
def pause_crawler(request):
    """Mark a crawler task as paused in the in-memory task registry.

    Reads ``task_id`` from the POST data and looks the task up in the
    module-level ``crawler_tasks`` dict. This only changes the recorded
    status and message; it does not interrupt the crawl itself.

    Returns:
        JsonResponse: ``{'status': 'success', 'message': ..., 'progress': ...}``
        when the task was marked paused, otherwise
        ``{'status': 'error', 'message': ...}``.
    """
    try:
        task_id = request.POST.get('task_id', '')
        if not task_id:
            return JsonResponse({'status': 'error', 'message': '任务ID不能为空'})

        task_info = crawler_tasks.get(task_id)
        if not task_info:
            return JsonResponse({'status': 'error', 'message': '未找到任务'})

        # A finished task keeps its final state: overwriting it would make
        # status polling report 'paused' for a crawl that already ended.
        if task_info.get('status') in ('completed', 'error'):
            return JsonResponse({'status': 'error', 'message': '任务已结束,无法暂停'})

        # NOTE: simulated pause only — the record is updated, nothing is stopped.
        task_info['status'] = 'paused'
        task_info['message'] = '爬虫已暂停'

        return JsonResponse({
            'status': 'success',
            'message': '爬虫已暂停',
            # Report the task's recorded progress if it has one; 0 otherwise.
            'progress': task_info.get('progress', 0),
        })
    except Exception as e:
        # Top-level view boundary: surface the failure as a JSON error payload.
        return JsonResponse({'status': 'error', 'message': str(e)})
# 新增:文章导出视图
@csrf_exempt
@require_http_methods(["POST"])
@@ -167,13 +196,13 @@ def export_articles(request):
data = json.loads(request.body)
article_ids = data.get('article_ids', [])
format_type = data.get('format', 'json')
# 获取选中的文章
articles = Article.objects.filter(id__in=article_ids)
if not articles.exists():
return HttpResponse('没有选中文章', status=400)
# 根据格式类型导出
if format_type == 'json':
# 准备JSON数据
@@ -189,7 +218,7 @@ def export_articles(request):
'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
'media_files': article.media_files
})
# 创建JSON响应
response = HttpResponse(
json.dumps(articles_data, ensure_ascii=False, indent=2),
@@ -197,16 +226,16 @@ def export_articles(request):
)
response['Content-Disposition'] = 'attachment; filename="articles.json"'
return response
elif format_type == 'csv':
# 创建CSV响应
response = HttpResponse(content_type='text/csv')
response['Content-Disposition'] = 'attachment; filename="articles.csv"'
# 创建CSV写入器
writer = csv.writer(response)
writer.writerow(['ID', '标题', '网站', 'URL', '发布时间', '内容', '创建时间', '媒体文件'])
# 写入文章数据
for article in articles:
writer.writerow([
@@ -219,25 +248,25 @@ def export_articles(request):
article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
';'.join(article.media_files) if article.media_files else ''
])
return response
# 新增:支持ZIP格式导出
elif format_type == 'zip':
import zipfile
from io import BytesIO
from django.conf import settings
import os
# 创建内存中的ZIP文件
zip_buffer = BytesIO()
with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
# 为每篇文章创建Word文档并添加到ZIP文件中
for article in articles:
# 为每篇文章创建单独的文件夹
article_folder = f"article_{article.id}_{article.title.replace('/', '_').replace('\\', '_').replace(':', '_').replace('*', '_').replace('?', '_').replace('"', '_').replace('<', '_').replace('>', '_').replace('|', '_')}"
# 创建文章数据
article_data = {
'id': article.id,
@@ -249,7 +278,7 @@ def export_articles(request):
'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
'media_files': article.media_files
}
# 将文章数据保存为Word文件并添加到ZIP
try:
from docx import Document
@@ -257,23 +286,24 @@ def export_articles(request):
from io import BytesIO
from bs4 import BeautifulSoup
import requests
# 创建Word文档
doc = Document()
doc.add_heading(article.title, 0)
# 添加文章元数据
doc.add_paragraph(f"网站: {article.website.name}")
doc.add_paragraph(f"URL: {article.url}")
doc.add_paragraph(f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
doc.add_paragraph(
f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}")
# 添加文章内容
doc.add_heading('内容', level=1)
# 处理HTML内容
soup = BeautifulSoup(article.content, 'html.parser')
# 处理内容中的图片
for img in soup.find_all('img'):
src = img.get('src', '')
@@ -293,13 +323,13 @@ def export_articles(request):
except Exception as e:
# 如果添加图片失败添加图片URL作为文本
doc.add_paragraph(f"[图片: {src}]")
# 移除原始img标签
img.decompose()
content_text = soup.get_text()
doc.add_paragraph(content_text)
# 添加媒体文件信息
if article.media_files:
doc.add_heading('媒体文件', level=1)
@@ -309,7 +339,7 @@ def export_articles(request):
if os.path.exists(full_path):
# 检查文件扩展名以确定处理方式
file_extension = os.path.splitext(media_file)[1].lower()
# 图片文件处理
if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
doc.add_picture(full_path, width=Inches(4.0))
@@ -324,7 +354,7 @@ def export_articles(request):
if media_file.startswith('http'):
response = requests.get(media_file, timeout=10)
file_extension = os.path.splitext(media_file)[1].lower()
# 图片文件处理
if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']:
image_stream = BytesIO(response.content)
@@ -335,20 +365,22 @@ def export_articles(request):
doc.add_paragraph(media_file)
except Exception as e:
doc.add_paragraph(media_file)
# 保存Word文档到内存
doc_buffer = BytesIO()
doc.save(doc_buffer)
doc_buffer.seek(0)
# 将Word文档添加到ZIP包
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'), doc_buffer.read())
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.docx'),
doc_buffer.read())
except ImportError:
# 如果没有安装python-docx库回退到JSON格式
json_data = json.dumps(article_data, ensure_ascii=False, indent=2)
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.json'), json_data)
zip_file.writestr(os.path.join(article_folder, f'{article.title.replace("/", "_")}.json'),
json_data)
# 添加媒体文件到ZIP包
if article.media_files:
for media_file in article.media_files:
@@ -362,19 +394,21 @@ def export_articles(request):
if media_file.startswith('http'):
import requests
response = requests.get(media_file, timeout=10)
zip_file.writestr(os.path.join(article_folder, 'media', os.path.basename(media_file)), response.content)
zip_file.writestr(
os.path.join(article_folder, 'media', os.path.basename(media_file)),
response.content)
except Exception as e:
# 如果添加媒体文件失败,继续处理其他文件
pass
# 创建HttpResponse
zip_buffer.seek(0)
response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename=articles_export.zip'
return response
else:
return HttpResponse('不支持的格式', status=400)
except Exception as e:
return HttpResponse(f'导出失败: {str(e)}', status=500)
return HttpResponse(f'导出失败: {str(e)}', status=500)