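Fix bugs: resolve the admin crawler from the selected Website record, rename the gov.cn/news.cn site entries, add the 人民日报 crawler command, and rebuild the article detail template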

2025-08-13 21:35:11 +08:00
parent c618528a0a
commit 31d0525cd0
10 changed files with 243 additions and 897 deletions
--- a/core/admin.py
+++ b/core/admin.py
@@ -1,9 +1,6 @@
 from django.contrib import admin
 from django.contrib.admin import AdminSite
 from .models import Website, Article
 # 添加actions相关的导入
 from django.contrib import messages
 from django.http import HttpResponseRedirect
 # 添加导出功能所需导入
 import csv
 from django.http import HttpResponse
@@ -12,17 +9,7 @@ import json
 from django.shortcuts import render, redirect
 from django.urls import path
 from django.contrib import admin
 from django.http import JsonResponse
 from django.views.decorators.http import require_http_methods
 from django.core.management import call_command
 import threading
 import uuid
 from django.utils import timezone
 # 创建自定义管理站点
 # 实例化管理站点
 # 添加运行爬虫的视图函数
 def run_crawler_view(request):
@@ -36,21 +23,24 @@ def run_crawler_view(request):
            return redirect('admin:core_article_changelist')
        try:
-            # 根据网站名称确定要执行的爬虫命令
+            # 动态获取网站对象
-            if website_name == 'crawl_xinhua':
+            website = Website.objects.get(name=website_name)
                crawler_name = 'crawl_xinhua'
            elif website_name == 'crawl_dongfangyancao':
                crawler_name = 'crawl_dongfangyancao'
            elif website_name == 'crawl_articles':
                crawler_name = 'crawl_articles'
            else:
                # 对于其他网站，使用通用爬虫命令
                crawler_name = 'crawl_articles'
-            # 运行爬虫命令，不传递website_name作为参数
+            # 根据网站对象确定要执行的爬虫命令
-            call_command(crawler_name)
+            # 移除默认的通用爬虫，每个网站必须配置自己的爬虫命令
            crawler_name = getattr(website, 'crawler_command', None)
-            messages.success(request, f'成功执行爬虫: {crawler_name}')
+            # 如果网站没有配置爬虫命令，则报错
            if not crawler_name:
                messages.error(request, f'网站 {website_name} 未配置爬虫命令')
                return redirect('admin:core_article_changelist')
            # 运行爬虫命令，传递网站名称
            call_command(crawler_name, website_name)
            messages.success(request, f'成功执行爬虫: {website_name}')
        except Website.DoesNotExist:
            messages.error(request, f'网站不存在: {website_name}')
        except Exception as e:
            messages.error(request, f'执行爬虫失败: {str(e)}')
@@ -71,6 +61,10 @@ class ArticleAdmin(admin.ModelAdmin):
    actions = ['delete_selected_articles', 'export_as_csv', 'export_as_json',
               'export_as_word', 'export_with_media']
    def get_websites(self):
        """Return the Website rows whose ``enabled`` flag is True.

        The admin change-list template iterates over this result to build the
        options of the crawler site dropdown. The queryset is built on every
        call; nothing is cached on the admin instance.
        """
        return Website.objects.filter(enabled=True)
    # 重写get_urls方法，添加自定义URL
    def get_urls(self):
        urls = super().get_urls()
--- a/core/management/commands/crawl_cngov.py
+++ b/core/management/commands/crawl_cngov.py
@@ -8,7 +8,7 @@ class Command(BaseCommand):
    def handle(self, *args, **kwargs):
        website, created = Website.objects.get_or_create(
-            name="www.gov.cn",
+            name="中国政府网",
            defaults={
                'article_list_url': 'https://www.gov.cn/',
                'article_selector': 'a'
--- a/core/management/commands/crawl_rmrb.py
+++ b/core/management/commands/crawl_rmrb.py
@@ -0,0 +1,26 @@
 from django.core.management.base import BaseCommand
 from core.models import Website
 from core.utils import full_site_crawler
 class Command(BaseCommand):
    """Management command that crawls https://www.peopleapp.com from its home page.

    The command takes no arguments of its own: it registers (or reuses) the
    Website row named "人民日报" and hands it to ``full_site_crawler``.
    """
    help = "全站递归爬取 人民日报 https://www.peopleapp.com"
    def handle(self, *args, **kwargs):
        """Ensure the Website row exists, then run the crawler on the home page.

        ``args`` and ``kwargs`` are accepted but never read in this method.
        """
        # Look the site up by name; the defaults are applied only when the row
        # has to be created.
        website, created = Website.objects.get_or_create(
            name="人民日报",
            defaults={
                'article_list_url': 'https://www.peopleapp.com/home',
                'article_selector': 'a',
                'base_url': 'https://www.peopleapp.com'
            }
        )
        # Backfill base_url on a row that already existed but has it unset
        # (any falsy value), since get_or_create leaves existing rows untouched.
        if not created and not website.base_url:
            website.base_url = 'https://www.peopleapp.com'
            website.save()
        start_url = "https://www.peopleapp.com/home"
        self.stdout.write(f"开始全站爬取: {start_url}")
        # NOTE(review): max_pages=500 presumably caps how many pages the crawler
        # visits -- confirm against core.utils.full_site_crawler.
        full_site_crawler(start_url, website, max_pages=500)
        self.stdout.write("爬取完成")
--- a/core/management/commands/crawl_xinhua.py
+++ b/core/management/commands/crawl_xinhua.py
@@ -8,7 +8,7 @@ class Command(BaseCommand):
    def handle(self, *args, **kwargs):
        website, created = Website.objects.get_or_create(
-            name="www.news.cn",
+            name="新华网",
            defaults={
                'article_list_url': 'https://www.news.cn/',
                'article_selector': 'a'
--- a/core/management/commands/export_articles.py
+++ b/core/management/commands/export_articles.py
@@ -4,7 +4,6 @@ import json
 import csv
 import os
 from django.conf import settings
 from django.core.files.storage import default_storage
 import zipfile
 from django.utils import timezone
--- a/core/templates/admin/core/article/change_list.html
+++ b/core/templates/admin/core/article/change_list.html
@@ -9,9 +9,9 @@
            <label for="website-select">选择网站:</label>
            <select name="website_name" id="website-select" required>
                <option value="">-- 请选择网站 --</option>
-                <option value="crawl_xinhua">新华网</option>
+                {% for website in cl.model_admin.get_websites %}
-                <option value="crawl_dongfangyancao">东方烟草报</option>
+                    <option value="{{ website.name }}">{{ website.name }}</option>
-                <option value="crawl_articles">通用爬虫</option>
+                {% endfor %}
            </select>
            <input type="submit" value="执行爬虫" class="default" style="margin-left: 10px;"/>
        </form>
--- a/core/templates/core/article_detail.html
+++ b/core/templates/core/article_detail.html
@@ -2,7 +2,8 @@
 <html lang="zh">
 <head>
    <meta charset="UTF-8"/>
-    <title>绿色课堂文章列表</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
    <title>{{ article.title }} - 绿色课堂</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
@@ -10,15 +11,16 @@
            color: #333;
            margin: 0 auto;
            padding: 20px;
-            background-color: #f0f8ff; /* 统一背景色调 */
+            background-color: #f0f8ff;
            max-width: 800px;
        }
        .container {
            background: white;
            padding: 30px;
            margin-bottom: 20px;
-            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); /* 添加轻微阴影 */
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
-            border-radius: 8px; /* 添加圆角 */
+            border-radius: 8px;
        }
        h1 {
@@ -28,765 +30,59 @@
            margin-top: 0;
        }
-        .filters {
+        .meta {
            color: #78909c;
            font-size: 0.9em;
            margin-bottom: 20px;
            padding: 15px;
            background-color: #e3f2fd; /* 统一滤镜背景色调 */
            border-radius: 5px;
        }
-        .filters a {
+        .content {
            margin-top: 20px;
        }
        .content img {
            max-width: 100%;
            height: auto;
        }
        .back-link {
            display: inline-block;
-            padding: 5px 10px;
+            margin-bottom: 20px;
-            margin: 0 5px 5px 0;
+            color: #1976d2;
-            background-color: #bbdefb; /* 统一链接背景色调 */
+            text-decoration: none;
        }
        .back-link:hover {
            color: #0d47a1;
            text-decoration: none;
            border-radius: 3px;
        }
        .filters a.active {
            background-color: #3498db;
            color: white;
        }
        ul {
            list-style: none;
            padding: 0;
        }
        li {
            padding: 10px 0;
            border-bottom: 1px solid #e0e0e0; /* 统一分隔线颜色 */
        }
        li:last-child {
            border-bottom: none;
        }
        a {
            color: #1976d2; /* 统一链接颜色 */
            text-decoration: none;
        }
        a:hover {
            color: #0d47a1; /* 统一悬停颜色 */
            text-decoration: underline;
        }
-        .meta {
+        @media (max-width: 600px) {
-            color: #78909c; /* 统一元数据颜色 */
+            body {
            font-size: 0.9em;
        }
        .pagination {
            margin-top: 30px;
            text-align: center;
            padding: 20px 0;
        }
        .pagination a {
            display: inline-block;
            padding: 8px 16px;
            background-color: #3498db;
            color: white;
            text-decoration: none;
            border-radius: 4px;
            margin: 0 2px; /* 修改:调整页码间距 */
        }
        .pagination a:hover {
            background-color: #2980b9;
        }
        .pagination span {
            margin: 0 10px;
            color: #7f8c8d;
        }
        /* 新增:当前页码样式 */
        .pagination .current {
            background-color: #2980b9;
            cursor: default;
        }
        /* 新增:省略号样式 */
        .pagination .ellipsis {
            display: inline-block;
            padding: 8px 4px;
            color: #7f8c8d;
        }
        /* 新增:搜索框样式 */
        .search-form {
            margin-bottom: 20px;
            padding: 15px;
            background-color: #e3f2fd; /* 统一搜索框背景色调 */
            border-radius: 5px;
        }
        .search-form input[type="text"] {
            padding: 8px 12px;
            border: 1px solid #bbdefb; /* 统一边框颜色 */
            border-radius: 4px;
            width: 300px;
            margin-right: 10px;
            background-color: #fff;
        }
        .search-form input[type="submit"] {
            padding: 8px 16px;
            background-color: #3498db;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
        }
        .search-form input[type="submit"]:hover {
            background-color: #2980b9;
        }
        .search-info {
            color: #78909c; /* 统一搜索信息颜色 */
            font-size: 0.9em;
            margin-bottom: 10px;
        }
        /* 新增:左侧筛选栏样式 */
        .content-wrapper {
            display: flex;
            gap: 20px;
        }
        .sidebar {
            flex: 0 0 200px;
            background-color: #e3f2fd; /* 统一边栏背景色调 */
            border-radius: 5px;
            padding: 15px;
        }
        .main-content {
            flex: 1;
        }
        .sidebar .filters {
            margin-bottom: 20px;
            padding: 0;
            background-color: transparent;
        }
        .sidebar .filters strong {
            display: block;
            margin-bottom: 10px;
            color: #2c3e50;
        }
        .sidebar .filters a {
            display: block;
            padding: 8px 10px;
            margin: 0 0 5px 0;
            background-color: #bbdefb; /* 统一边栏链接背景色调 */
            color: #0d47a1;
            text-decoration: none;
            border-radius: 3px;
        }
        .sidebar .filters a.active {
            background-color: #3498db;
            color: white;
        }
        /* 新增:导出功能样式 */
        .export-section {
            margin-bottom: 20px;
            padding: 15px;
            background-color: #e8f5e9; /* 统一导出区域背景色调 */
            border-radius: 5px;
            text-align: center;
        }
        .export-btn {
            padding: 10px 20px;
            background-color: #4caf50; /* 统一按钮背景色调 */
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
            margin: 0 5px;
        }
        .export-btn:hover {
            background-color: #388e3c; /* 统一按钮悬停色调 */
        }
        .export-btn:disabled {
            background-color: #9e9e9e; /* 统一禁用按钮色调 */
            cursor: not-allowed;
        }
        .article-checkbox {
            margin-right: 10px;
        }
        /* 新增:爬虫控制按钮样式 */
        .crawler-control {
            margin-bottom: 20px;
            padding: 15px;
            background-color: #fff3e0; /* 统一爬虫控制区域背景色调 */
            border-radius: 5px;
            text-align: center;
        }
        .crawler-btn {
            padding: 10px 20px;
            background-color: #ff9800; /* 统一爬虫按钮背景色调 */
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
            margin: 0 5px;
        }
        .crawler-btn:hover {
            background-color: #f57c00; /* 统一爬虫按钮悬停色调 */
        }
        .crawler-btn:disabled {
            background-color: #9e9e9e; /* 统一禁用爬虫按钮色调 */
            cursor: not-allowed;
        }
        .crawler-result {
            margin-top: 10px;
                padding: 10px;
            border-radius: 4px;
            display: none;
            }
-        .crawler-result.success {
+            .container {
-            background-color: #e8f5e9;
+                padding: 15px;
            color: #2e7d32;
            }
        .crawler-result.error {
            background-color: #ffebee;
            color: #c62828;
        }
        /* 新增:进度条样式 */
        .progress-container {
            margin-top: 10px;
            display: none;
        }
        .progress-bar {
            width: 100%;
            height: 20px;
            background-color: #e0e0e0;
            border-radius: 10px;
            overflow: hidden;
        }
        .progress-fill {
            height: 100%;
            background-color: #4caf50;
            width: 0%;
            transition: width 0.3s ease;
        }
        .progress-text {
            margin-top: 5px;
            font-size: 14px;
            color: #666;
        }
    </style>
 </head>
 <body>
-<div class="container">
+    <div class="container">
-    <h1>绿色课堂文章列表</h1>
+        <a href="{% url 'article_list' %}" class="back-link">&laquo; 返回文章列表</a>
-    <!-- 新增:搜索表单 -->
+        <h1>{{ article.title }}</h1>
-    <div class="search-form">
+
-        <form method="get">
+        <div class="meta">
-            <input type="text" name="q" placeholder="输入关键词搜索文章..." value="{{ search_query }}">
+            网站: {{ article.website.name }} |
-            {% if selected_website %}
+            发布时间: {{ article.pub_date|date:"Y-m-d H:i" }} |
-                <input type="hidden" name="website" value="{{ selected_website.id }}">
+            创建时间: {{ article.created_at|date:"Y-m-d H:i" }}
            {% endif %}
            <input type="submit" value="搜索">
        </form>
        </div>
-    <div class="content-wrapper">
+        <div class="content">
-        <!-- 左侧筛选栏 -->
+            {{ article.content|safe }}
        <div class="sidebar">
            <div class="filters">
                <strong>按网站筛选：</strong>
                <a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}"
                   {% if not selected_website %}class="active" {% endif %}>全部</a>
                {% for website in websites %}
                    <a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}"
                       {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
                {% endfor %}
        </div>
    </div>
        <!-- 主内容区域 -->
        <div class="main-content">
            <!-- 新增:搜索结果信息 -->
            {% if search_query %}
                <div class="search-info">
                    搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章
                    <a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a>
                </div>
            {% endif %}
            <!-- 新增:导出功能 -->
            <div class="export-section">
                <button id="selectAllBtn" class="export-btn">全选</button>
                <button id="deselectAllBtn" class="export-btn">取消全选</button>
                <button id="exportJsonBtn" class="export-btn" disabled>导出为JSON</button>
                <button id="exportCsvBtn" class="export-btn" disabled>导出为CSV</button>
                <!-- 新增:导出为ZIP包按钮 -->
                <button id="exportZipBtn" class="export-btn" disabled>导出为ZIP包</button>
            </div>
            <ul>
                {% for article in page_obj %}
                    <li>
                        <input type="checkbox" class="article-checkbox" value="{{ article.id }}"
                               id="article_{{ article.id }}">
                        <a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
                        <div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
                    </li>
                    {% empty %}
                    <li>暂无文章</li>
                {% endfor %}
            </ul>
            <div class="pagination">
                {% if page_obj.has_previous %}
                    {% if selected_website %}
                        <a href="?website=
                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo;
                            首页</a>
                        <a href="?website=
                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
                    {% else %}
                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a>
                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
                    {% endif %}
                {% endif %}
                <span>第 {{ page_obj.number }} 页，共 {{ page_obj.paginator.num_pages }} 页</span>
                <!-- 修改:优化页码显示逻辑 -->
                {% with page_obj.paginator as paginator %}
                    {% for num in paginator.page_range %}
                        {% if page_obj.number == num %}
                            <a href="#" class="current">{{ num }}</a>
                        {% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
                            {% if selected_website %}
                                <a href="?website=
                                        {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
                            {% else %}
                                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
                            {% endif %}
                        {% elif num == 1 or num == paginator.num_pages %}
                            {% if selected_website %}
                                <a href="?website=
                                        {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
                            {% else %}
                                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
                            {% endif %}
                        {% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
                            <span class="ellipsis">...</span>
                        {% endif %}
                    {% endfor %}
                {% endwith %}
                {% if page_obj.has_next %}
                    {% if selected_website %}
                        <a href="?website=
                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
                        <a href="?website=
                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页
                            &raquo;</a>
                    {% else %}
                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页
                            &raquo;</a>
                    {% endif %}
                {% endif %}
            </div>
        </div>
    </div>
 </div>
 <script>
    // 存储当前任务ID和检查状态的定时器
    let currentTaskId = null;
    let statusCheckInterval = null;
    // 获取页面元素
    const runCrawlerBtn = document.getElementById('runCrawlerBtn');
    const runDongfangyancaoCrawlerBtn = document.getElementById('runDongfangyancaoCrawlerBtn');
    const pauseCrawlerBtn = document.getElementById('pauseCrawlerBtn');
    const progressContainer = document.getElementById('crawlerProgress');
    const progressFill = document.getElementById('progressFill');
    const progressText = document.getElementById('progressText');
    const resultDiv = document.getElementById('crawlerResult');
    // 绑定爬虫按钮事件
    runCrawlerBtn.addEventListener('click', function () {
        runCrawler('www.news.cn', 'crawl_xinhua');
    });
    runDongfangyancaoCrawlerBtn.addEventListener('click', function () {
        runCrawler('东方烟草报', 'crawl_dongfangyancao');
    });
    // 暂停按钮事件
    pauseCrawlerBtn.addEventListener('click', function () {
        if (currentTaskId) {
            pauseCrawler(currentTaskId);
        }
    });
    /**
     * Start a crawler run on the server and begin polling its status.
     *
     * Disables both "run" buttons, shows the progress area, then POSTs the
     * crawler name as a form-encoded body. On a "success" reply the returned
     * task id is stored in currentTaskId and polling starts; on any other
     * reply, or on a rejected request, the error is shown and the buttons are
     * re-enabled.
     *
     * @param websiteName Site label supplied by the caller.
     *        NOTE(review): never read inside this function.
     * @param crawlerName Value sent to the server as the crawler_name field.
     */
    function runCrawler(websiteName, crawlerName) {
        // Disable the buttons so the crawler cannot be started twice
        runCrawlerBtn.disabled = true;
        runDongfangyancaoCrawlerBtn.disabled = true;
        resultDiv.style.display = 'none';
        // Show the progress area
        progressContainer.style.display = 'block';
        updateProgress(0, '爬虫启动中...');
        // Send the POST request that starts the crawler
        fetch('{% url "run_crawler" %}', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-CSRFToken': '{{ csrf_token }}'
            },
            // NOTE(review): crawlerName is concatenated without URL-encoding
            body: 'crawler_name=' + crawlerName
        })
            .then(response => response.json())
            .then(data => {
                if (data.status === 'success') {
                    currentTaskId = data.task_id;
                    pauseCrawlerBtn.disabled = false;
                    // Start polling the crawler status
                    checkCrawlerStatus(currentTaskId);
                } else {
                    // Show the error message
                    resultDiv.style.display = 'block';
                    resultDiv.className = 'crawler-result error';
                    resultDiv.textContent = data.message;
                    // Restore the button state
                    runCrawlerBtn.disabled = false;
                    runDongfangyancaoCrawlerBtn.disabled = false;
                    progressContainer.style.display = 'none';
                }
            })
            .catch(error => {
                // Show the error message
                resultDiv.style.display = 'block';
                resultDiv.className = 'crawler-result error';
                resultDiv.textContent = '请求失败: ' + error;
                // Restore the button state
                runCrawlerBtn.disabled = false;
                runDongfangyancaoCrawlerBtn.disabled = false;
                progressContainer.style.display = 'none';
            });
    }
    /**
     * Poll the server once per second for the status of a crawler task.
     *
     * Any previously running poll timer is cleared first, so only one timer
     * stored in statusCheckInterval is active at a time. Each tick POSTs the
     * task id and reacts to the reported status:
     *   running   - update the progress bar with a simulated percentage
     *   completed - stop polling, show the message, then reload the page
     *   paused    - stop polling and show the paused state
     *   error     - stop polling and show the error message
     * A rejected request also stops polling and restores the buttons.
     *
     * @param taskId Task identifier sent to the server as the task_id field.
     */
    function checkCrawlerStatus(taskId) {
        // Clear the previous timer
        if (statusCheckInterval) {
            clearInterval(statusCheckInterval);
        }
        // Install a new timer that checks the status every second
        statusCheckInterval = setInterval(() => {
            fetch('{% url "crawler_status" %}', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/x-www-form-urlencoded',
                    'X-CSRFToken': '{{ csrf_token }}'
                },
                body: 'task_id=' + taskId
            })
                .then(response => response.json())
                .then(data => {
                    if (data.status === 'running') {
                        // Update the progress display (simulated progress):
                        // one percent per elapsed second, capped at 90
                        const elapsedTime = new Date() - new Date(data.start_time);
                        const progress = Math.min(90, Math.floor(elapsedTime / 1000));
                        updateProgress(progress, data.message);
                    } else if (data.status === 'completed') {
                        // Crawler finished: show the result
                        clearInterval(statusCheckInterval);
                        updateProgress(100, data.message);
                        // Restore the button state
                        runCrawlerBtn.disabled = false;
                        runDongfangyancaoCrawlerBtn.disabled = false;
                        pauseCrawlerBtn.disabled = true;
                        // Show the result message
                        resultDiv.style.display = 'block';
                        resultDiv.className = 'crawler-result success';
                        resultDiv.textContent = data.message;
                        // Hide the progress bar automatically after 3 seconds
                        // NOTE(review): the reload timer below (2 s) is shorter
                        // than this hide timer (3 s)
                        setTimeout(() => {
                            progressContainer.style.display = 'none';
                        }, 3000);
                        // Reload the page so the new articles are listed
                        setTimeout(() => {
                            location.reload();
                        }, 2000);
                    } else if (data.status === 'paused') {
                        // Crawler paused
                        clearInterval(statusCheckInterval);
                        updateProgress(data.progress || 0, '爬虫已暂停');
                        // Restore the button state
                        runCrawlerBtn.disabled = false;
                        runDongfangyancaoCrawlerBtn.disabled = false;
                        pauseCrawlerBtn.disabled = true;
                        // Show the result message
                        resultDiv.style.display = 'block';
                        resultDiv.className = 'crawler-result success';
                        resultDiv.textContent = '爬虫已暂停';
                    } else if (data.status === 'error') {
                        // Crawler failed
                        clearInterval(statusCheckInterval);
                        progressContainer.style.display = 'none';
                        resultDiv.style.display = 'block';
                        resultDiv.className = 'crawler-result error';
                        resultDiv.textContent = data.message;
                        // Restore the button state
                        runCrawlerBtn.disabled = false;
                        runDongfangyancaoCrawlerBtn.disabled = false;
                        pauseCrawlerBtn.disabled = true;
                    }
                })
                .catch(error => {
                    clearInterval(statusCheckInterval);
                    progressContainer.style.display = 'none';
                    resultDiv.style.display = 'block';
                    resultDiv.className = 'crawler-result error';
                    resultDiv.textContent = '检查状态失败: ' + error;
                    // Restore the button state
                    runCrawlerBtn.disabled = false;
                    runDongfangyancaoCrawlerBtn.disabled = false;
                    pauseCrawlerBtn.disabled = true;
                });
        }, 1000);
    }
    /**
     * Update the progress bar and its caption.
     *
     * @param percent Number used as the fill width, with a percent sign appended.
     * @param text    Caption written to the progress text element.
     */
    function updateProgress(percent, text) {
        progressFill.style.width = percent + '%';
        progressText.textContent = text;
    }
    /**
     * Ask the server to pause a running crawler task.
     *
     * POSTs the task id as a form-encoded body. On a "success" reply the pause
     * button is disabled and the caption switches to a "pausing" message; on
     * any other reply, or on a rejected request, the error is shown in the
     * result area.
     *
     * @param taskId Task identifier sent to the server as the task_id field.
     */
    function pauseCrawler(taskId) {
        fetch('{% url "pause_crawler" %}', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded',
                'X-CSRFToken': '{{ csrf_token }}'
            },
            body: 'task_id=' + taskId
        })
            .then(response => response.json())
            .then(data => {
                if (data.status === 'success') {
                    // Pause accepted: update the UI
                    pauseCrawlerBtn.disabled = true;
                    updateProgress(data.progress || 0, '正在暂停...');
                } else {
                    // Show the error message
                    resultDiv.style.display = 'block';
                    resultDiv.className = 'crawler-result error';
                    resultDiv.textContent = data.message;
                }
            })
            .catch(error => {
                // Show the error message
                resultDiv.style.display = 'block';
                resultDiv.className = 'crawler-result error';
                resultDiv.textContent = '暂停请求失败: ' + error;
            });
    }
    // 导出功能相关JavaScript
    const checkboxes = document.querySelectorAll('.article-checkbox');
    const exportJsonBtn = document.getElementById('exportJsonBtn');
    const exportCsvBtn = document.getElementById('exportCsvBtn');
    const selectAllBtn = document.getElementById('selectAllBtn');
    const deselectAllBtn = document.getElementById('deselectAllBtn');
    // 新增:获取ZIP导出按钮元素
    const exportZipBtn = document.getElementById('exportZipBtn');
    /**
     * Enable or disable the three export buttons (JSON, CSV, ZIP).
     *
     * The buttons are disabled exactly when no article checkbox is checked.
     */
    function updateExportButtons() {
        const selectedCount = document.querySelectorAll('.article-checkbox:checked').length;
        exportJsonBtn.disabled = selectedCount === 0;
        exportCsvBtn.disabled = selectedCount === 0;
        exportZipBtn.disabled = selectedCount === 0; // Added: keep the ZIP export button in sync
    }
    // 为所有复选框添加事件监听器
    checkboxes.forEach(checkbox => {
        checkbox.addEventListener('change', updateExportButtons);
    });
    // 全选功能
    selectAllBtn.addEventListener('click', () => {
        checkboxes.forEach(checkbox => {
            checkbox.checked = true;
        });
        updateExportButtons();
    });
    // 取消全选功能
    deselectAllBtn.addEventListener('click', () => {
        checkboxes.forEach(checkbox => {
            checkbox.checked = false;
        });
        updateExportButtons();
    });
    // 导出为JSON功能
    exportJsonBtn.addEventListener('click', () => {
        const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
            .map(checkbox => checkbox.value);
        // 发送POST请求导出文章
        fetch('{% url "export_articles" %}', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': '{{ csrf_token }}'
            },
            body: JSON.stringify({
                article_ids: selectedArticles,
                format: 'json'
            })
        })
            .then(response => {
                if (response.ok) {
                    return response.blob();
                }
                throw new Error('导出失败');
            })
            .then(blob => {
                const url = window.URL.createObjectURL(blob);
                const a = document.createElement('a');
                a.href = url;
                a.download = 'articles.json';
                document.body.appendChild(a);
                a.click();
                window.URL.revokeObjectURL(url);
                document.body.removeChild(a);
            })
            .catch(error => {
                alert('导出失败: ' + error);
            });
    });
    // 导出为CSV功能
    exportCsvBtn.addEventListener('click', () => {
        const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
            .map(checkbox => checkbox.value);
        // 发送POST请求导出文章
        fetch('{% url "export_articles" %}', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': '{{ csrf_token }}'
            },
            body: JSON.stringify({
                article_ids: selectedArticles,
                format: 'csv'
            })
        })
            .then(response => {
                if (response.ok) {
                    return response.blob();
                }
                throw new Error('导出失败');
            })
            .then(blob => {
                const url = window.URL.createObjectURL(blob);
                const a = document.createElement('a');
                a.href = url;
                a.download = 'articles.csv';
                document.body.appendChild(a);
                a.click();
                window.URL.revokeObjectURL(url);
                document.body.removeChild(a);
            })
            .catch(error => {
                alert('导出失败: ' + error);
            });
    });
    // 新增:导出为ZIP包功能
    exportZipBtn.addEventListener('click', () => {
        const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
            .map(checkbox => checkbox.value);
        // 发送POST请求导出文章为ZIP包
        fetch('{% url "export_articles" %}', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': '{{ csrf_token }}'
            },
            body: JSON.stringify({
                article_ids: selectedArticles,
                format: 'zip' // 指定导出格式为ZIP
            })
        })
            .then(response => {
                if (response.ok) {
                    return response.blob();
                }
                throw new Error('导出失败');
            })
            .then(blob => {
                const url = window.URL.createObjectURL(blob);
                const a = document.createElement('a');
                a.href = url;
                a.download = 'articles.zip';
                document.body.appendChild(a);
                a.click();
                window.URL.revokeObjectURL(url);
                document.body.removeChild(a);
            })
            .catch(error => {
                alert('导出失败: ' + error);
            });
    });
    // 初始化导出按钮状态
    updateExportButtons();
 </script>
 </body>
 </html>
--- a/core/templates/core/article_list.html
+++ b/core/templates/core/article_list.html
@@ -17,7 +17,7 @@
            background: white;
            padding: 30px;
            margin-bottom: 20px;
-            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); /* 添加轻微阴影 */
+            box-shadow: 0 2px 5px rgba(0,0,0,0.05); /* 添加轻微阴影 */
            border-radius: 8px; /* 添加圆角 */
        }
@@ -251,11 +251,9 @@
        <div class="sidebar">
            <div class="filters">
                <strong>按网站筛选：</strong>
-                <a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}"
+                <a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}" {% if not selected_website %}class="active" {% endif %}>全部</a>
                   {% if not selected_website %}class="active" {% endif %}>全部</a>
                {% for website in websites %}
-                    <a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}"
+                <a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}" {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
                       {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
                {% endfor %}
            </div>
        </div>
@@ -283,8 +281,7 @@
            <ul>
                {% for article in page_obj %}
                <li>
-                        <input type="checkbox" class="article-checkbox" value="{{ article.id }}"
+                    <input type="checkbox" class="article-checkbox" value="{{ article.id }}" id="article_{{ article.id }}">
                               id="article_{{ article.id }}">
                    <a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
                    <div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
                </li>
@@ -296,11 +293,8 @@
            <div class="pagination">
                {% if page_obj.has_previous %}
                {% if selected_website %}
-                        <a href="?website=
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo; 首页</a>
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo;
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
                            首页</a>
                        <a href="?website=
                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
                {% else %}
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a>
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
@@ -316,15 +310,13 @@
                <a href="#" class="current">{{ num }}</a>
                {% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
                {% if selected_website %}
-                                <a href="?website=
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
                                        {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
                {% else %}
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
                {% endif %}
                {% elif num == 1 or num == paginator.num_pages %}
                {% if selected_website %}
-                                <a href="?website=
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
                                        {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
                {% else %}
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
                {% endif %}
@@ -336,15 +328,11 @@
                {% if page_obj.has_next %}
                {% if selected_website %}
-                        <a href="?website=
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
                        <a href="?website=
                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页
                            &raquo;</a>
                {% else %}
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
-                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页
+                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
                            &raquo;</a>
                {% endif %}
                {% endif %}
            </div>
--- a/core/utils.py
+++ b/core/utils.py
@@ -107,6 +107,17 @@ def process_article(url, website):
                soup.find("div", id="content") or
                soup.find("div", class_="mainBody")
        )
    elif website.name == "人民日报":
        # 人民日报网站的文章结构处理
        title_tag = soup.find("h1") or soup.find("title")
        # 查找主要内容区域
        content_tag = (
                soup.find("div", class_="content") or
                soup.find("div", class_="article-content") or
                soup.find("div", id="content") or
                soup.find("div", class_="text") or
                soup.find("section", class_="content")
        )
    else:
        # 默认处理方式
        title_tag = soup.find("h1") or soup.find("title")
@@ -256,6 +267,24 @@ def full_site_crawler(start_url, website, max_pages=1000):
                    ("/xinwen/" in path) or
                    ("/huoban/" in path)
            )
        elif website.name == "人民日报":
            # 人民日报的文章页面判断逻辑
            parsed_url = urlparse(url)
            path = parsed_url.path
            # 修改: 增加更准确的文章页面判断逻辑
            is_article_page = (
                    (soup.find("div", class_="content") is not None and 
                     soup.find("h1") is not None) or
                    soup.find("div", class_="article-content") is not None or
                    (soup.find("div", id="content") is not None and 
                     soup.find("h1") is not None) or
                    soup.find("div", class_="text") is not None or
                    soup.find("section", class_="content") is not None or
                    ("/article/" in path) or
                    (path.startswith("/detail/") and len(path) > 10) or
                    # 增加对peopleapp.com特定文章路径的判断
                    ("/dynamic/" in path and "article" in path)
            )
        else:
            # 默认判断逻辑
            is_article_page = (
@@ -271,5 +300,18 @@ def full_site_crawler(start_url, website, max_pages=1000):
        # 扩展队列，发现新链接
        for link in soup.find_all("a", href=True):
            href = urljoin(url, link["href"])
-            if href not in visited and is_valid_url(href, base_netloc):
+            # 对于人民日报网站，我们扩展链接发现逻辑
            if website.name == "人民日报":
                # 允许爬取以https://www.peopleapp.com/开头的链接
                if href.startswith("https://www.peopleapp.com/") and href not in visited:
                    # 增加对文章链接的识别
                    parsed_href = urlparse(href)
                    href_path = parsed_href.path
                    # 添加更多可能的文章链接模式
                    if ("/article/" in href_path or 
                        href_path.startswith("/detail/") or 
                        ("/dynamic/" in href_path and "article" in href_path) or
                        href_path.count("/") > 2):  # 更深层的页面可能是文章页
                        queue.append(href)
            elif href not in visited and is_valid_url(href, base_netloc):
                queue.append(href)
--- a/core/views.py
+++ b/core/views.py
@@ -22,6 +22,7 @@ def article_list(request):
    # 获取筛选网站
    selected_website = None
    # 修改：确保始终获取所有文章，除非有特定筛选
    articles = Article.objects.all()
    website_id = request.GET.get('website')