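fix bugs

- core/admin.py: drop unused imports; run_crawler_view now looks up the
  selected Website by name and runs its configured crawler_command
  instead of a hard-coded command list; add ArticleAdmin.get_websites
- crawl_cngov / crawl_xinhua: rename the Website records to
  "中国政府网" / "新华网"
- crawl_rmrb: new management command for 人民日报 (peopleapp.com)
- export_articles: remove unused default_storage import
- admin change_list.html: build the crawler <select> from enabled websites
- article_detail.html: replace the copied article-list markup and scripts
  with a real single-article detail page
- article_list.html: whitespace / line-joining cleanups
  (remaining changed files are not shown in this excerpt)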

2025-08-13 21:35:11 +08:00
parent c618528a0a
commit 31d0525cd0
10 changed files with 243 additions and 897 deletions
--- a/core/admin.py
+++ b/core/admin.py
@@ -1,9 +1,6 @@
-from django.contrib import admin
-from django.contrib.admin import AdminSite
 from .models import Website, Article
 # 添加actions相关的导入
 from django.contrib import messages
-from django.http import HttpResponseRedirect
 # 添加导出功能所需导入
 import csv
 from django.http import HttpResponse
@@ -12,17 +9,7 @@ import json
 from django.shortcuts import render, redirect
 from django.urls import path
 from django.contrib import admin
-from django.http import JsonResponse
-from django.views.decorators.http import require_http_methods
 from django.core.management import call_command
-import threading
-import uuid
-from django.utils import timezone
-
-
-# 创建自定义管理站点
-
-# 实例化管理站点

 # 添加运行爬虫的视图函数
 def run_crawler_view(request):
@@ -36,21 +23,24 @@ def run_crawler_view(request):
            return redirect('admin:core_article_changelist')

        try:
-            # 根据网站名称确定要执行的爬虫命令
-            if website_name == 'crawl_xinhua':
-                crawler_name = 'crawl_xinhua'
-            elif website_name == 'crawl_dongfangyancao':
-                crawler_name = 'crawl_dongfangyancao'
-            elif website_name == 'crawl_articles':
-                crawler_name = 'crawl_articles'
-            else:
-                # 对于其他网站，使用通用爬虫命令
-                crawler_name = 'crawl_articles'
+            # 动态获取网站对象
+            website = Website.objects.get(name=website_name)
            
-            # 运行爬虫命令，不传递website_name作为参数
-            call_command(crawler_name)
+            # 根据网站对象确定要执行的爬虫命令
+            # 移除默认的通用爬虫，每个网站必须配置自己的爬虫命令
+            crawler_name = getattr(website, 'crawler_command', None)
            
-            messages.success(request, f'成功执行爬虫: {crawler_name}')
+            # 如果网站没有配置爬虫命令，则报错
+            if not crawler_name:
+                messages.error(request, f'网站 {website_name} 未配置爬虫命令')
+                return redirect('admin:core_article_changelist')
+
+            # 运行爬虫命令，传递网站名称
+            call_command(crawler_name, website_name)
+
+            messages.success(request, f'成功执行爬虫: {website_name}')
+        except Website.DoesNotExist:
+            messages.error(request, f'网站不存在: {website_name}')
        except Exception as e:
            messages.error(request, f'执行爬虫失败: {str(e)}')

@@ -71,6 +61,10 @@ class ArticleAdmin(admin.ModelAdmin):
    actions = ['delete_selected_articles', 'export_as_csv', 'export_as_json',
               'export_as_word', 'export_with_media']

+    def get_websites(self):
+        """获取所有启用的网站"""
+        return Website.objects.filter(enabled=True)
+
    # 重写get_urls方法，添加自定义URL
    def get_urls(self):
        urls = super().get_urls()
--- a/core/management/commands/crawl_cngov.py
+++ b/core/management/commands/crawl_cngov.py
@@ -8,7 +8,7 @@ class Command(BaseCommand):

    def handle(self, *args, **kwargs):
        website, created = Website.objects.get_or_create(
-            name="www.gov.cn",
+            name="中国政府网",
            defaults={
                'article_list_url': 'https://www.gov.cn/',
                'article_selector': 'a'
--- a/core/management/commands/crawl_rmrb.py
+++ b/core/management/commands/crawl_rmrb.py
@@ -0,0 +1,26 @@
+from django.core.management.base import BaseCommand
+from core.models import Website
+from core.utils import full_site_crawler
+
+
+class Command(BaseCommand):
+    help = "全站递归爬取 人民日报 https://www.peopleapp.com"
+
+    def handle(self, *args, **kwargs):
+        website, created = Website.objects.get_or_create(
+            name="人民日报",
+            defaults={
+                'article_list_url': 'https://www.peopleapp.com/home',
+                'article_selector': 'a',
+                'base_url': 'https://www.peopleapp.com'
+            }
+        )
+        # 确保更新已存在的网站对象的base_url
+        if not created and not website.base_url:
+            website.base_url = 'https://www.peopleapp.com'
+            website.save()
+            
+        start_url = "https://www.peopleapp.com/home"
+        self.stdout.write(f"开始全站爬取: {start_url}")
+        full_site_crawler(start_url, website, max_pages=500)
+        self.stdout.write("爬取完成")
--- a/core/management/commands/crawl_xinhua.py
+++ b/core/management/commands/crawl_xinhua.py
@@ -8,7 +8,7 @@ class Command(BaseCommand):

    def handle(self, *args, **kwargs):
        website, created = Website.objects.get_or_create(
-            name="www.news.cn",
+            name="新华网",
            defaults={
                'article_list_url': 'https://www.news.cn/',
                'article_selector': 'a'
--- a/core/management/commands/export_articles.py
+++ b/core/management/commands/export_articles.py
@@ -4,7 +4,6 @@ import json
 import csv
 import os
 from django.conf import settings
-from django.core.files.storage import default_storage
 import zipfile
 from django.utils import timezone

--- a/core/templates/admin/core/article/change_list.html
+++ b/core/templates/admin/core/article/change_list.html
@@ -9,9 +9,9 @@
            <label for="website-select">选择网站:</label>
            <select name="website_name" id="website-select" required>
                <option value="">-- 请选择网站 --</option>
-                <option value="crawl_xinhua">新华网</option>
-                <option value="crawl_dongfangyancao">东方烟草报</option>
-                <option value="crawl_articles">通用爬虫</option>
+                {% for website in cl.model_admin.get_websites %}
+                    <option value="{{ website.name }}">{{ website.name }}</option>
+                {% endfor %}
            </select>
            <input type="submit" value="执行爬虫" class="default" style="margin-left: 10px;"/>
        </form>
--- a/core/templates/core/article_detail.html
+++ b/core/templates/core/article_detail.html
@@ -2,7 +2,8 @@
 <html lang="zh">
 <head>
    <meta charset="UTF-8"/>
-    <title>绿色课堂文章列表</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
+    <title>{{ article.title }} - 绿色课堂</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
@@ -10,15 +11,16 @@
            color: #333;
            margin: 0 auto;
            padding: 20px;
-            background-color: #f0f8ff; /* 统一背景色调 */
+            background-color: #f0f8ff;
+            max-width: 800px;
        }

        .container {
            background: white;
            padding: 30px;
            margin-bottom: 20px;
-            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); /* 添加轻微阴影 */
-            border-radius: 8px; /* 添加圆角 */
+            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05);
+            border-radius: 8px;
        }

        h1 {
@@ -28,765 +30,59 @@
            margin-top: 0;
        }

-        .filters {
+        .meta {
+            color: #78909c;
+            font-size: 0.9em;
            margin-bottom: 20px;
-            padding: 15px;
-            background-color: #e3f2fd; /* 统一滤镜背景色调 */
-            border-radius: 5px;
        }

-        .filters a {
+        .content {
+            margin-top: 20px;
+        }
+
+        .content img {
+            max-width: 100%;
+            height: auto;
+        }
+
+        .back-link {
            display: inline-block;
-            padding: 5px 10px;
-            margin: 0 5px 5px 0;
-            background-color: #bbdefb; /* 统一链接背景色调 */
+            margin-bottom: 20px;
+            color: #1976d2;
+            text-decoration: none;
+        }
+
+        .back-link:hover {
            color: #0d47a1;
-            text-decoration: none;
-            border-radius: 3px;
-        }
-
-        .filters a.active {
-            background-color: #3498db;
-            color: white;
-        }
-
-        ul {
-            list-style: none;
-            padding: 0;
-        }
-
-        li {
-            padding: 10px 0;
-            border-bottom: 1px solid #e0e0e0; /* 统一分隔线颜色 */
-        }
-
-        li:last-child {
-            border-bottom: none;
-        }
-
-        a {
-            color: #1976d2; /* 统一链接颜色 */
-            text-decoration: none;
-        }
-
-        a:hover {
-            color: #0d47a1; /* 统一悬停颜色 */
            text-decoration: underline;
        }

-        .meta {
-            color: #78909c; /* 统一元数据颜色 */
-            font-size: 0.9em;
-        }
-
-        .pagination {
-            margin-top: 30px;
-            text-align: center;
-            padding: 20px 0;
-        }
-
-        .pagination a {
-            display: inline-block;
-            padding: 8px 16px;
-            background-color: #3498db;
-            color: white;
-            text-decoration: none;
-            border-radius: 4px;
-            margin: 0 2px; /* 修改:调整页码间距 */
-        }
-
-        .pagination a:hover {
-            background-color: #2980b9;
-        }
-
-        .pagination span {
-            margin: 0 10px;
-            color: #7f8c8d;
-        }
-
-        /* 新增:当前页码样式 */
-        .pagination .current {
-            background-color: #2980b9;
-            cursor: default;
-        }
-
-        /* 新增:省略号样式 */
-        .pagination .ellipsis {
-            display: inline-block;
-            padding: 8px 4px;
-            color: #7f8c8d;
-        }
-
-        /* 新增:搜索框样式 */
-        .search-form {
-            margin-bottom: 20px;
-            padding: 15px;
-            background-color: #e3f2fd; /* 统一搜索框背景色调 */
-            border-radius: 5px;
-        }
-
-        .search-form input[type="text"] {
-            padding: 8px 12px;
-            border: 1px solid #bbdefb; /* 统一边框颜色 */
-            border-radius: 4px;
-            width: 300px;
-            margin-right: 10px;
-            background-color: #fff;
-        }
-
-        .search-form input[type="submit"] {
-            padding: 8px 16px;
-            background-color: #3498db;
-            color: white;
-            border: none;
-            border-radius: 4px;
-            cursor: pointer;
-        }
-
-        .search-form input[type="submit"]:hover {
-            background-color: #2980b9;
-        }
-
-        .search-info {
-            color: #78909c; /* 统一搜索信息颜色 */
-            font-size: 0.9em;
-            margin-bottom: 10px;
-        }
-
-        /* 新增:左侧筛选栏样式 */
-        .content-wrapper {
-            display: flex;
-            gap: 20px;
-        }
-
-        .sidebar {
-            flex: 0 0 200px;
-            background-color: #e3f2fd; /* 统一边栏背景色调 */
-            border-radius: 5px;
-            padding: 15px;
-        }
-
-        .main-content {
-            flex: 1;
-        }
-
-        .sidebar .filters {
-            margin-bottom: 20px;
-            padding: 0;
-            background-color: transparent;
-        }
-
-        .sidebar .filters strong {
-            display: block;
-            margin-bottom: 10px;
-            color: #2c3e50;
-        }
-
-        .sidebar .filters a {
-            display: block;
-            padding: 8px 10px;
-            margin: 0 0 5px 0;
-            background-color: #bbdefb; /* 统一边栏链接背景色调 */
-            color: #0d47a1;
-            text-decoration: none;
-            border-radius: 3px;
-        }
-
-        .sidebar .filters a.active {
-            background-color: #3498db;
-            color: white;
-        }
-
-        /* 新增:导出功能样式 */
-        .export-section {
-            margin-bottom: 20px;
-            padding: 15px;
-            background-color: #e8f5e9; /* 统一导出区域背景色调 */
-            border-radius: 5px;
-            text-align: center;
-        }
-
-        .export-btn {
-            padding: 10px 20px;
-            background-color: #4caf50; /* 统一按钮背景色调 */
-            color: white;
-            border: none;
-            border-radius: 4px;
-            cursor: pointer;
-            font-size: 16px;
-            margin: 0 5px;
-        }
-
-        .export-btn:hover {
-            background-color: #388e3c; /* 统一按钮悬停色调 */
-        }
-
-        .export-btn:disabled {
-            background-color: #9e9e9e; /* 统一禁用按钮色调 */
-            cursor: not-allowed;
-        }
-
-        .article-checkbox {
-            margin-right: 10px;
-        }
-
-        /* 新增:爬虫控制按钮样式 */
-        .crawler-control {
-            margin-bottom: 20px;
-            padding: 15px;
-            background-color: #fff3e0; /* 统一爬虫控制区域背景色调 */
-            border-radius: 5px;
-            text-align: center;
-        }
-
-        .crawler-btn {
-            padding: 10px 20px;
-            background-color: #ff9800; /* 统一爬虫按钮背景色调 */
-            color: white;
-            border: none;
-            border-radius: 4px;
-            cursor: pointer;
-            font-size: 16px;
-            margin: 0 5px;
-        }
-
-        .crawler-btn:hover {
-            background-color: #f57c00; /* 统一爬虫按钮悬停色调 */
-        }
-
-        .crawler-btn:disabled {
-            background-color: #9e9e9e; /* 统一禁用爬虫按钮色调 */
-            cursor: not-allowed;
-        }
-
-        .crawler-result {
-            margin-top: 10px;
+        @media (max-width: 600px) {
+            body {
                padding: 10px;
-            border-radius: 4px;
-            display: none;
            }
            
-        .crawler-result.success {
-            background-color: #e8f5e9;
-            color: #2e7d32;
+            .container {
+                padding: 15px;
            }
-
-        .crawler-result.error {
-            background-color: #ffebee;
-            color: #c62828;
-        }
-
-        /* 新增:进度条样式 */
-        .progress-container {
-            margin-top: 10px;
-            display: none;
-        }
-
-        .progress-bar {
-            width: 100%;
-            height: 20px;
-            background-color: #e0e0e0;
-            border-radius: 10px;
-            overflow: hidden;
-        }
-
-        .progress-fill {
-            height: 100%;
-            background-color: #4caf50;
-            width: 0%;
-            transition: width 0.3s ease;
-        }
-
-        .progress-text {
-            margin-top: 5px;
-            font-size: 14px;
-            color: #666;
        }
    </style>
 </head>
 <body>
-<div class="container">
-    <h1>绿色课堂文章列表</h1>
+    <div class="container">
+        <a href="{% url 'article_list' %}" class="back-link">&laquo; 返回文章列表</a>

-    <!-- 新增:搜索表单 -->
-    <div class="search-form">
-        <form method="get">
-            <input type="text" name="q" placeholder="输入关键词搜索文章..." value="{{ search_query }}">
-            {% if selected_website %}
-                <input type="hidden" name="website" value="{{ selected_website.id }}">
-            {% endif %}
-            <input type="submit" value="搜索">
-        </form>
+        <h1>{{ article.title }}</h1>
+
+        <div class="meta">
+            网站: {{ article.website.name }} |
+            发布时间: {{ article.pub_date|date:"Y-m-d H:i" }} |
+            创建时间: {{ article.created_at|date:"Y-m-d H:i" }}
        </div>

-    <div class="content-wrapper">
-        <!-- 左侧筛选栏 -->
-        <div class="sidebar">
-            <div class="filters">
-                <strong>按网站筛选：</strong>
-                <a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}"
-                   {% if not selected_website %}class="active" {% endif %}>全部</a>
-                {% for website in websites %}
-                    <a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}"
-                       {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
-                {% endfor %}
+        <div class="content">
+            {{ article.content|safe }}
        </div>
    </div>
-
-        <!-- 主内容区域 -->
-        <div class="main-content">
-            <!-- 新增:搜索结果信息 -->
-            {% if search_query %}
-                <div class="search-info">
-                    搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章
-                    <a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a>
-                </div>
-            {% endif %}
-
-            <!-- 新增:导出功能 -->
-            <div class="export-section">
-                <button id="selectAllBtn" class="export-btn">全选</button>
-                <button id="deselectAllBtn" class="export-btn">取消全选</button>
-                <button id="exportJsonBtn" class="export-btn" disabled>导出为JSON</button>
-                <button id="exportCsvBtn" class="export-btn" disabled>导出为CSV</button>
-                <!-- 新增:导出为ZIP包按钮 -->
-                <button id="exportZipBtn" class="export-btn" disabled>导出为ZIP包</button>
-            </div>
-
-            <ul>
-                {% for article in page_obj %}
-                    <li>
-                        <input type="checkbox" class="article-checkbox" value="{{ article.id }}"
-                               id="article_{{ article.id }}">
-                        <a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
-                        <div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
-                    </li>
-                    {% empty %}
-                    <li>暂无文章</li>
-                {% endfor %}
-            </ul>
-
-            <div class="pagination">
-                {% if page_obj.has_previous %}
-                    {% if selected_website %}
-                        <a href="?website=
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo;
-                            首页</a>
-                        <a href="?website=
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
-                    {% else %}
-                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a>
-                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
-                    {% endif %}
-                {% endif %}
-
-                <span>第 {{ page_obj.number }} 页，共 {{ page_obj.paginator.num_pages }} 页</span>
-
-                <!-- 修改:优化页码显示逻辑 -->
-                {% with page_obj.paginator as paginator %}
-                    {% for num in paginator.page_range %}
-                        {% if page_obj.number == num %}
-                            <a href="#" class="current">{{ num }}</a>
-                        {% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
-                            {% if selected_website %}
-                                <a href="?website=
-                                        {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
-                            {% else %}
-                                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
-                            {% endif %}
-                        {% elif num == 1 or num == paginator.num_pages %}
-                            {% if selected_website %}
-                                <a href="?website=
-                                        {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
-                            {% else %}
-                                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
-                            {% endif %}
-                        {% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
-                            <span class="ellipsis">...</span>
-                        {% endif %}
-                    {% endfor %}
-                {% endwith %}
-
-                {% if page_obj.has_next %}
-                    {% if selected_website %}
-                        <a href="?website=
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
-                        <a href="?website=
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页
-                            &raquo;</a>
-                    {% else %}
-                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
-                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页
-                            &raquo;</a>
-                    {% endif %}
-                {% endif %}
-            </div>
-        </div>
-    </div>
-</div>
-
-<script>
-    // 存储当前任务ID和检查状态的定时器
-    let currentTaskId = null;
-    let statusCheckInterval = null;
-
-    // 获取页面元素
-    const runCrawlerBtn = document.getElementById('runCrawlerBtn');
-    const runDongfangyancaoCrawlerBtn = document.getElementById('runDongfangyancaoCrawlerBtn');
-    const pauseCrawlerBtn = document.getElementById('pauseCrawlerBtn');
-    const progressContainer = document.getElementById('crawlerProgress');
-    const progressFill = document.getElementById('progressFill');
-    const progressText = document.getElementById('progressText');
-    const resultDiv = document.getElementById('crawlerResult');
-
-    // 绑定爬虫按钮事件
-    runCrawlerBtn.addEventListener('click', function () {
-        runCrawler('www.news.cn', 'crawl_xinhua');
-    });
-
-    runDongfangyancaoCrawlerBtn.addEventListener('click', function () {
-        runCrawler('东方烟草报', 'crawl_dongfangyancao');
-    });
-
-    // 暂停按钮事件
-    pauseCrawlerBtn.addEventListener('click', function () {
-        if (currentTaskId) {
-            pauseCrawler(currentTaskId);
-        }
-    });
-
-    // 运行爬虫函数
-    function runCrawler(websiteName, crawlerName) {
-        // 禁用按钮，防止重复点击
-        runCrawlerBtn.disabled = true;
-        runDongfangyancaoCrawlerBtn.disabled = true;
-        resultDiv.style.display = 'none';
-
-        // 显示进度区域
-        progressContainer.style.display = 'block';
-        updateProgress(0, '爬虫启动中...');
-
-        // 发送POST请求运行爬虫
-        fetch('{% url "run_crawler" %}', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/x-www-form-urlencoded',
-                'X-CSRFToken': '{{ csrf_token }}'
-            },
-            body: 'crawler_name=' + crawlerName
-        })
-            .then(response => response.json())
-            .then(data => {
-                if (data.status === 'success') {
-                    currentTaskId = data.task_id;
-                    pauseCrawlerBtn.disabled = false;
-                    // 启动轮询检查爬虫状态
-                    checkCrawlerStatus(currentTaskId);
-                } else {
-                    // 显示错误信息
-                    resultDiv.style.display = 'block';
-                    resultDiv.className = 'crawler-result error';
-                    resultDiv.textContent = data.message;
-                    // 恢复按钮状态
-                    runCrawlerBtn.disabled = false;
-                    runDongfangyancaoCrawlerBtn.disabled = false;
-                    progressContainer.style.display = 'none';
-                }
-            })
-            .catch(error => {
-                // 显示错误信息
-                resultDiv.style.display = 'block';
-                resultDiv.className = 'crawler-result error';
-                resultDiv.textContent = '请求失败: ' + error;
-                // 恢复按钮状态
-                runCrawlerBtn.disabled = false;
-                runDongfangyancaoCrawlerBtn.disabled = false;
-                progressContainer.style.display = 'none';
-            });
-    }
-
-    // 检查爬虫状态的函数
-    function checkCrawlerStatus(taskId) {
-        // 清除之前的定时器
-        if (statusCheckInterval) {
-            clearInterval(statusCheckInterval);
-        }
-
-        // 设置新的定时器，每秒检查一次状态
-        statusCheckInterval = setInterval(() => {
-            fetch('{% url "crawler_status" %}', {
-                method: 'POST',
-                headers: {
-                    'Content-Type': 'application/x-www-form-urlencoded',
-                    'X-CSRFToken': '{{ csrf_token }}'
-                },
-                body: 'task_id=' + taskId
-            })
-                .then(response => response.json())
-                .then(data => {
-                    if (data.status === 'running') {
-                        // 更新进度信息（模拟进度）
-                        const elapsedTime = new Date() - new Date(data.start_time);
-                        const progress = Math.min(90, Math.floor(elapsedTime / 1000));
-                        updateProgress(progress, data.message);
-                    } else if (data.status === 'completed') {
-                        // 爬虫完成，显示结果
-                        clearInterval(statusCheckInterval);
-                        updateProgress(100, data.message);
-
-                        // 恢复按钮状态
-                        runCrawlerBtn.disabled = false;
-                        runDongfangyancaoCrawlerBtn.disabled = false;
-                        pauseCrawlerBtn.disabled = true;
-
-                        // 显示结果信息
-                        resultDiv.style.display = 'block';
-                        resultDiv.className = 'crawler-result success';
-                        resultDiv.textContent = data.message;
-
-                        // 3秒后自动隐藏进度条
-                        setTimeout(() => {
-                            progressContainer.style.display = 'none';
-                        }, 3000);
-
-                        // 自动刷新页面以显示新文章
-                        setTimeout(() => {
-                            location.reload();
-                        }, 2000);
-                    } else if (data.status === 'paused') {
-                        // 爬虫暂停
-                        clearInterval(statusCheckInterval);
-                        updateProgress(data.progress || 0, '爬虫已暂停');
-
-                        // 恢复按钮状态
-                        runCrawlerBtn.disabled = false;
-                        runDongfangyancaoCrawlerBtn.disabled = false;
-                        pauseCrawlerBtn.disabled = true;
-
-                        // 显示结果信息
-                        resultDiv.style.display = 'block';
-                        resultDiv.className = 'crawler-result success';
-                        resultDiv.textContent = '爬虫已暂停';
-                    } else if (data.status === 'error') {
-                        // 爬虫出错
-                        clearInterval(statusCheckInterval);
-                        progressContainer.style.display = 'none';
-                        resultDiv.style.display = 'block';
-                        resultDiv.className = 'crawler-result error';
-                        resultDiv.textContent = data.message;
-
-                        // 恢复按钮状态
-                        runCrawlerBtn.disabled = false;
-                        runDongfangyancaoCrawlerBtn.disabled = false;
-                        pauseCrawlerBtn.disabled = true;
-                    }
-                })
-                .catch(error => {
-                    clearInterval(statusCheckInterval);
-                    progressContainer.style.display = 'none';
-                    resultDiv.style.display = 'block';
-                    resultDiv.className = 'crawler-result error';
-                    resultDiv.textContent = '检查状态失败: ' + error;
-
-                    // 恢复按钮状态
-                    runCrawlerBtn.disabled = false;
-                    runDongfangyancaoCrawlerBtn.disabled = false;
-                    pauseCrawlerBtn.disabled = true;
-                });
-        }, 1000);
-    }
-
-    // 更新进度条函数
-    function updateProgress(percent, text) {
-        progressFill.style.width = percent + '%';
-        progressText.textContent = text;
-    }
-
-    // 暂停爬虫函数
-    function pauseCrawler(taskId) {
-        fetch('{% url "pause_crawler" %}', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/x-www-form-urlencoded',
-                'X-CSRFToken': '{{ csrf_token }}'
-            },
-            body: 'task_id=' + taskId
-        })
-            .then(response => response.json())
-            .then(data => {
-                if (data.status === 'success') {
-                    // 暂停成功，更新UI
-                    pauseCrawlerBtn.disabled = true;
-                    updateProgress(data.progress || 0, '正在暂停...');
-                } else {
-                    // 显示错误信息
-                    resultDiv.style.display = 'block';
-                    resultDiv.className = 'crawler-result error';
-                    resultDiv.textContent = data.message;
-                }
-            })
-            .catch(error => {
-                // 显示错误信息
-                resultDiv.style.display = 'block';
-                resultDiv.className = 'crawler-result error';
-                resultDiv.textContent = '暂停请求失败: ' + error;
-            });
-    }
-
-    // 导出功能相关JavaScript
-    const checkboxes = document.querySelectorAll('.article-checkbox');
-    const exportJsonBtn = document.getElementById('exportJsonBtn');
-    const exportCsvBtn = document.getElementById('exportCsvBtn');
-    const selectAllBtn = document.getElementById('selectAllBtn');
-    const deselectAllBtn = document.getElementById('deselectAllBtn');
-    // 新增:获取ZIP导出按钮元素
-    const exportZipBtn = document.getElementById('exportZipBtn');
-
-    // 更新导出按钮状态
-    function updateExportButtons() {
-        const selectedCount = document.querySelectorAll('.article-checkbox:checked').length;
-        exportJsonBtn.disabled = selectedCount === 0;
-        exportCsvBtn.disabled = selectedCount === 0;
-        exportZipBtn.disabled = selectedCount === 0; // 新增:更新ZIP导出按钮状态
-    }
-
-    // 为所有复选框添加事件监听器
-    checkboxes.forEach(checkbox => {
-        checkbox.addEventListener('change', updateExportButtons);
-    });
-
-    // 全选功能
-    selectAllBtn.addEventListener('click', () => {
-        checkboxes.forEach(checkbox => {
-            checkbox.checked = true;
-        });
-        updateExportButtons();
-    });
-
-    // 取消全选功能
-    deselectAllBtn.addEventListener('click', () => {
-        checkboxes.forEach(checkbox => {
-            checkbox.checked = false;
-        });
-        updateExportButtons();
-    });
-
-    // 导出为JSON功能
-    exportJsonBtn.addEventListener('click', () => {
-        const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
-            .map(checkbox => checkbox.value);
-
-        // 发送POST请求导出文章
-        fetch('{% url "export_articles" %}', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json',
-                'X-CSRFToken': '{{ csrf_token }}'
-            },
-            body: JSON.stringify({
-                article_ids: selectedArticles,
-                format: 'json'
-            })
-        })
-            .then(response => {
-                if (response.ok) {
-                    return response.blob();
-                }
-                throw new Error('导出失败');
-            })
-            .then(blob => {
-                const url = window.URL.createObjectURL(blob);
-                const a = document.createElement('a');
-                a.href = url;
-                a.download = 'articles.json';
-                document.body.appendChild(a);
-                a.click();
-                window.URL.revokeObjectURL(url);
-                document.body.removeChild(a);
-            })
-            .catch(error => {
-                alert('导出失败: ' + error);
-            });
-    });
-
-    // 导出为CSV功能
-    exportCsvBtn.addEventListener('click', () => {
-        const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
-            .map(checkbox => checkbox.value);
-
-        // 发送POST请求导出文章
-        fetch('{% url "export_articles" %}', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json',
-                'X-CSRFToken': '{{ csrf_token }}'
-            },
-            body: JSON.stringify({
-                article_ids: selectedArticles,
-                format: 'csv'
-            })
-        })
-            .then(response => {
-                if (response.ok) {
-                    return response.blob();
-                }
-                throw new Error('导出失败');
-            })
-            .then(blob => {
-                const url = window.URL.createObjectURL(blob);
-                const a = document.createElement('a');
-                a.href = url;
-                a.download = 'articles.csv';
-                document.body.appendChild(a);
-                a.click();
-                window.URL.revokeObjectURL(url);
-                document.body.removeChild(a);
-            })
-            .catch(error => {
-                alert('导出失败: ' + error);
-            });
-    });
-
-    // 新增:导出为ZIP包功能
-    exportZipBtn.addEventListener('click', () => {
-        const selectedArticles = Array.from(document.querySelectorAll('.article-checkbox:checked'))
-            .map(checkbox => checkbox.value);
-
-        // 发送POST请求导出文章为ZIP包
-        fetch('{% url "export_articles" %}', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json',
-                'X-CSRFToken': '{{ csrf_token }}'
-            },
-            body: JSON.stringify({
-                article_ids: selectedArticles,
-                format: 'zip' // 指定导出格式为ZIP
-            })
-        })
-            .then(response => {
-                if (response.ok) {
-                    return response.blob();
-                }
-                throw new Error('导出失败');
-            })
-            .then(blob => {
-                const url = window.URL.createObjectURL(blob);
-                const a = document.createElement('a');
-                a.href = url;
-                a.download = 'articles.zip';
-                document.body.appendChild(a);
-                a.click();
-                window.URL.revokeObjectURL(url);
-                document.body.removeChild(a);
-            })
-            .catch(error => {
-                alert('导出失败: ' + error);
-            });
-    });
-
-    // 初始化导出按钮状态
-    updateExportButtons();
-</script>
 </body>
 </html>
--- a/core/templates/core/article_list.html
+++ b/core/templates/core/article_list.html
@@ -17,7 +17,7 @@
            background: white;
            padding: 30px;
            margin-bottom: 20px;
-            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.05); /* 添加轻微阴影 */
+            box-shadow: 0 2px 5px rgba(0,0,0,0.05); /* 添加轻微阴影 */
            border-radius: 8px; /* 添加圆角 */
        }

@@ -251,11 +251,9 @@
        <div class="sidebar">
            <div class="filters">
                <strong>按网站筛选：</strong>
-                <a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}"
-                   {% if not selected_website %}class="active" {% endif %}>全部</a>
+                <a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}" {% if not selected_website %}class="active" {% endif %}>全部</a>
                {% for website in websites %}
-                    <a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}"
-                       {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
+                <a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}" {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
                {% endfor %}
            </div>
        </div>
@@ -283,8 +281,7 @@
            <ul>
                {% for article in page_obj %}
                <li>
-                        <input type="checkbox" class="article-checkbox" value="{{ article.id }}"
-                               id="article_{{ article.id }}">
+                    <input type="checkbox" class="article-checkbox" value="{{ article.id }}" id="article_{{ article.id }}">
                    <a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
                    <div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
                </li>
@@ -296,11 +293,8 @@
            <div class="pagination">
                {% if page_obj.has_previous %}
                {% if selected_website %}
-                        <a href="?website=
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo;
-                            首页</a>
-                        <a href="?website=
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">&laquo; 首页</a>
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
                {% else %}
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">&laquo; 首页</a>
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
@@ -316,15 +310,13 @@
                <a href="#" class="current">{{ num }}</a>
                {% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
                {% if selected_website %}
-                                <a href="?website=
-                                        {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
                {% else %}
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
                {% endif %}
                {% elif num == 1 or num == paginator.num_pages %}
                {% if selected_website %}
-                                <a href="?website=
-                                        {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
                {% else %}
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
                {% endif %}
@@ -336,15 +328,11 @@

                {% if page_obj.has_next %}
                {% if selected_website %}
-                        <a href="?website=
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
-                        <a href="?website=
-                                {{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页
-                            &raquo;</a>
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
+                <a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
                {% else %}
                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
-                        <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页
-                            &raquo;</a>
+                <a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
                {% endif %}
                {% endif %}
            </div>
--- a/core/utils.py
+++ b/core/utils.py
@@ -107,6 +107,17 @@ def process_article(url, website):
                soup.find("div", id="content") or
                soup.find("div", class_="mainBody")
        )
+    elif website.name == "人民日报":
+        # 人民日报网站的文章结构处理
+        title_tag = soup.find("h1") or soup.find("title")
+        # 查找主要内容区域
+        content_tag = (
+                soup.find("div", class_="content") or
+                soup.find("div", class_="article-content") or
+                soup.find("div", id="content") or
+                soup.find("div", class_="text") or
+                soup.find("section", class_="content")
+        )
    else:
        # 默认处理方式
        title_tag = soup.find("h1") or soup.find("title")
@@ -256,6 +267,24 @@ def full_site_crawler(start_url, website, max_pages=1000):
                    ("/xinwen/" in path) or
                    ("/huoban/" in path)
            )
+        elif website.name == "人民日报":
+            # 人民日报的文章页面判断逻辑
+            parsed_url = urlparse(url)
+            path = parsed_url.path
+            # 修改: 增加更准确的文章页面判断逻辑
+            is_article_page = (
+                    (soup.find("div", class_="content") is not None and 
+                     soup.find("h1") is not None) or
+                    soup.find("div", class_="article-content") is not None or
+                    (soup.find("div", id="content") is not None and 
+                     soup.find("h1") is not None) or
+                    soup.find("div", class_="text") is not None or
+                    soup.find("section", class_="content") is not None or
+                    ("/article/" in path) or
+                    (path.startswith("/detail/") and len(path) > 10) or
+                    # 增加对peopleapp.com特定文章路径的判断
+                    ("/dynamic/" in path and "article" in path)
+            )
        else:
            # 默认判断逻辑
            is_article_page = (
@@ -271,5 +300,18 @@ def full_site_crawler(start_url, website, max_pages=1000):
        # 扩展队列，发现新链接
        for link in soup.find_all("a", href=True):
            href = urljoin(url, link["href"])
-            if href not in visited and is_valid_url(href, base_netloc):
+            # 对于人民日报网站，我们扩展链接发现逻辑
+            if website.name == "人民日报":
+                # 允许爬取以https://www.peopleapp.com/开头的链接
+                if href.startswith("https://www.peopleapp.com/") and href not in visited:
+                    # 增加对文章链接的识别
+                    parsed_href = urlparse(href)
+                    href_path = parsed_href.path
+                    # 添加更多可能的文章链接模式
+                    if ("/article/" in href_path or 
+                        href_path.startswith("/detail/") or 
+                        ("/dynamic/" in href_path and "article" in href_path) or
+                        href_path.count("/") > 2):  # 更深层的页面可能是文章页
+                        queue.append(href)
+            elif href not in visited and is_valid_url(href, base_netloc):
                queue.append(href)
--- a/core/views.py
+++ b/core/views.py
@@ -22,6 +22,7 @@ def article_list(request):

    # 获取筛选网站
    selected_website = None
+    # 修改：确保始终获取所有文章，除非有特定筛选
    articles = Article.objects.all()

    website_id = request.GET.get('website')