From bfd16048723286dfb7a5c6b36467704193ed061a Mon Sep 17 00:00:00 2001
From: yuangyaa
Date: Mon, 11 Aug 2025 22:55:57 +0800
Subject: [PATCH] Widen page layout, refine pagination styles, and tidy
 whitespace in core/utils.py

---
 core/templates/core/article_detail.html |  2 +-
 core/templates/core/article_list.html   | 42 +++++++++++++++++++++++--
 core/utils.py                           | 30 +++++++++---------
 3 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/core/templates/core/article_detail.html b/core/templates/core/article_detail.html
index f0aee66..155b24b 100644
--- a/core/templates/core/article_detail.html
+++ b/core/templates/core/article_detail.html
@@ -8,7 +8,7 @@
             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
             line-height: 1.6;
             color: #333;
-            max-width: 800px;
+            max-width: 1200px; /* Changed: keep the detail page's max width in sync with the list page */
             margin: 0 auto;
             padding: 20px;
             background-color: #f8f9fa;
diff --git a/core/templates/core/article_list.html b/core/templates/core/article_list.html
index 87764b1..faaa36f 100644
--- a/core/templates/core/article_list.html
+++ b/core/templates/core/article_list.html
@@ -8,7 +8,7 @@
             font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
             line-height: 1.6;
             color: #333;
-            max-width: 800px;
+            max-width: 1200px; /* Changed: increase the page's max width */
             margin: 0 auto;
             padding: 20px;
             background-color: #f8f9fa;
@@ -80,7 +80,7 @@
             color: white;
             text-decoration: none;
             border-radius: 4px;
-            margin: 0 5px;
+            margin: 0 2px; /* Changed: tighten the spacing between page numbers */
         }
         .pagination a:hover {
             background-color: #2980b9;
@@ -89,6 +89,17 @@
             margin: 0 10px;
             color: #7f8c8d;
         }
+        /* Added: style for the current page number */
+        .pagination .current {
+            background-color: #2980b9;
+            cursor: default;
+        }
+        /* Added: style for the ellipsis between page ranges */
+        .pagination .ellipsis {
+            display: inline-block;
+            padding: 8px 4px;
+            color: #7f8c8d;
+        }
@@ -117,19 +128,46 @@
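The `@@ -117,19 +128,46 @@` hunk presumably adds the pagination markup that the new `.current` and `.ellipsis` classes style. A minimal sketch, assuming Django >= 3.2, of how a view could produce a matching elided page range; the `elided_pages` helper and its parameters are illustrative, not part of this patch:

```python
# Hypothetical helper (not in this patch): builds the elided page range
# that the .current / .ellipsis styles above are meant to render.
from django.core.paginator import Paginator

def elided_pages(object_list, page_number, per_page=10):
    paginator = Paginator(object_list, per_page)
    page = paginator.get_page(page_number)
    # get_elided_page_range yields ints plus Paginator.ELLIPSIS ('…'),
    # e.g. 1, 2, 3, '…', 42 when page 2 of 42 is requested.
    return page, paginator.get_elided_page_range(
        page.number, on_each_side=2, on_ends=1
    )
```

In the template, each int would render as a link, the entry equal to `page.number` as `<span class="current">`, and `Paginator.ELLIPSIS` as `<span class="ellipsis">…</span>`.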
diff --git a/core/utils.py b/core/utils.py
index 2fd3ff8..ce8236b 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -29,10 +29,10 @@ def download_media(url, save_dir):
     if not filename or '.' not in filename:
         # If the URL path has no usable filename, fall back to a default name
         filename = 'media_file'
-    
+
     # Strip special characters out of the filename
     filename = re.sub(r'[^\w\-_\.]', '_', filename)
-    
+
     # Make sure the file has an extension
     if '.' not in filename:
         content_type = resp.headers.get('content-type', '')
@@ -77,11 +77,11 @@ def process_article(url, website):
     elif website.name == "东方烟草报":
         # Better title extraction for 东方烟草报: try selectors in priority order
         title_tag = (
-            soup.find("h1", id="title") or # specifically an h1 with id="title"
-            soup.find("h1") or # primary heading tag
-            soup.find("title") or # the page's <title> tag
-            soup.find("div", class_="title") or # some pages use div.title
-            soup.find("h2") # fallback heading tag
+            soup.find("h1", id="title") or       # specifically an h1 with id="title"
+            soup.find("h1") or                   # primary heading tag
+            soup.find("title") or                # the page's <title> tag
+            soup.find("div", class_="title") or  # some pages use div.title
+            soup.find("h2")                      # fallback heading tag
         )
         content_tag = soup.find("div", class_="content")  # 东方烟草报 content usually lives in div.content
         # Also support an alternative content structure
@@ -96,7 +96,7 @@ def process_article(url, website):
         content_tag = soup.find("div", class_="content") or soup.find("div", id="content")
 
     title = title_tag.get_text(strip=True) if title_tag else "无标题"
-    
+
     # Extra cleanup: strip any stray whitespace from the title
     title = title.strip() if title else "无标题"
@@ -184,16 +184,16 @@ def full_site_crawler(start_url, website, max_pages=1000):
             parsed_url = urlparse(url)
             path = parsed_url.path
             is_article_page = (
-            soup.find("div", class_="content") is not None or
-            soup.find("div", id="gallery") is not None or
-            soup.find("div", id="ContentText") is not None or
-            ("/content/" in path and len(path) > 20)
+                soup.find("div", class_="content") is not None or
+                soup.find("div", id="gallery") is not None or
+                soup.find("div", id="ContentText") is not None or
+                ("/content/" in path and len(path) > 20)
             )
         else:
             # Default article-page detection
             is_article_page = (
-            soup.find("div", class_="content") is not None or
-            soup.find("div", id="content") is not None
+                soup.find("div", class_="content") is not None or
+                soup.find("div", id="content") is not None
             )
 
         # If this is an article page, hand it to the article processor
@@ -205,4 +205,4 @@ def full_site_crawler(start_url, website, max_pages=1000):
     for link in soup.find_all("a", href=True):
         href = urljoin(url, link["href"])
         if href not in visited and is_valid_url(href, base_netloc):
-            queue.append(href)
\ No newline at end of file
+            queue.append(href)
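The whitespace-only edits in `download_media` leave its sanitization logic unchanged; for reference, a standalone sketch of what that `re.sub` pattern does (the sample filename is invented):

```python
# Demo of the filename sanitization used in download_media: every character
# that is not a word character, hyphen, underscore, or dot becomes '_'.
import re

filename = "report (final).pdf"              # hypothetical input
safe = re.sub(r'[^\w\-_\.]', '_', filename)
print(safe)                                  # -> report__final_.pdf
```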
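The realigned selector chain in `process_article` falls through from the most specific selector to the most generic. A self-contained illustration of the same pattern, assuming `beautifulsoup4` is installed (the sample HTML is made up):

```python
# Standalone demo of the title-extraction fallback chain.
from bs4 import BeautifulSoup

html = ('<html><head><title>Page title</title></head>'
        '<body><h1 id="title">Article headline</h1></body></html>')
soup = BeautifulSoup(html, "html.parser")

title_tag = (
    soup.find("h1", id="title")          # most specific: h1 with id="title"
    or soup.find("h1")                   # any primary heading
    or soup.find("title")                # the page's <title>
    or soup.find("div", class_="title")  # some pages use div.title
    or soup.find("h2")                   # last-resort heading
)
title = title_tag.get_text(strip=True) if title_tag else "无标题"
print(title)  # -> Article headline
```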