diff --git a/core/templates/core/article_detail.html b/core/templates/core/article_detail.html
index f0aee66..155b24b 100644
--- a/core/templates/core/article_detail.html
+++ b/core/templates/core/article_detail.html
@@ -8,7 +8,7 @@
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6;
color: #333;
- max-width: 800px;
+ max-width: 1200px; /* 修改:同步调整页面最大宽度与列表页一致 */
margin: 0 auto;
padding: 20px;
background-color: #f8f9fa;
diff --git a/core/templates/core/article_list.html b/core/templates/core/article_list.html
index 87764b1..faaa36f 100644
--- a/core/templates/core/article_list.html
+++ b/core/templates/core/article_list.html
@@ -8,7 +8,7 @@
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6;
color: #333;
- max-width: 800px;
+ max-width: 1200px; /* 修改:增加页面最大宽度 */
margin: 0 auto;
padding: 20px;
background-color: #f8f9fa;
@@ -80,7 +80,7 @@
color: white;
text-decoration: none;
border-radius: 4px;
- margin: 0 5px;
+ margin: 0 2px; /* 修改:调整页码间距 */
}
.pagination a:hover {
background-color: #2980b9;
@@ -89,6 +89,17 @@
margin: 0 10px;
color: #7f8c8d;
}
+ /* 新增:当前页码样式 */
+ .pagination .current {
+ background-color: #2980b9;
+ cursor: default;
+ }
+ /* 新增:省略号样式 */
+ .pagination .ellipsis {
+ display: inline-block;
+ padding: 8px 4px;
+ color: #7f8c8d;
+ }
@@ -117,19 +128,46 @@
diff --git a/core/utils.py b/core/utils.py
index 2fd3ff8..ce8236b 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -29,10 +29,10 @@ def download_media(url, save_dir):
if not filename or '.' not in filename:
# 如果URL路径中没有有效的文件名,使用默认名称
filename = 'media_file'
-
+
# 清理文件名中的特殊字符
filename = re.sub(r'[^\w\-_\.]', '_', filename)
-
+
# 确保文件有扩展名
if '.' not in filename:
content_type = resp.headers.get('content-type', '')
@@ -77,11 +77,11 @@ def process_article(url, website):
elif website.name == "东方烟草报":
# 优化东方烟草报的标题提取逻辑,按优先级尝试多种选择器
title_tag = (
- soup.find("h1", id="title") or # 特别针对带id="title"的h1标签
- soup.find("h1") or # 主要标题标签
- soup.find("title") or # 页面title标签
- soup.find("div", class_="title") or # 某些页面可能使用div.title
- soup.find("h2") # 备选标题标签
+ soup.find("h1", id="title") or # 特别针对带id="title"的h1标签
+ soup.find("h1") or # 主要标题标签
+ soup.find("title") or # 页面title标签
+ soup.find("div", class_="title") or # 某些页面可能使用div.title
+ soup.find("h2") # 备选标题标签
)
content_tag = soup.find("div", class_="content") # 东方烟草报的内容通常在div.content中
# 增加对另一种内容结构的支持
@@ -96,7 +96,7 @@ def process_article(url, website):
content_tag = soup.find("div", class_="content") or soup.find("div", id="content")
title = title_tag.get_text(strip=True) if title_tag else "无标题"
-
+
# 对标题进行额外处理,去除可能的多余空白字符
title = title.strip() if title else "无标题"
@@ -184,16 +184,16 @@ def full_site_crawler(start_url, website, max_pages=1000):
parsed_url = urlparse(url)
path = parsed_url.path
is_article_page = (
- soup.find("div", class_="content") is not None or
- soup.find("div", id="gallery") is not None or
- soup.find("div", id="ContentText") is not None or
- ("/content/" in path and len(path) > 20)
+ soup.find("div", class_="content") is not None or
+ soup.find("div", id="gallery") is not None or
+ soup.find("div", id="ContentText") is not None or
+ ("/content/" in path and len(path) > 20)
)
else:
# 默认判断逻辑
is_article_page = (
- soup.find("div", class_="content") is not None or
- soup.find("div", id="content") is not None
+ soup.find("div", class_="content") is not None or
+ soup.find("div", id="content") is not None
)
# 如果是文章页面,则调用文章处理
@@ -205,4 +205,4 @@ def full_site_crawler(start_url, website, max_pages=1000):
for link in soup.find_all("a", href=True):
href = urljoin(url, link["href"])
if href not in visited and is_valid_url(href, base_netloc):
- queue.append(href)
\ No newline at end of file
+ queue.append(href)