Add packages

This commit is contained in:
2025-08-11 22:55:57 +08:00
parent d9d2ea9d99
commit bfd1604872
3 changed files with 56 additions and 18 deletions

View File

@@ -8,7 +8,7 @@
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6; line-height: 1.6;
color: #333; color: #333;
max-width: 800px; max-width: 1200px; /* 修改:同步调整页面最大宽度与列表页一致 */
margin: 0 auto; margin: 0 auto;
padding: 20px; padding: 20px;
background-color: #f8f9fa; background-color: #f8f9fa;

View File

@@ -8,7 +8,7 @@
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
line-height: 1.6; line-height: 1.6;
color: #333; color: #333;
max-width: 800px; max-width: 1200px; /* 修改:增加页面最大宽度 */
margin: 0 auto; margin: 0 auto;
padding: 20px; padding: 20px;
background-color: #f8f9fa; background-color: #f8f9fa;
@@ -80,7 +80,7 @@
color: white; color: white;
text-decoration: none; text-decoration: none;
border-radius: 4px; border-radius: 4px;
margin: 0 5px; margin: 0 2px; /* 修改:调整页码间距 */
} }
.pagination a:hover { .pagination a:hover {
background-color: #2980b9; background-color: #2980b9;
@@ -89,6 +89,17 @@
margin: 0 10px; margin: 0 10px;
color: #7f8c8d; color: #7f8c8d;
} }
/* Added: style for the current page number.
   Uses the same background colour as the .pagination a:hover rule and the
   default cursor instead of the link pointer. */
.pagination .current {
background-color: #2980b9;
cursor: default;
}
/* Added: style for the "..." placeholder rendered where page numbers are skipped. */
.pagination .ellipsis {
display: inline-block;
padding: 8px 4px;
color: #7f8c8d;
}
</style> </style>
</head> </head>
<body> <body>
@@ -117,19 +128,46 @@
<div class="pagination"> <div class="pagination">
{% if page_obj.has_previous %} {% if page_obj.has_previous %}
{% if selected_website %} {% if selected_website %}
<a href="?website={{ selected_website.id }}&page=1">&laquo; 首页</a>
<a href="?website={{ selected_website.id }}&page={{ page_obj.previous_page_number }}">上一页</a> <a href="?website={{ selected_website.id }}&page={{ page_obj.previous_page_number }}">上一页</a>
{% else %} {% else %}
<a href="?page=1">&laquo; 首页</a>
<a href="?page={{ page_obj.previous_page_number }}">上一页</a> <a href="?page={{ page_obj.previous_page_number }}">上一页</a>
{% endif %} {% endif %}
{% endif %} {% endif %}
<span>第 {{ page_obj.number }} 页,共 {{ page_obj.paginator.num_pages }} 页</span> <span>第 {{ page_obj.number }} 页,共 {{ page_obj.paginator.num_pages }} 页</span>
<!-- Changed: numbered page links. Shows the first page, the last page and a window of two pages on either side of the current page; skipped ranges are marked with "...". -->
{% with page_obj.paginator as paginator %}
{% for num in paginator.page_range %}
{% if page_obj.number == num %}
{# Current page: no href, so it is a non-navigating placeholder instead of a "#" link that jumps to the top of the page. #}
<a class="current" aria-current="page">{{ num }}</a>
{% elif num == 1 or num == paginator.num_pages or num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
{# "and" binds tighter than "or" in the if tag, so this reads: first page, or last page, or inside the +/-2 window. #}
{# The website filter is only added to the query string when a website is selected. #}
<a href="?{% if selected_website %}website={{ selected_website.id }}&{% endif %}page={{ num }}">{{ num }}</a>
{% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
{# Exactly one step outside the window on either side: emit a single ellipsis for the skipped range. #}
<span class="ellipsis">...</span>
{% endif %}
{% endfor %}
{% endwith %}
{% if page_obj.has_next %} {% if page_obj.has_next %}
{% if selected_website %} {% if selected_website %}
<a href="?website={{ selected_website.id }}&page={{ page_obj.next_page_number }}">下一页</a> <a href="?website={{ selected_website.id }}&page={{ page_obj.next_page_number }}">下一页</a>
<a href="?website={{ selected_website.id }}&page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
{% else %} {% else %}
<a href="?page={{ page_obj.next_page_number }}">下一页</a> <a href="?page={{ page_obj.next_page_number }}">下一页</a>
<a href="?page={{ page_obj.paginator.num_pages }}">末页 &raquo;</a>
{% endif %} {% endif %}
{% endif %} {% endif %}
</div> </div>

View File

@@ -77,11 +77,11 @@ def process_article(url, website):
elif website.name == "东方烟草报": elif website.name == "东方烟草报":
# 优化东方烟草报的标题提取逻辑,按优先级尝试多种选择器 # 优化东方烟草报的标题提取逻辑,按优先级尝试多种选择器
title_tag = ( title_tag = (
soup.find("h1", id="title") or # 特别针对带id="title"的h1标签 soup.find("h1", id="title") or # 特别针对带id="title"的h1标签
soup.find("h1") or # 主要标题标签 soup.find("h1") or # 主要标题标签
soup.find("title") or # 页面title标签 soup.find("title") or # 页面title标签
soup.find("div", class_="title") or # 某些页面可能使用div.title soup.find("div", class_="title") or # 某些页面可能使用div.title
soup.find("h2") # 备选标题标签 soup.find("h2") # 备选标题标签
) )
content_tag = soup.find("div", class_="content") # 东方烟草报的内容通常在div.content中 content_tag = soup.find("div", class_="content") # 东方烟草报的内容通常在div.content中
# 增加对另一种内容结构的支持 # 增加对另一种内容结构的支持
@@ -184,16 +184,16 @@ def full_site_crawler(start_url, website, max_pages=1000):
parsed_url = urlparse(url) parsed_url = urlparse(url)
path = parsed_url.path path = parsed_url.path
is_article_page = ( is_article_page = (
soup.find("div", class_="content") is not None or soup.find("div", class_="content") is not None or
soup.find("div", id="gallery") is not None or soup.find("div", id="gallery") is not None or
soup.find("div", id="ContentText") is not None or soup.find("div", id="ContentText") is not None or
("/content/" in path and len(path) > 20) ("/content/" in path and len(path) > 20)
) )
else: else:
# 默认判断逻辑 # 默认判断逻辑
is_article_page = ( is_article_page = (
soup.find("div", class_="content") is not None or soup.find("div", class_="content") is not None or
soup.find("div", id="content") is not None soup.find("div", id="content") is not None
) )
# 如果是文章页面,则调用文章处理 # 如果是文章页面,则调用文章处理