Support CCTV Platforms
This commit is contained in:
@@ -233,8 +233,12 @@ def process_article(url, website):
|
||||
soup.find("div", id="content") or
|
||||
soup.find("div", class_="text") or
|
||||
soup.find("div", class_="main-content") or
|
||||
soup.find("div", class_="article")
|
||||
soup.find("div", class_="article") or
|
||||
# 添加央视新闻特有的内容容器
|
||||
soup.find("div", class_="content_area") or
|
||||
soup.find("div", id="content_area")
|
||||
)
|
||||
|
||||
elif "求是" in website.name:
|
||||
# 求是网站的文章结构处理 - 修复两个标题问题
|
||||
title_tag = (
|
||||
@@ -553,7 +557,6 @@ def process_article(url, website):
|
||||
title_tag = (
|
||||
soup.find("h1", class_="title") or
|
||||
soup.find("h1") or
|
||||
soup.find("p", class_="f_container_title") or # 添加中国妇女报特有标题容器
|
||||
soup.find("title")
|
||||
)
|
||||
content_tag = (
|
||||
@@ -563,9 +566,7 @@ def process_article(url, website):
|
||||
soup.find("div", class_="text") or
|
||||
soup.find("div", class_="main-content") or
|
||||
soup.find("div", class_="article") or
|
||||
soup.find("div", class_="article-body") or
|
||||
soup.find("div", class_="f_container_left") or # 添加中国妇女报特有内容容器
|
||||
soup.find("div", class_="f_container") # 添加另一种可能的内容容器
|
||||
soup.find("div", class_="article-body")
|
||||
)
|
||||
elif "法治日报" in website.name or "legaldaily" in website.name:
|
||||
# 法治日报的文章结构处理 - 修复无法爬取问题
|
||||
|
||||
Reference in New Issue
Block a user