Remove the other
This commit is contained in:
@@ -679,31 +679,6 @@ def process_article(url, website):
|
|||||||
soup.find("div", class_="article-body")
|
soup.find("div", class_="article-body")
|
||||||
)
|
)
|
||||||
|
|
||||||
# 针对旗帜网的特殊处理,清理内容中的无关元素
|
|
||||||
if content_tag:
|
|
||||||
# 移除编辑信息
|
|
||||||
for editor_element in content_tag.find_all("p", class_="editor"):
|
|
||||||
editor_element.decompose()
|
|
||||||
|
|
||||||
# 移除分享相关元素
|
|
||||||
for share_element in content_tag.find_all("div", class_="share-demo"):
|
|
||||||
share_element.decompose()
|
|
||||||
|
|
||||||
# 移除文字缩放相关元素
|
|
||||||
for scale_element in content_tag.find_all("div", class_="scale-main"):
|
|
||||||
scale_element.decompose()
|
|
||||||
|
|
||||||
# 移除无关的div.pic元素
|
|
||||||
for pic_element in content_tag.find_all("div", class_="pic"):
|
|
||||||
pic_element.decompose()
|
|
||||||
|
|
||||||
# 移除无关的zdfy元素
|
|
||||||
for zdfy_element in content_tag.find_all("div", class_="zdfy"):
|
|
||||||
zdfy_element.decompose()
|
|
||||||
|
|
||||||
# 移除无关的center元素
|
|
||||||
for center_element in content_tag.find_all("center"):
|
|
||||||
center_element.decompose()
|
|
||||||
elif "中国网" in website.name or "china.com.cn" in website.name:
|
elif "中国网" in website.name or "china.com.cn" in website.name:
|
||||||
# 中国网的文章结构处理 - 修复不保存文章内容问题
|
# 中国网的文章结构处理 - 修复不保存文章内容问题
|
||||||
title_tag = (
|
title_tag = (
|
||||||
|
|||||||
Reference in New Issue
Block a user