Remove the other
This commit is contained in:
@@ -679,31 +679,6 @@ def process_article(url, website):
|
||||
soup.find("div", class_="article-body")
|
||||
)
|
||||
|
||||
# 针对旗帜网的特殊处理,清理内容中的无关元素
|
||||
if content_tag:
|
||||
# 移除编辑信息
|
||||
for editor_element in content_tag.find_all("p", class_="editor"):
|
||||
editor_element.decompose()
|
||||
|
||||
# 移除分享相关元素
|
||||
for share_element in content_tag.find_all("div", class_="share-demo"):
|
||||
share_element.decompose()
|
||||
|
||||
# 移除文字缩放相关元素
|
||||
for scale_element in content_tag.find_all("div", class_="scale-main"):
|
||||
scale_element.decompose()
|
||||
|
||||
# 移除无关的div.pic元素
|
||||
for pic_element in content_tag.find_all("div", class_="pic"):
|
||||
pic_element.decompose()
|
||||
|
||||
# 移除无关的zdfy元素
|
||||
for zdfy_element in content_tag.find_all("div", class_="zdfy"):
|
||||
zdfy_element.decompose()
|
||||
|
||||
# 移除无关的center元素
|
||||
for center_element in content_tag.find_all("center"):
|
||||
center_element.decompose()
|
||||
elif "中国网" in website.name or "china.com.cn" in website.name:
|
||||
# 中国网的文章结构处理 - 修复不保存文章内容问题
|
||||
title_tag = (
|
||||
|
||||
Reference in New Issue
Block a user