Remove the other 旗帜网 content-cleanup block from process_article

This commit is contained in:
2025-08-15 04:07:08 +08:00
parent d3760c5780
commit 8db0512a6a

View File

@@ -679,31 +679,6 @@ def process_article(url, website):
soup.find("div", class_="article-body")
)
# 针对旗帜网的特殊处理,清理内容中的无关元素
if content_tag:
# 移除编辑信息
for editor_element in content_tag.find_all("p", class_="editor"):
editor_element.decompose()
# 移除分享相关元素
for share_element in content_tag.find_all("div", class_="share-demo"):
share_element.decompose()
# 移除文字缩放相关元素
for scale_element in content_tag.find_all("div", class_="scale-main"):
scale_element.decompose()
# 移除无关的div.pic元素
for pic_element in content_tag.find_all("div", class_="pic"):
pic_element.decompose()
# 移除无关的zdfy元素
for zdfy_element in content_tag.find_all("div", class_="zdfy"):
zdfy_element.decompose()
# 移除无关的center元素
for center_element in content_tag.find_all("center"):
center_element.decompose()
elif "中国网" in website.name or "china.com.cn" in website.name:
# 中国网的文章结构处理 - 修复不保存文章内容问题
title_tag = (