Fix bugs and support all platforms

This commit is contained in:
2025-08-15 08:33:47 +08:00
parent e82b85f4dd
commit 4945b4c6b0
36 changed files with 2296 additions and 992 deletions

View File

@@ -3,13 +3,12 @@ from core.models import Website
from core.utils import full_site_crawler
# jimmy.fang-20250815: 因URL问题移除中国网-省份
class Command(BaseCommand):
help = "全站递归爬取 中国网主网及中国网一省份,不转发二级子网站"
def add_arguments(self, parser):
parser.add_argument('--platform', type=str, default='all',
choices=['china', 'province', 'all'],
choices=['china', 'all'],
help='选择爬取平台: china(中国网主网), province(中国网一省份), all(全部)')
def handle(self, *args, **options):
@@ -23,12 +22,7 @@ class Command(BaseCommand):
'start_url': 'http://www.china.com.cn',
'article_selector': 'a'
},
# 'province': {
# 'name': '中国网一省份',
# 'base_url': 'http://www.china.com.cn',
# 'start_url': 'http://www.china.com.cn/province',
# 'article_selector': 'a'
# }
}
if platform == 'all':