Fix chinadaily bug: support more packages
This commit is contained in:
@@ -3,6 +3,7 @@ from core.models import Website
|
||||
from core.utils import full_site_crawler
|
||||
|
||||
|
||||
# jimmy.fang-20250815: 因URL问题,移除中国网-省份
|
||||
class Command(BaseCommand):
|
||||
help = "全站递归爬取 中国网主网及中国网一省份,不转发二级子网站"
|
||||
|
||||
@@ -22,12 +23,12 @@ class Command(BaseCommand):
|
||||
'start_url': 'http://www.china.com.cn',
|
||||
'article_selector': 'a'
|
||||
},
|
||||
'province': {
|
||||
'name': '中国网一省份',
|
||||
'base_url': 'http://www.china.com.cn',
|
||||
'start_url': 'http://www.china.com.cn/province',
|
||||
'article_selector': 'a'
|
||||
}
|
||||
# 'province': {
|
||||
# 'name': '中国网一省份',
|
||||
# 'base_url': 'http://www.china.com.cn',
|
||||
# 'start_url': 'http://www.china.com.cn/province',
|
||||
# 'article_selector': 'a'
|
||||
# }
|
||||
}
|
||||
|
||||
if platform == 'all':
|
||||
|
||||
@@ -9,7 +9,7 @@ class Command(BaseCommand):
|
||||
def add_arguments(self, parser):
|
||||
parser.add_argument('--platform', type=str, default='all',
|
||||
choices=['chinadaily', 'mobile', 'all'],
|
||||
help='选择爬取平台: chinadaily(中国日报), mobile(移动端), all(全部)')
|
||||
help='选择爬取平台: chinadaily(中国日报), all(全部)')
|
||||
|
||||
def handle(self, *args, **options):
|
||||
platform = options['platform']
|
||||
@@ -22,12 +22,7 @@ class Command(BaseCommand):
|
||||
'start_url': 'https://www.chinadaily.com.cn',
|
||||
'article_selector': 'a'
|
||||
},
|
||||
'mobile': {
|
||||
'name': '中国日报移动端',
|
||||
'base_url': 'https://m.chinadaily.com.cn',
|
||||
'start_url': 'https://m.chinadaily.com.cn',
|
||||
'article_selector': 'a'
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if platform == 'all':
|
||||
|
||||
Reference in New Issue
Block a user