Files
green_classroom/core/management/commands/crawl_xinhua.py
2025-08-11 13:10:23 +08:00

24 lines
848 B
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from django.core.management.base import BaseCommand
from core.models import Website
from core.utils import crawl_xinhua_article
class Command(BaseCommand):
    """Crawl Xinhua articles and hand them to ``crawl_xinhua_article``.

    Usage::

        manage.py crawl_xinhua                 # crawl the built-in sample list
        manage.py crawl_xinhua URL [URL ...]   # crawl the given article URLs

    The ``Website`` row named ``WEBSITE_NAME`` must already exist (for
    example, created beforehand through the admin site); if it does not,
    an error message is printed and nothing is crawled.
    """

    help = '爬取新华网文章示例'

    # Name of the pre-existing Website instance passed to the crawler.
    WEBSITE_NAME = "新华网"

    # Article URLs crawled when none are supplied on the command line.
    DEFAULT_URLS = (
        "https://www.news.cn/legal/20250721/f340f7be3d5b4b938cbd6b9889b6fbdc/c.html",
    )

    def add_arguments(self, parser):
        """Register the optional positional list of article URLs."""
        parser.add_argument(
            "urls",
            nargs="*",
            help="Article URLs to crawl (defaults to the built-in sample list).",
        )

    def handle(self, *args, **options):
        """Look up the target website and crawl each requested URL in order.

        ``options["urls"]`` may be missing or empty, in which case
        ``DEFAULT_URLS`` is used.
        """
        website_name = self.WEBSITE_NAME
        try:
            website = Website.objects.get(name=website_name)
        except Website.DoesNotExist:
            self.stdout.write(self.style.ERROR(f"网站 '{website_name}' 不存在,请先后台创建"))
            return

        # `.get` keeps direct calls to handle() without a "urls" option
        # working; an empty list falls back to the defaults as well.
        urls = options.get("urls") or self.DEFAULT_URLS
        for url in urls:
            crawl_xinhua_article(url, website)