from django.core.management.base import BaseCommand

from crawler.models import CrawlTask, Website
from crawler.tasks import crawl_websites_task


class Command(BaseCommand):
    help = 'Run a crawl task'

    def add_arguments(self, parser):
        parser.add_argument(
            '--keywords',
            type=str,
            required=True,
            help='Search keywords, separated by commas'
        )
        parser.add_argument(
            '--websites',
            type=str,
            help='Comma-separated list of website IDs; if omitted, all websites are crawled'
        )
        parser.add_argument(
            '--name',
            type=str,
            help='Task name'
        )

    def handle(self, *args, **options):
        keywords = options['keywords']
        website_ids = options.get('websites')
        # argparse stores None for omitted options, so fall back explicitly
        # instead of relying on dict.get()'s default value.
        task_name = options.get('name') or f'Keyword search: {keywords}'

        # Resolve the target websites
        if website_ids:
            website_id_list = [int(id.strip()) for id in website_ids.split(',')]
            websites = Website.objects.filter(id__in=website_id_list, is_active=True)
        else:
            websites = Website.objects.filter(is_active=True)

        if not websites.exists():
            self.stdout.write(
                self.style.ERROR('No available websites found')
            )
            return

        # Create the task
        task = CrawlTask.objects.create(
            name=task_name,
            keywords=keywords,
            created_by='management_command'
        )
        task.websites.set(websites)

        self.stdout.write(f'Created task: {task.name}')
        self.stdout.write(f'Target websites: {websites.count()}')
        self.stdout.write(f'Search keywords: {keywords}')

        # Start the task (synchronous fallback: run directly if the Redis queue is unavailable)
        try:
            crawl_websites_task.delay(task.id)
            self.stdout.write('Task submitted to the queue')
        except Exception as e:
            self.stdout.write(f'Queue unavailable, running task directly: {e}')
            from crawler.crawler_engine import WebsiteCrawler
            crawler = WebsiteCrawler(task.id)
            crawler.run()

        self.stdout.write(
            self.style.SUCCESS(f'Task started, task ID: {task.id}')
        )
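
# Example usage (sketch): the management command name is taken from this file's
# name under crawler/management/commands/; "run_crawler" below is an assumed
# name, and the keyword and website ID values are illustrative only.
#
#   python manage.py run_crawler --keywords "python,django" --name "Demo task"
#   python manage.py run_crawler --keywords "python" --websites 1,2,3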