icac/crawler/management/commands/run_crawler.py
from django.core.management.base import BaseCommand

from crawler.models import CrawlTask, Website
from crawler.tasks import crawl_websites_task


class Command(BaseCommand):
    help = 'Run a crawler task'

    def add_arguments(self, parser):
        parser.add_argument(
            '--keywords',
            type=str,
            required=True,
            help='Search keywords, separated by commas'
        )
        parser.add_argument(
            '--websites',
            type=str,
            help='Comma-separated list of website IDs; if omitted, all websites are crawled'
        )
        parser.add_argument(
            '--name',
            type=str,
            help='Task name'
        )

    def handle(self, *args, **options):
        keywords = options['keywords']
        website_ids = options.get('websites')
        # argparse stores None when --name is omitted, so fall back explicitly
        task_name = options.get('name') or f'Keyword search: {keywords}'

        # Resolve the target websites
        if website_ids:
            website_id_list = [int(site_id.strip()) for site_id in website_ids.split(',')]
            websites = Website.objects.filter(id__in=website_id_list, is_active=True)
        else:
            websites = Website.objects.filter(is_active=True)

        if not websites.exists():
            self.stdout.write(
                self.style.ERROR('No available websites found')
            )
            return

        # Create the task
        task = CrawlTask.objects.create(
            name=task_name,
            keywords=keywords,
            created_by='management_command'
        )
        task.websites.set(websites)

        self.stdout.write(f'Created task: {task.name}')
        self.stdout.write(f'Target websites: {websites.count()}')
        self.stdout.write(f'Search keywords: {keywords}')

        # Start the task (synchronous fallback: run directly if Redis/the queue is unavailable)
        try:
            crawl_websites_task.delay(task.id)
            self.stdout.write('Task submitted to the queue')
        except Exception as e:
            self.stdout.write(f'Queue unavailable, running task directly: {e}')
            from crawler.crawler_engine import WebsiteCrawler
            crawler = WebsiteCrawler(task.id)
            crawler.run()

        self.stdout.write(
            self.style.SUCCESS(f'Task started, task ID: {task.id}')
        )