Base setup

This commit is contained in:
2025-09-23 13:30:03 +08:00
parent 1057ed8690
commit e51154bb29
34 changed files with 2574 additions and 1 deletion

36
crawler/tasks.py Normal file
View File

@@ -0,0 +1,36 @@
from celery import shared_task
from .crawler_engine import run_crawl_task as execute_crawl_task
@shared_task
def crawl_websites_task(task_id):
    """Celery entry point for crawling websites.

    Delegates all work to the crawler engine's ``run_crawl_task``
    (imported here as ``execute_crawl_task``).

    Args:
        task_id: Identifier of the crawl task to execute.

    Returns:
        Whatever the crawler engine returns for this task.
    """
    result = execute_crawl_task(task_id)
    return result
@shared_task
def run_crawl_task(task_id):
    """Celery task exposed for the management/admin interface.

    Same behavior as ``crawl_websites_task``: hands the task id to the
    crawler engine and returns its result.

    Args:
        task_id: Identifier of the crawl task to execute.
    """
    return execute_crawl_task(task_id)
@shared_task
def cleanup_old_tasks():
    """Delete crawl tasks older than 30 days along with their related data.

    Removes ``CrawledContent`` and ``CrawlLog`` rows belonging to the old
    tasks first, then the ``CrawlTask`` rows themselves.

    Returns:
        A summary string with the number of tasks removed.
    """
    from django.utils import timezone
    from datetime import timedelta
    from .models import CrawlTask, CrawlLog, CrawledContent

    cutoff_date = timezone.now() - timedelta(days=30)
    old_tasks = CrawlTask.objects.filter(created_at__lt=cutoff_date)
    # Count before deleting — the queryset is empty afterwards.
    count = old_tasks.count()
    # Bulk-delete related rows with task__in, instead of the previous
    # per-task loop (which issued 3 queries per task, N+1 style).
    CrawledContent.objects.filter(task__in=old_tasks).delete()
    CrawlLog.objects.filter(task__in=old_tasks).delete()
    old_tasks.delete()
    return f"清理了 {count} 个旧任务"