Base setup
crawler/tasks.py | 36 (new file)
@@ -0,0 +1,36 @@
from celery import shared_task

from .crawler_engine import run_crawl_task as execute_crawl_task


@shared_task
def crawl_websites_task(task_id):
    """Celery task that crawls websites."""
    return execute_crawl_task(task_id)


@shared_task
def run_crawl_task(task_id):
    """Celery task that runs a crawl (exposed for the admin interface)."""
    return execute_crawl_task(task_id)


@shared_task
def cleanup_old_tasks():
    """Clean up old tasks (keep only the last 30 days)."""
    from django.utils import timezone
    from datetime import timedelta
    from .models import CrawlTask, CrawlLog, CrawledContent

    cutoff_date = timezone.now() - timedelta(days=30)

    # Delete tasks older than 30 days, together with their related data
    old_tasks = CrawlTask.objects.filter(created_at__lt=cutoff_date)
    count = old_tasks.count()

    for task in old_tasks:
        # Delete the related crawled content and logs before the task itself
        CrawledContent.objects.filter(task=task).delete()
        CrawlLog.objects.filter(task=task).delete()
        task.delete()

    return f"Cleaned up {count} old tasks"
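The diff itself does not show how these tasks get triggered. A minimal sketch of how they might be wired up, assuming a standard Django + Celery setup (app configured from Django settings with the `CELERY_` namespace) and that `cleanup_old_tasks` is meant to run on a schedule; the task id, schedule, and schedule entry name below are illustrative assumptions, not part of this commit:

# Hypothetical wiring, not part of this commit.
from celery.schedules import crontab

from crawler.tasks import crawl_websites_task

# Enqueue a crawl asynchronously for an existing CrawlTask row
# (42 is an illustrative primary key):
crawl_websites_task.delay(42)

# Run the 30-day cleanup nightly via Celery beat (in settings.py,
# assuming config_from_object with namespace="CELERY"):
CELERY_BEAT_SCHEDULE = {
    "cleanup-old-crawl-tasks": {
        "task": "crawler.tasks.cleanup_old_tasks",
        "schedule": crontab(hour=3, minute=0),  # daily at 03:00
    },
}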