Support keyword
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
from django.db import models
|
||||
from django.utils import timezone
|
||||
import json
|
||||
|
||||
|
||||
class Website(models.Model):
|
||||
@@ -25,3 +27,93 @@ class Article(models.Model):
|
||||
|
||||
def __str__(self):
    """Return this object's ``title`` as its string representation."""
    return self.title
|
||||
|
||||
|
||||
class CrawlTask(models.Model):
    """Crawl task model.

    Stores the configuration of one crawl job (type, keyword, target
    websites, date range, limits) together with its runtime state
    (status, progress, counters, timestamps, error/result payload).
    """

    # Allowed values for ``status`` as (stored value, display label) pairs.
    TASK_STATUS_CHOICES = [
        ('pending', '等待中'),
        ('running', '运行中'),
        ('completed', '已完成'),
        ('failed', '失败'),
        ('cancelled', '已取消'),
    ]

    # Allowed values for ``task_type`` as (stored value, display label) pairs.
    TASK_TYPE_CHOICES = [
        ('keyword', '关键词搜索'),
        ('historical', '历史文章'),
        ('full_site', '全站爬取'),
    ]

    # Maximum number of characters of ``error_message`` shown by
    # ``get_progress_display`` before the text is cut and "..." is appended.
    ERROR_PREVIEW_LENGTH = 50

    # --- Task configuration -------------------------------------------------
    name = models.CharField(max_length=200, verbose_name="任务名称")
    task_type = models.CharField(max_length=20, choices=TASK_TYPE_CHOICES, default='keyword', verbose_name="任务类型")
    keyword = models.CharField(max_length=200, blank=True, null=True, verbose_name="搜索关键词")
    # An empty selection is rendered as "all websites" by get_websites_display().
    websites = models.ManyToManyField(Website, blank=True, verbose_name="目标网站")
    start_date = models.DateField(blank=True, null=True, verbose_name="开始日期")
    end_date = models.DateField(blank=True, null=True, verbose_name="结束日期")
    max_pages = models.IntegerField(default=10, verbose_name="最大页数")
    max_articles = models.IntegerField(default=100, verbose_name="最大文章数")

    # --- Runtime state ------------------------------------------------------
    status = models.CharField(max_length=20, choices=TASK_STATUS_CHOICES, default='pending', verbose_name="状态")
    progress = models.IntegerField(default=0, verbose_name="进度百分比")
    current_website = models.CharField(max_length=100, blank=True, null=True, verbose_name="当前网站")
    current_action = models.CharField(max_length=200, blank=True, null=True, verbose_name="当前操作")

    # --- Counters -----------------------------------------------------------
    total_articles = models.IntegerField(default=0, verbose_name="总文章数")
    success_count = models.IntegerField(default=0, verbose_name="成功数")
    failed_count = models.IntegerField(default=0, verbose_name="失败数")

    # --- Timestamps ---------------------------------------------------------
    created_at = models.DateTimeField(auto_now_add=True, verbose_name="创建时间")
    started_at = models.DateTimeField(blank=True, null=True, verbose_name="开始时间")
    completed_at = models.DateTimeField(blank=True, null=True, verbose_name="完成时间")

    # --- Outcome ------------------------------------------------------------
    error_message = models.TextField(blank=True, null=True, verbose_name="错误信息")
    result_details = models.JSONField(default=dict, blank=True, verbose_name="结果详情")

    created_by = models.CharField(max_length=100, blank=True, null=True, verbose_name="创建者")

    class Meta:
        verbose_name = "爬取任务"
        verbose_name_plural = "爬取任务"
        # Newest tasks first.
        ordering = ['-created_at']

    def __str__(self):
        """Return the task name followed by its status label in parentheses."""
        return f"{self.name} ({self.get_status_display()})"

    def get_websites_display(self):
        """Return display text for the list of target websites.

        Returns:
            The website names joined by ", ", or "所有网站" when no website
            is attached to the task.
        """
        # Build the name list once so the relation is read a single time.
        site_names = [site.name for site in self.websites.all()]
        if not site_names:
            return "所有网站"
        return ", ".join(site_names)

    def get_duration(self):
        """Return how long the task has been executing.

        Returns:
            ``None`` if ``started_at`` is unset; otherwise the difference
            between ``completed_at`` (or the current time when the task has
            no completion timestamp) and ``started_at``.
        """
        if not self.started_at:
            return None
        finished = self.completed_at or timezone.now()
        return finished - self.started_at

    def is_running(self):
        """Return True if the task status is 'running'."""
        return self.status == 'running'

    def can_cancel(self):
        """Return True if the task status is 'pending' or 'running'."""
        return self.status in ['pending', 'running']

    def get_progress_display(self):
        """Return display text describing the task's progress.

        The text depends on ``status``:

        * pending   -> fixed "waiting to start" text
        * running   -> current website and action when both are set,
                       otherwise the progress percentage
        * completed -> ``success_count`` / ``total_articles``
        * failed    -> a preview of ``error_message`` (at most
                       ``ERROR_PREVIEW_LENGTH`` characters), or a bare
                       "failed" label when no message is stored
        * cancelled -> fixed "cancelled" text

        Any other status yields the "unknown status" text.
        """
        current = self.status

        if current == 'pending':
            return "等待开始"

        if current == 'running':
            if self.current_website and self.current_action:
                return f"正在处理 {self.current_website}: {self.current_action}"
            return f"运行中 ({self.progress}%)"

        if current == 'completed':
            return f"已完成 ({self.success_count}/{self.total_articles})"

        if current == 'failed':
            message = self.error_message
            if not message:
                return "失败"
            limit = self.ERROR_PREVIEW_LENGTH
            # Only signal truncation when text was actually cut off; short
            # messages are shown in full without a trailing ellipsis.
            ellipsis = "..." if len(message) > limit else ""
            return f"失败: {message[:limit]}{ellipsis}"

        if current == 'cancelled':
            return "已取消"

        return "未知状态"
|
||||
Reference in New Issue
Block a user