support keyword crawl

This commit is contained in:
2025-09-26 10:39:36 +08:00
parent 499454ff27
commit a36d730384
12 changed files with 2370 additions and 505 deletions

View File

@@ -72,6 +72,11 @@ class CrawlTask(models.Model):
created_by = models.CharField(max_length=100, blank=True, null=True, verbose_name="创建者")
# 执行历史字段
execution_count = models.IntegerField(default=0, verbose_name="执行次数")
last_execution_at = models.DateTimeField(blank=True, null=True, verbose_name="最后执行时间")
execution_history = models.JSONField(default=list, blank=True, verbose_name="执行历史")
class Meta:
verbose_name = "爬取任务"
verbose_name_plural = "爬取任务"
@@ -122,4 +127,47 @@ class CrawlTask(models.Model):
return f"失败: {self.error_message[:50]}..." if self.error_message else "失败"
elif self.status == 'cancelled':
return "已取消"
return "未知状态"
return "未知状态"
def add_execution_record(self, status, started_at=None, completed_at=None, error_message=None):
    """Append one execution record to the task's history and save the task.

    Args:
        status: Status label stored on the record (e.g. 'completed', 'failed').
        started_at: Start time of the execution; ``timezone.now()`` is used
            when it is omitted.
        completed_at: Completion time, or None if the run has not finished.
        error_message: Optional error text stored with the record.

    The record also snapshots the task's current ``success_count``,
    ``failed_count`` and ``total_articles``. Only the 10 most recent records
    are kept; ``execution_count`` keeps counting every execution.
    """
    if not started_at:
        started_at = timezone.now()

    # Normalise first so a NULL/empty history never reaches len()/iteration.
    history = list(self.execution_history or [])
    executions_so_far = self.execution_count or 0

    # The history is truncated to 10 entries, so its length cannot be used
    # as an ID source (every record after the 10th would get ID 11). Use the
    # running counter, and never go below the highest ID already stored.
    highest_stored_id = max(
        (record.get('execution_id') or 0 for record in history),
        default=0,
    )
    execution_id = max(highest_stored_id, executions_so_far) + 1

    history.append({
        'execution_id': execution_id,
        'started_at': started_at.isoformat() if started_at else None,
        'completed_at': completed_at.isoformat() if completed_at else None,
        'status': status,
        'error_message': error_message,
        'success_count': self.success_count,
        'failed_count': self.failed_count,
        'total_articles': self.total_articles,
    })

    # Keep only the 10 most recent execution records.
    self.execution_history = history[-10:]

    # Update the execution counter and the last execution time.
    self.execution_count = executions_so_far + 1
    self.last_execution_at = started_at

    self.save()
def get_execution_summary(self):
    """Return a one-line summary of the stored execution history.

    Returns:
        "暂无执行记录" when no history is stored; otherwise a string giving
        the number of stored records and how many of them have status
        'completed' and 'failed'. Counts are taken from
        ``self.execution_history`` only, so they cover just the records
        currently held in that list.
    """
    history = self.execution_history
    if not history:
        return "暂无执行记录"

    total_executions = len(history)
    # .get() so a record without a 'status' key is simply not counted
    # instead of raising KeyError.
    successful_executions = sum(
        1 for record in history if record.get('status') == 'completed'
    )
    failed_executions = sum(
        1 for record in history if record.get('status') == 'failed'
    )

    # The failed count now carries the same trailing "次" as the other counts.
    return (
        f"执行 {total_executions} 次,成功 {successful_executions} 次,"
        f"失败 {failed_executions} 次"
    )