"""Database models for site configuration, crawl targets, articles and crawl tasks."""

import json

from django.db import models
from django.utils import timezone


class SiteConfig(models.Model):
    """Site-wide presentation settings: title and page-header appearance.

    Use :meth:`get_config` to read the settings; it treats the table as
    holding a single configuration row and creates it on first use.
    """

    # CSS ``background-size`` values offered for the header image.
    BACKGROUND_SIZE_CHOICES = [
        ('cover', '覆盖整个区域'),
        ('contain', '完整显示图片'),
        ('100% 100%', '拉伸填满'),
        ('auto', '原始大小'),
    ]

    # CSS ``background-position`` values offered for the header image.
    BACKGROUND_POSITION_CHOICES = [
        ('center', '居中'),
        ('top', '顶部'),
        ('bottom', '底部'),
        ('left', '左侧'),
        ('right', '右侧'),
        ('top left', '左上角'),
        ('top right', '右上角'),
        ('bottom left', '左下角'),
        ('bottom right', '右下角'),
    ]

    site_title = models.CharField(max_length=200, default="绿美泉烟绿色课堂", verbose_name="网站标题")
    show_title = models.BooleanField(default=True, verbose_name="前台显示标题")
    header_background_image = models.ImageField(
        upload_to='site_config/',
        blank=True,
        null=True,
        verbose_name="版头背景图片"
    )
    header_background_color = models.CharField(
        max_length=7,
        default="#667eea",
        verbose_name="版头背景颜色"
    )
    header_background_size = models.CharField(
        max_length=20,
        choices=BACKGROUND_SIZE_CHOICES,
        default='cover',
        verbose_name="背景图片大小"
    )
    header_background_position = models.CharField(
        max_length=20,
        choices=BACKGROUND_POSITION_CHOICES,
        default='center',
        verbose_name="背景图片位置"
    )
    header_height = models.IntegerField(
        default=200,
        verbose_name="版头高度(像素)"
    )
    created_at = models.DateTimeField(auto_now_add=True, verbose_name="创建时间")
    updated_at = models.DateTimeField(auto_now=True, verbose_name="更新时间")

    class Meta:
        verbose_name = "网站配置"
        verbose_name_plural = "网站配置"

    def __str__(self) -> str:
        return f"网站配置 - {self.site_title}"

    @classmethod
    def get_config(cls):
        """Return the site configuration, creating a default row if none exists.

        When several rows exist (nothing in the schema prevents that), the
        one with the lowest primary key is returned. A bare
        ``get_or_create()`` without lookup arguments would raise
        ``MultipleObjectsReturned`` in that situation.

        Returns:
            The ``SiteConfig`` instance to use.
        """
        config = cls.objects.order_by('pk').first()
        if config is None:
            config = cls.objects.create(
                site_title='绿美泉烟绿色课堂',
                header_background_color='#667eea',
            )
        return config


class Website(models.Model):
    """A website registered as a crawl target.

    Stores the site's URLs and the selector strings kept alongside them;
    how the selectors are interpreted is up to the crawler code that reads
    them (not visible in this module).
    """

    name = models.CharField(max_length=100, unique=True)
    base_url = models.URLField()
    description = models.TextField(blank=True, null=True)
    article_list_url = models.URLField(blank=True, null=True)
    article_selector = models.CharField(max_length=255, blank=True, null=True)
    content_selector = models.CharField(max_length=255, blank=True, null=True)
    enabled = models.BooleanField(default=True)

    def __str__(self) -> str:
        return self.name


class Article(models.Model):
    """An article belonging to a :class:`Website`, unique by URL."""

    website = models.ForeignKey(Website, on_delete=models.CASCADE)
    title = models.CharField(max_length=300)
    url = models.URLField(unique=True)
    pub_date = models.DateTimeField(null=True, blank=True)
    content = models.TextField()  # HTML content
    created_at = models.DateTimeField(auto_now_add=True)
    media_files = models.JSONField(default=list, blank=True)

    def __str__(self) -> str:
        return self.title


class CrawlTask(models.Model):
    """A crawl task: its parameters, live progress and execution history."""

    TASK_STATUS_CHOICES = [
        ('pending', '等待中'),
        ('running', '运行中'),
        ('completed', '已完成'),
        ('failed', '失败'),
        ('cancelled', '已取消'),
    ]

    TASK_TYPE_CHOICES = [
        ('keyword', '关键词搜索'),
        ('historical', '历史文章'),
        ('full_site', '全站爬取'),
    ]

    # Number of most recent records kept in ``execution_history``.
    MAX_EXECUTION_HISTORY = 10

    # Longest error-message excerpt shown by ``get_progress_display``.
    ERROR_PREVIEW_LENGTH = 50

    name = models.CharField(max_length=200, verbose_name="任务名称")
    task_type = models.CharField(max_length=20, choices=TASK_TYPE_CHOICES, default='keyword', verbose_name="任务类型")
    keyword = models.CharField(max_length=200, blank=True, null=True, verbose_name="搜索关键词")
    websites = models.ManyToManyField(Website, blank=True, verbose_name="目标网站")
    start_date = models.DateField(blank=True, null=True, verbose_name="开始日期")
    end_date = models.DateField(blank=True, null=True, verbose_name="结束日期")
    max_pages = models.IntegerField(default=10, verbose_name="最大页数")
    max_articles = models.IntegerField(default=100, verbose_name="最大文章数")

    status = models.CharField(max_length=20, choices=TASK_STATUS_CHOICES, default='pending', verbose_name="状态")
    progress = models.IntegerField(default=0, verbose_name="进度百分比")
    current_website = models.CharField(max_length=100, blank=True, null=True, verbose_name="当前网站")
    current_action = models.CharField(max_length=200, blank=True, null=True, verbose_name="当前操作")

    total_articles = models.IntegerField(default=0, verbose_name="总文章数")
    success_count = models.IntegerField(default=0, verbose_name="成功数")
    failed_count = models.IntegerField(default=0, verbose_name="失败数")

    created_at = models.DateTimeField(auto_now_add=True, verbose_name="创建时间")
    started_at = models.DateTimeField(blank=True, null=True, verbose_name="开始时间")
    completed_at = models.DateTimeField(blank=True, null=True, verbose_name="完成时间")

    error_message = models.TextField(blank=True, null=True, verbose_name="错误信息")
    result_details = models.JSONField(default=dict, blank=True, verbose_name="结果详情")

    created_by = models.CharField(max_length=100, blank=True, null=True, verbose_name="创建者")

    # Execution-history fields
    execution_count = models.IntegerField(default=0, verbose_name="执行次数")
    last_execution_at = models.DateTimeField(blank=True, null=True, verbose_name="最后执行时间")
    execution_history = models.JSONField(default=list, blank=True, verbose_name="执行历史")

    class Meta:
        verbose_name = "爬取任务"
        verbose_name_plural = "爬取任务"
        ordering = ['-created_at']

    def __str__(self) -> str:
        return f"{self.name} ({self.get_status_display()})"

    def get_websites_display(self) -> str:
        """Return the target websites as a comma-separated string.

        Falls back to the "all websites" label when no website is attached,
        when none of them has a name, or when the relation cannot be read.
        """
        try:
            site_names = [str(site.name) for site in self.websites.all() if site.name]
        except Exception:
            # Deliberate best-effort: this is a display helper, so any
            # failure while reading the relation yields the default label
            # instead of propagating.
            return "所有网站"
        return ", ".join(site_names) if site_names else "所有网站"

    def get_duration(self):
        """Return how long the task has been running.

        Returns:
            ``None`` if the task has not started; otherwise the
            ``timedelta`` from ``started_at`` to ``completed_at``, or to the
            current time when the task has no completion time yet.
        """
        if not self.started_at:
            return None

        end_time = self.completed_at or timezone.now()
        return end_time - self.started_at

    def is_running(self) -> bool:
        """Return ``True`` when the task status is ``'running'``."""
        return self.status == 'running'

    def can_cancel(self) -> bool:
        """Return ``True`` when the task is pending or running."""
        return self.status in ['pending', 'running']

    def get_progress_display(self) -> str:
        """Return a human-readable progress string for the current status.

        For a failed task the error message is shortened to
        ``ERROR_PREVIEW_LENGTH`` characters; an ellipsis is appended only
        when the message was actually cut.
        """
        if self.status == 'pending':
            return "等待开始"

        if self.status == 'running':
            if self.current_website and self.current_action:
                return f"正在处理 {self.current_website}: {self.current_action}"
            return f"运行中 ({self.progress}%)"

        if self.status == 'completed':
            return f"已完成 ({self.success_count}/{self.total_articles})"

        if self.status == 'failed':
            if not self.error_message:
                return "失败"
            limit = self.ERROR_PREVIEW_LENGTH
            preview = self.error_message[:limit]
            ellipsis = "..." if len(self.error_message) > limit else ""
            return f"失败: {preview}{ellipsis}"

        if self.status == 'cancelled':
            return "已取消"

        return "未知状态"

    def add_execution_record(self, status, started_at=None, completed_at=None, error_message=None):
        """Append an execution record to the history and save the task.

        The record snapshots the task's current ``success_count``,
        ``failed_count`` and ``total_articles``. Only the most recent
        ``MAX_EXECUTION_HISTORY`` records are kept.

        Args:
            status: Status string stored in the record (e.g. ``'completed'``).
            started_at: Start time of the execution; defaults to the current time.
            completed_at: Completion time, or ``None`` if not finished.
            error_message: Optional error text stored in the record.
        """
        if not started_at:
            started_at = timezone.now()

        # Normalise before any use: the stored value may be empty or None.
        history = list(self.execution_history or [])

        # Derive the id from the largest stored id rather than the list
        # length, so ids keep increasing after old records are trimmed.
        last_id = max(
            (entry.get('execution_id') or 0 for entry in history if isinstance(entry, dict)),
            default=0,
        )

        history.append({
            'execution_id': last_id + 1,
            'started_at': started_at.isoformat(),
            'completed_at': completed_at.isoformat() if completed_at else None,
            'status': status,
            'error_message': error_message,
            'success_count': self.success_count,
            'failed_count': self.failed_count,
            'total_articles': self.total_articles
        })

        # Keep only the most recent records.
        self.execution_history = history[-self.MAX_EXECUTION_HISTORY:]

        # Update the execution counter and the last execution time.
        self.execution_count += 1
        self.last_execution_at = started_at

        self.save()

    def get_execution_summary(self) -> str:
        """Return a one-line summary of the stored execution history.

        Counts cover only the records currently kept in
        ``execution_history``. Entries that are not dicts or lack a
        ``'status'`` key count towards the total but towards neither the
        success nor the failure figure.
        """
        history = self.execution_history
        if not history:
            return "暂无执行记录"

        statuses = [entry.get('status') for entry in history if isinstance(entry, dict)]
        total_executions = len(history)
        successful_executions = statuses.count('completed')
        failed_executions = statuses.count('failed')

        return f"执行 {total_executions} 次,成功 {successful_executions} 次,失败 {failed_executions} 次"