245 lines
9.5 KiB
Python
245 lines
9.5 KiB
Python
from django.db import models
|
|
from django.utils import timezone
|
|
import json
|
|
|
|
|
|
class SiteConfig(models.Model):
    """Site-wide presentation settings: the title and the page-header appearance.

    ``get_config`` is the intended accessor; it treats the table as holding a
    single configuration row and creates that row on first use.
    """

    # (stored value, admin label) pairs for ``header_background_size``.
    # NOTE(review): the stored values look like CSS ``background-size``
    # values -- confirm against the template that consumes them.
    BACKGROUND_SIZE_CHOICES = [
        ('cover', '覆盖整个区域'),
        ('contain', '完整显示图片'),
        ('100% 100%', '拉伸填满'),
        ('auto', '原始大小'),
    ]

    # (stored value, admin label) pairs for ``header_background_position``.
    # NOTE(review): presumably CSS ``background-position`` values -- confirm.
    BACKGROUND_POSITION_CHOICES = [
        ('center', '居中'),
        ('top', '顶部'),
        ('bottom', '底部'),
        ('left', '左侧'),
        ('right', '右侧'),
        ('top left', '左上角'),
        ('top right', '右上角'),
        ('bottom left', '左下角'),
        ('bottom right', '右下角'),
    ]

    site_title = models.CharField(max_length=200, default="绿美泉烟绿色课堂", verbose_name="网站标题")
    show_title = models.BooleanField(default=True, verbose_name="前台显示标题")
    header_background_image = models.ImageField(
        upload_to='site_config/',
        blank=True,
        null=True,
        verbose_name="版头背景图片"
    )
    # max_length=7 fits a "#rrggbb" value such as the default.
    header_background_color = models.CharField(
        max_length=7,
        default="#667eea",
        verbose_name="版头背景颜色"
    )
    header_background_size = models.CharField(
        max_length=20,
        choices=BACKGROUND_SIZE_CHOICES,
        default='cover',
        verbose_name="背景图片大小"
    )
    header_background_position = models.CharField(
        max_length=20,
        choices=BACKGROUND_POSITION_CHOICES,
        default='center',
        verbose_name="背景图片位置"
    )
    # Header height; the verbose name states the unit is pixels.
    header_height = models.IntegerField(
        default=200,
        verbose_name="版头高度(像素)"
    )
    created_at = models.DateTimeField(auto_now_add=True, verbose_name="创建时间")
    updated_at = models.DateTimeField(auto_now=True, verbose_name="更新时间")

    class Meta:
        verbose_name = "网站配置"
        verbose_name_plural = "网站配置"

    def __str__(self):
        return f"网站配置 - {self.site_title}"

    @classmethod
    def get_config(cls):
        """Return the site configuration, creating a default row if none exists.

        The previous implementation called ``get_or_create`` without lookup
        arguments, which raises ``MultipleObjectsReturned`` as soon as the
        table contains more than one row. Picking the lowest primary key is
        deterministic and keeps working in that situation.

        Returns:
            SiteConfig: the existing row with the lowest primary key, or a
            newly created row carrying the default title and colour.
        """
        config = cls.objects.order_by('pk').first()
        if config is None:
            config = cls.objects.create(
                site_title='绿美泉烟绿色课堂',
                header_background_color='#667eea',
            )
        return config
|
|
|
|
|
|
class Website(models.Model):
    """A website registered with the application.

    ``Article`` and ``CrawlTask`` both reference this model.
    """

    name = models.CharField(
        max_length=100,
        unique=True,
    )
    base_url = models.URLField()
    description = models.TextField(
        blank=True,
        null=True,
    )
    article_list_url = models.URLField(
        blank=True,
        null=True,
    )
    # NOTE(review): the two selectors are presumably CSS selectors used when
    # extracting articles -- confirm against the crawler code.
    article_selector = models.CharField(
        max_length=255,
        blank=True,
        null=True,
    )
    content_selector = models.CharField(
        max_length=255,
        blank=True,
        null=True,
    )
    enabled = models.BooleanField(default=True)

    def __str__(self):
        """Return the site's unique name."""
        return self.name
|
|
|
|
|
|
class Article(models.Model):
    """An article belonging to a ``Website``; rows are unique per URL."""

    # Deleting a Website deletes its articles as well (CASCADE).
    website = models.ForeignKey(
        Website,
        on_delete=models.CASCADE,
    )
    title = models.CharField(max_length=300)
    url = models.URLField(unique=True)
    pub_date = models.DateTimeField(
        null=True,
        blank=True,
    )
    content = models.TextField()  # HTML content
    created_at = models.DateTimeField(auto_now_add=True)
    # JSON list, empty by default; the shape of its entries is not visible in
    # this file.
    media_files = models.JSONField(
        default=list,
        blank=True,
    )

    def __str__(self):
        """Return the article title."""
        return self.title
|
|
|
|
|
|
class CrawlTask(models.Model):
    """A crawl job: its parameters, live progress, result counters and run history."""

    # (stored value, admin label) pairs for ``status``.
    TASK_STATUS_CHOICES = [
        ('pending', '等待中'),
        ('running', '运行中'),
        ('completed', '已完成'),
        ('failed', '失败'),
        ('cancelled', '已取消'),
    ]

    # (stored value, admin label) pairs for ``task_type``.
    TASK_TYPE_CHOICES = [
        ('keyword', '关键词搜索'),
        ('historical', '历史文章'),
        ('full_site', '全站爬取'),
    ]

    # Number of most recent execution records kept in ``execution_history``.
    EXECUTION_HISTORY_LIMIT = 10

    # Task parameters.
    name = models.CharField(max_length=200, verbose_name="任务名称")
    task_type = models.CharField(max_length=20, choices=TASK_TYPE_CHOICES, default='keyword', verbose_name="任务类型")
    keyword = models.CharField(max_length=200, blank=True, null=True, verbose_name="搜索关键词")
    websites = models.ManyToManyField(Website, blank=True, verbose_name="目标网站")
    start_date = models.DateField(blank=True, null=True, verbose_name="开始日期")
    end_date = models.DateField(blank=True, null=True, verbose_name="结束日期")
    max_pages = models.IntegerField(default=10, verbose_name="最大页数")
    max_articles = models.IntegerField(default=100, verbose_name="最大文章数")

    # Live state.
    status = models.CharField(max_length=20, choices=TASK_STATUS_CHOICES, default='pending', verbose_name="状态")
    progress = models.IntegerField(default=0, verbose_name="进度百分比")
    current_website = models.CharField(max_length=100, blank=True, null=True, verbose_name="当前网站")
    current_action = models.CharField(max_length=200, blank=True, null=True, verbose_name="当前操作")

    # Result counters.
    total_articles = models.IntegerField(default=0, verbose_name="总文章数")
    success_count = models.IntegerField(default=0, verbose_name="成功数")
    failed_count = models.IntegerField(default=0, verbose_name="失败数")

    # Timestamps.
    created_at = models.DateTimeField(auto_now_add=True, verbose_name="创建时间")
    started_at = models.DateTimeField(blank=True, null=True, verbose_name="开始时间")
    completed_at = models.DateTimeField(blank=True, null=True, verbose_name="完成时间")

    error_message = models.TextField(blank=True, null=True, verbose_name="错误信息")
    result_details = models.JSONField(default=dict, blank=True, verbose_name="结果详情")

    created_by = models.CharField(max_length=100, blank=True, null=True, verbose_name="创建者")

    # Execution history fields.
    execution_count = models.IntegerField(default=0, verbose_name="执行次数")
    last_execution_at = models.DateTimeField(blank=True, null=True, verbose_name="最后执行时间")
    execution_history = models.JSONField(default=list, blank=True, verbose_name="执行历史")

    class Meta:
        verbose_name = "爬取任务"
        verbose_name_plural = "爬取任务"
        ordering = ['-created_at']

    def __str__(self):
        return f"{self.name} ({self.get_status_display()})"

    def get_websites_display(self):
        """Return the target website names joined by ", ".

        Returns the "all websites" label when no website is attached, when
        every attached website has an empty name, or when the lookup fails.
        """
        try:
            names = [str(site.name) for site in self.websites.all() if site.name]
        except Exception:
            # Deliberate best-effort: this is display text, so any lookup
            # failure falls back to the default label instead of propagating.
            return "所有网站"
        return ", ".join(names) or "所有网站"

    def get_duration(self):
        """Return the elapsed run time as a ``timedelta``.

        Returns ``None`` when the task has not started. For a task without a
        completion time the duration is measured up to the current time.
        """
        if not self.started_at:
            return None
        end_time = self.completed_at or timezone.now()
        return end_time - self.started_at

    def is_running(self):
        """Return True when the task status is 'running'."""
        return self.status == 'running'

    def can_cancel(self):
        """Return True when the task is still pending or running."""
        return self.status in ('pending', 'running')

    def get_progress_display(self):
        """Return a short human-readable description of the current state.

        For failed tasks the error message is cut to 50 characters; the
        trailing "..." is added only when something was actually cut off.
        """
        if self.status == 'pending':
            return "等待开始"
        if self.status == 'running':
            if self.current_website and self.current_action:
                return f"正在处理 {self.current_website}: {self.current_action}"
            return f"运行中 ({self.progress}%)"
        if self.status == 'completed':
            return f"已完成 ({self.success_count}/{self.total_articles})"
        if self.status == 'failed':
            if not self.error_message:
                return "失败"
            if len(self.error_message) > 50:
                return f"失败: {self.error_message[:50]}..."
            return f"失败: {self.error_message}"
        if self.status == 'cancelled':
            return "已取消"
        return "未知状态"

    def add_execution_record(self, status, started_at=None, completed_at=None, error_message=None):
        """Append a run record to ``execution_history`` and save the task.

        Args:
            status: status string stored in the record.
            started_at: datetime the run started; defaults to the current time.
            completed_at: datetime the run finished, or None.
            error_message: error text stored in the record, or None.

        Side effects:
            Increments ``execution_count``, sets ``last_execution_at``, trims
            the history to the newest ``EXECUTION_HISTORY_LIMIT`` records and
            calls ``save()``.
        """
        if not started_at:
            started_at = timezone.now()

        # Normalise before touching the history so a None value cannot raise.
        history = list(self.execution_history or [])

        # Continue from the highest retained id. Using len(history) + 1 would
        # repeat the same id for every run once the history has been trimmed.
        last_id = max((rec.get('execution_id') or 0 for rec in history), default=0)

        history.append({
            'execution_id': last_id + 1,
            'started_at': started_at.isoformat(),
            'completed_at': completed_at.isoformat() if completed_at else None,
            'status': status,
            'error_message': error_message,
            'success_count': self.success_count,
            'failed_count': self.failed_count,
            'total_articles': self.total_articles,
        })

        # Keep only the most recent records.
        self.execution_history = history[-self.EXECUTION_HISTORY_LIMIT:]

        self.execution_count += 1
        self.last_execution_at = started_at

        self.save()

    def get_execution_summary(self):
        """Return a one-line summary of the retained execution records.

        Counts cover only the records still present in ``execution_history``,
        which ``add_execution_record`` trims to the most recent entries.
        """
        history = self.execution_history
        if not history:
            return "暂无执行记录"

        total_executions = len(history)
        # .get() so a record without a 'status' key is skipped, not a KeyError.
        successful_executions = sum(1 for rec in history if rec.get('status') == 'completed')
        failed_executions = sum(1 for rec in history if rec.get('status') == 'failed')

        return f"执行 {total_executions} 次,成功 {successful_executions} 次,失败 {failed_executions} 次"