Base setup

This commit is contained in:
2025-09-23 13:30:03 +08:00
parent 1057ed8690
commit e51154bb29
34 changed files with 2574 additions and 1 deletion

View File

@@ -0,0 +1,106 @@
# Generated by Django 5.2.6 on 2025-09-22 16:27
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial schema migration for the ``crawler`` app.

    Creates five models, in this order: ``SearchKeyword``, ``Website``,
    ``CrawlTask`` (many-to-many to ``Website``), ``CrawlLog`` and
    ``CrawledContent`` (each with foreign keys to ``CrawlTask`` and
    ``Website``).
    """

    initial = True

    # No prerequisite migrations are declared.
    dependencies = []

    operations = [
        # --- SearchKeyword: unique keyword strings with an active flag ---
        migrations.CreateModel(
            name="SearchKeyword",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "keyword",
                    models.CharField(
                        max_length=100,
                        unique=True,
                        verbose_name="关键字",
                    ),
                ),
                (
                    "is_active",
                    models.BooleanField(default=True, verbose_name="是否启用"),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "last_used",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="最后使用时间",
                    ),
                ),
            ],
            options={
                "verbose_name": "搜索关键字",
                "verbose_name_plural": "搜索关键字",
                "ordering": ["-last_used", "-created_at"],
            },
        ),
        # --- Website: named URL with a region and an active flag ---
        migrations.CreateModel(
            name="Website",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "name",
                    models.CharField(max_length=100, verbose_name="网站名称"),
                ),
                (
                    "url",
                    models.URLField(verbose_name="网站地址"),
                ),
                (
                    "region",
                    models.CharField(max_length=50, verbose_name="所属地区"),
                ),
                (
                    "is_active",
                    models.BooleanField(default=True, verbose_name="是否启用"),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "updated_at",
                    models.DateTimeField(
                        auto_now=True,
                        verbose_name="更新时间",
                    ),
                ),
            ],
            options={
                "verbose_name": "目标网站",
                "verbose_name_plural": "目标网站",
                "ordering": ["region", "name"],
            },
        ),
        # --- CrawlTask: status-tracked task linked to many websites ---
        migrations.CreateModel(
            name="CrawlTask",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "name",
                    models.CharField(max_length=200, verbose_name="任务名称"),
                ),
                (
                    "keywords",
                    models.TextField(
                        help_text="多个关键字用逗号分隔",
                        verbose_name="搜索关键字",
                    ),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "待执行"),
                            ("running", "执行中"),
                            ("completed", "已完成"),
                            ("failed", "执行失败"),
                            ("cancelled", "已取消"),
                        ],
                        default="pending",
                        max_length=20,
                        verbose_name="任务状态",
                    ),
                ),
                (
                    "created_by",
                    models.CharField(
                        default="system",
                        max_length=100,
                        verbose_name="创建者",
                    ),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "started_at",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="开始时间",
                    ),
                ),
                (
                    "completed_at",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="完成时间",
                    ),
                ),
                (
                    "error_message",
                    models.TextField(blank=True, verbose_name="错误信息"),
                ),
                (
                    "total_pages",
                    models.IntegerField(default=0, verbose_name="总页数"),
                ),
                (
                    "crawled_pages",
                    models.IntegerField(default=0, verbose_name="已爬取页数"),
                ),
                (
                    "websites",
                    models.ManyToManyField(
                        to="crawler.website",
                        verbose_name="目标网站",
                    ),
                ),
            ],
            options={
                "verbose_name": "爬取任务",
                "verbose_name_plural": "爬取任务",
                "ordering": ["-created_at"],
            },
        ),
        # --- CrawlLog: levelled log line for a task, optionally a website ---
        migrations.CreateModel(
            name="CrawlLog",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "level",
                    models.CharField(
                        choices=[
                            ("info", "信息"),
                            ("warning", "警告"),
                            ("error", "错误"),
                            ("debug", "调试"),
                        ],
                        max_length=20,
                        verbose_name="日志级别",
                    ),
                ),
                (
                    "message",
                    models.TextField(verbose_name="日志消息"),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="记录时间",
                    ),
                ),
                (
                    "task",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="logs",
                        to="crawler.crawltask",
                        verbose_name="所属任务",
                    ),
                ),
                (
                    "website",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        to="crawler.website",
                        verbose_name="相关网站",
                    ),
                ),
            ],
            options={
                "verbose_name": "爬取日志",
                "verbose_name_plural": "爬取日志",
                "ordering": ["-created_at"],
            },
        ),
        # --- CrawledContent: one crawled item tied to a task and a website ---
        migrations.CreateModel(
            name="CrawledContent",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "title",
                    models.CharField(max_length=500, verbose_name="标题"),
                ),
                (
                    "content",
                    models.TextField(verbose_name="内容"),
                ),
                (
                    "url",
                    models.URLField(verbose_name="原文链接"),
                ),
                (
                    "publish_date",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="发布时间",
                    ),
                ),
                (
                    "author",
                    models.CharField(
                        blank=True,
                        max_length=100,
                        verbose_name="作者",
                    ),
                ),
                (
                    "keywords_matched",
                    models.TextField(
                        help_text="匹配到的关键字,用逗号分隔",
                        verbose_name="匹配的关键字",
                    ),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="爬取时间",
                    ),
                ),
                (
                    "task",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="contents",
                        to="crawler.crawltask",
                        verbose_name="所属任务",
                    ),
                ),
                (
                    "website",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="crawler.website",
                        verbose_name="来源网站",
                    ),
                ),
            ],
            options={
                "verbose_name": "爬取内容",
                "verbose_name_plural": "爬取内容",
                "ordering": ["-created_at"],
                "indexes": [
                    models.Index(
                        fields=["task", "website"],
                        name="crawler_cra_task_id_6244e7_idx",
                    ),
                    models.Index(
                        fields=["created_at"],
                        name="crawler_cra_created_a116d2_idx",
                    ),
                    models.Index(
                        fields=["publish_date"],
                        name="crawler_cra_publish_5b8ccc_idx",
                    ),
                ],
            },
        ),
    ]

View File

@@ -0,0 +1,24 @@
# Generated by Django 5.2.6 on 2025-09-23 00:38
import crawler.models
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add local-storage tracking fields to ``CrawledContent``.

    Adds a boolean ``is_local_saved`` flag (default ``False``) and an
    optional ``local_file`` file field whose upload path is computed by
    ``crawler.models.crawled_content_file_path``.
    """

    dependencies = [
        ("crawler", "0001_initial"),
    ]

    operations = [
        migrations.AddField(
            model_name="crawledcontent",
            name="is_local_saved",
            field=models.BooleanField(
                default=False,
                verbose_name="是否已本地保存",
            ),
        ),
        migrations.AddField(
            model_name="crawledcontent",
            name="local_file",
            field=models.FileField(
                blank=True,
                null=True,
                # Callable reference: must stay importable from crawler.models.
                upload_to=crawler.models.crawled_content_file_path,
                verbose_name="本地文件",
            ),
        ),
    ]

View File

@@ -0,0 +1,35 @@
# Generated by Django 5.2.6 on 2025-09-23 01:05
import crawler.models
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the ``MediaFile`` model attached to ``CrawledContent``.

    Each row records a typed media item (image / video / audio / document)
    with its original URL, a local file, optional size / MIME / alt-text
    metadata, and a cascading foreign key to the owning content row.
    """

    dependencies = [
        ("crawler", "0002_crawledcontent_is_local_saved_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="MediaFile",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "media_type",
                    models.CharField(
                        choices=[
                            ("image", "图片"),
                            ("video", "视频"),
                            ("audio", "音频"),
                            ("document", "文档"),
                        ],
                        max_length=20,
                        verbose_name="媒体类型",
                    ),
                ),
                (
                    "original_url",
                    models.URLField(verbose_name="原始URL"),
                ),
                (
                    "local_file",
                    models.FileField(
                        # Callable reference: must stay importable from crawler.models.
                        upload_to=crawler.models.media_file_path,
                        verbose_name="本地文件",
                    ),
                ),
                (
                    "file_size",
                    models.BigIntegerField(
                        blank=True,
                        null=True,
                        verbose_name="文件大小(字节)",
                    ),
                ),
                (
                    "mime_type",
                    models.CharField(
                        blank=True,
                        max_length=100,
                        verbose_name="MIME类型",
                    ),
                ),
                (
                    "alt_text",
                    models.CharField(
                        blank=True,
                        max_length=500,
                        verbose_name="替代文本",
                    ),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "content",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="media_files",
                        to="crawler.crawledcontent",
                        verbose_name="所属内容",
                    ),
                ),
            ],
            options={
                "verbose_name": "媒体文件",
                "verbose_name_plural": "媒体文件",
                "ordering": ["-created_at"],
                "indexes": [
                    models.Index(
                        fields=["content", "media_type"],
                        name="crawler_med_content_3a9468_idx",
                    ),
                    models.Index(
                        fields=["created_at"],
                        name="crawler_med_created_13ff00_idx",
                    ),
                ],
            },
        ),
    ]

View File