Base setup
This commit is contained in:
106
crawler/migrations/0001_initial.py
Normal file
106
crawler/migrations/0001_initial.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# Generated by Django 5.2.6 on 2025-09-22 16:27
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema migration for the ``crawler`` app.

    Creates five models in order: ``SearchKeyword``, ``Website``,
    ``CrawlTask`` (many-to-many to ``Website``), ``CrawlLog`` and
    ``CrawledContent`` (each with foreign keys to ``CrawlTask`` and
    ``Website``).
    """

    initial = True

    # No dependencies are declared for this migration.
    dependencies = []

    operations = [
        # Search keywords; each keyword string is unique.
        migrations.CreateModel(
            name="SearchKeyword",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "keyword",
                    models.CharField(
                        max_length=100,
                        unique=True,
                        verbose_name="关键字",
                    ),
                ),
                (
                    "is_active",
                    models.BooleanField(
                        default=True,
                        verbose_name="是否启用",
                    ),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "last_used",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="最后使用时间",
                    ),
                ),
            ],
            options={
                "verbose_name": "搜索关键字",
                "verbose_name_plural": "搜索关键字",
                # Descending by last use, then by creation time.
                "ordering": ["-last_used", "-created_at"],
            },
        ),
        # Target websites, ordered by region and then name.
        migrations.CreateModel(
            name="Website",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "name",
                    models.CharField(
                        max_length=100,
                        verbose_name="网站名称",
                    ),
                ),
                (
                    "url",
                    models.URLField(verbose_name="网站地址"),
                ),
                (
                    "region",
                    models.CharField(
                        max_length=50,
                        verbose_name="所属地区",
                    ),
                ),
                (
                    "is_active",
                    models.BooleanField(
                        default=True,
                        verbose_name="是否启用",
                    ),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "updated_at",
                    models.DateTimeField(
                        auto_now=True,
                        verbose_name="更新时间",
                    ),
                ),
            ],
            options={
                "verbose_name": "目标网站",
                "verbose_name_plural": "目标网站",
                "ordering": ["region", "name"],
            },
        ),
        # Crawl tasks; linked to websites through a many-to-many relation.
        migrations.CreateModel(
            name="CrawlTask",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "name",
                    models.CharField(
                        max_length=200,
                        verbose_name="任务名称",
                    ),
                ),
                (
                    "keywords",
                    models.TextField(
                        help_text="多个关键字用逗号分隔",
                        verbose_name="搜索关键字",
                    ),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "待执行"),
                            ("running", "执行中"),
                            ("completed", "已完成"),
                            ("failed", "执行失败"),
                            ("cancelled", "已取消"),
                        ],
                        default="pending",
                        max_length=20,
                        verbose_name="任务状态",
                    ),
                ),
                (
                    "created_by",
                    models.CharField(
                        default="system",
                        max_length=100,
                        verbose_name="创建者",
                    ),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="创建时间",
                    ),
                ),
                (
                    "started_at",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="开始时间",
                    ),
                ),
                (
                    "completed_at",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="完成时间",
                    ),
                ),
                (
                    "error_message",
                    models.TextField(
                        blank=True,
                        verbose_name="错误信息",
                    ),
                ),
                (
                    "total_pages",
                    models.IntegerField(
                        default=0,
                        verbose_name="总页数",
                    ),
                ),
                (
                    "crawled_pages",
                    models.IntegerField(
                        default=0,
                        verbose_name="已爬取页数",
                    ),
                ),
                (
                    "websites",
                    models.ManyToManyField(
                        to="crawler.website",
                        verbose_name="目标网站",
                    ),
                ),
            ],
            options={
                "verbose_name": "爬取任务",
                "verbose_name_plural": "爬取任务",
                "ordering": ["-created_at"],
            },
        ),
        # Log entries; deleted together with their task (CASCADE). The
        # website reference is optional and also cascades on delete.
        migrations.CreateModel(
            name="CrawlLog",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "level",
                    models.CharField(
                        choices=[
                            ("info", "信息"),
                            ("warning", "警告"),
                            ("error", "错误"),
                            ("debug", "调试"),
                        ],
                        max_length=20,
                        verbose_name="日志级别",
                    ),
                ),
                (
                    "message",
                    models.TextField(verbose_name="日志消息"),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="记录时间",
                    ),
                ),
                (
                    "task",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="logs",
                        to="crawler.crawltask",
                        verbose_name="所属任务",
                    ),
                ),
                (
                    "website",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        to="crawler.website",
                        verbose_name="相关网站",
                    ),
                ),
            ],
            options={
                "verbose_name": "爬取日志",
                "verbose_name_plural": "爬取日志",
                "ordering": ["-created_at"],
            },
        ),
        # Crawled content rows; both foreign keys cascade on delete.
        migrations.CreateModel(
            name="CrawledContent",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "title",
                    models.CharField(
                        max_length=500,
                        verbose_name="标题",
                    ),
                ),
                (
                    "content",
                    models.TextField(verbose_name="内容"),
                ),
                (
                    "url",
                    models.URLField(verbose_name="原文链接"),
                ),
                (
                    "publish_date",
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name="发布时间",
                    ),
                ),
                (
                    "author",
                    models.CharField(
                        blank=True,
                        max_length=100,
                        verbose_name="作者",
                    ),
                ),
                (
                    "keywords_matched",
                    models.TextField(
                        help_text="匹配到的关键字,用逗号分隔",
                        verbose_name="匹配的关键字",
                    ),
                ),
                (
                    "created_at",
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name="爬取时间",
                    ),
                ),
                (
                    "task",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="contents",
                        to="crawler.crawltask",
                        verbose_name="所属任务",
                    ),
                ),
                (
                    "website",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="crawler.website",
                        verbose_name="来源网站",
                    ),
                ),
            ],
            options={
                "verbose_name": "爬取内容",
                "verbose_name_plural": "爬取内容",
                "ordering": ["-created_at"],
                # Index names are kept exactly as generated so they match
                # the names recorded in the migration state.
                "indexes": [
                    models.Index(
                        fields=["task", "website"],
                        name="crawler_cra_task_id_6244e7_idx",
                    ),
                    models.Index(
                        fields=["created_at"],
                        name="crawler_cra_created_a116d2_idx",
                    ),
                    models.Index(
                        fields=["publish_date"],
                        name="crawler_cra_publish_5b8ccc_idx",
                    ),
                ],
            },
        ),
    ]
|
||||
Reference in New Issue
Block a user