Base setup
This commit is contained in:
106
crawler/migrations/0001_initial.py
Normal file
106
crawler/migrations/0001_initial.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# Generated by Django 5.2.6 on 2025-09-22 16:27
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial schema migration for the ``crawler`` app.

    Creates five models, in this order: ``SearchKeyword``, ``Website``,
    ``CrawlTask``, ``CrawlLog`` and ``CrawledContent``.  The order is
    significant: ``CrawlTask`` points at ``Website``, and the log and
    content models point at both ``CrawlTask`` and ``Website``.
    """

    initial = True

    # Nothing has to be applied before this migration.
    dependencies = []

    operations = [
        # Search keywords, unique per keyword string.
        migrations.CreateModel(
            name='SearchKeyword',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'keyword',
                    models.CharField(
                        max_length=100,
                        unique=True,
                        verbose_name='关键字',
                    ),
                ),
                (
                    'is_active',
                    models.BooleanField(
                        default=True,
                        verbose_name='是否启用',
                    ),
                ),
                (
                    'created_at',
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name='创建时间',
                    ),
                ),
                (
                    'last_used',
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name='最后使用时间',
                    ),
                ),
            ],
            options={
                'verbose_name': '搜索关键字',
                'verbose_name_plural': '搜索关键字',
                'ordering': ['-last_used', '-created_at'],
            },
        ),
        # Target websites, ordered by region and then name.
        migrations.CreateModel(
            name='Website',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'name',
                    models.CharField(
                        max_length=100,
                        verbose_name='网站名称',
                    ),
                ),
                (
                    'url',
                    models.URLField(verbose_name='网站地址'),
                ),
                (
                    'region',
                    models.CharField(
                        max_length=50,
                        verbose_name='所属地区',
                    ),
                ),
                (
                    'is_active',
                    models.BooleanField(
                        default=True,
                        verbose_name='是否启用',
                    ),
                ),
                (
                    'created_at',
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name='创建时间',
                    ),
                ),
                (
                    'updated_at',
                    models.DateTimeField(
                        auto_now=True,
                        verbose_name='更新时间',
                    ),
                ),
            ],
            options={
                'verbose_name': '目标网站',
                'verbose_name_plural': '目标网站',
                'ordering': ['region', 'name'],
            },
        ),
        # Crawl tasks; each task is linked to many websites.
        migrations.CreateModel(
            name='CrawlTask',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'name',
                    models.CharField(
                        max_length=200,
                        verbose_name='任务名称',
                    ),
                ),
                (
                    'keywords',
                    models.TextField(
                        help_text='多个关键字用逗号分隔',
                        verbose_name='搜索关键字',
                    ),
                ),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('pending', '待执行'),
                            ('running', '执行中'),
                            ('completed', '已完成'),
                            ('failed', '执行失败'),
                            ('cancelled', '已取消'),
                        ],
                        default='pending',
                        max_length=20,
                        verbose_name='任务状态',
                    ),
                ),
                (
                    'created_by',
                    models.CharField(
                        default='system',
                        max_length=100,
                        verbose_name='创建者',
                    ),
                ),
                (
                    'created_at',
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name='创建时间',
                    ),
                ),
                (
                    'started_at',
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name='开始时间',
                    ),
                ),
                (
                    'completed_at',
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name='完成时间',
                    ),
                ),
                (
                    'error_message',
                    models.TextField(
                        blank=True,
                        verbose_name='错误信息',
                    ),
                ),
                (
                    'total_pages',
                    models.IntegerField(
                        default=0,
                        verbose_name='总页数',
                    ),
                ),
                (
                    'crawled_pages',
                    models.IntegerField(
                        default=0,
                        verbose_name='已爬取页数',
                    ),
                ),
                (
                    'websites',
                    models.ManyToManyField(
                        to='crawler.website',
                        verbose_name='目标网站',
                    ),
                ),
            ],
            options={
                'verbose_name': '爬取任务',
                'verbose_name_plural': '爬取任务',
                'ordering': ['-created_at'],
            },
        ),
        # Log entries attached to a task, optionally to a website.
        migrations.CreateModel(
            name='CrawlLog',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'level',
                    models.CharField(
                        choices=[
                            ('info', '信息'),
                            ('warning', '警告'),
                            ('error', '错误'),
                            ('debug', '调试'),
                        ],
                        max_length=20,
                        verbose_name='日志级别',
                    ),
                ),
                (
                    'message',
                    models.TextField(verbose_name='日志消息'),
                ),
                (
                    'created_at',
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name='记录时间',
                    ),
                ),
                (
                    'task',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='logs',
                        to='crawler.crawltask',
                        verbose_name='所属任务',
                    ),
                ),
                (
                    'website',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        to='crawler.website',
                        verbose_name='相关网站',
                    ),
                ),
            ],
            options={
                'verbose_name': '爬取日志',
                'verbose_name_plural': '爬取日志',
                'ordering': ['-created_at'],
            },
        ),
        # Crawled content, tied to the task and the source website.
        migrations.CreateModel(
            name='CrawledContent',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'title',
                    models.CharField(
                        max_length=500,
                        verbose_name='标题',
                    ),
                ),
                (
                    'content',
                    models.TextField(verbose_name='内容'),
                ),
                (
                    'url',
                    models.URLField(verbose_name='原文链接'),
                ),
                (
                    'publish_date',
                    models.DateTimeField(
                        blank=True,
                        null=True,
                        verbose_name='发布时间',
                    ),
                ),
                (
                    'author',
                    models.CharField(
                        blank=True,
                        max_length=100,
                        verbose_name='作者',
                    ),
                ),
                (
                    'keywords_matched',
                    models.TextField(
                        help_text='匹配到的关键字,用逗号分隔',
                        verbose_name='匹配的关键字',
                    ),
                ),
                (
                    'created_at',
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name='爬取时间',
                    ),
                ),
                (
                    'task',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='contents',
                        to='crawler.crawltask',
                        verbose_name='所属任务',
                    ),
                ),
                (
                    'website',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to='crawler.website',
                        verbose_name='来源网站',
                    ),
                ),
            ],
            options={
                'verbose_name': '爬取内容',
                'verbose_name_plural': '爬取内容',
                'ordering': ['-created_at'],
                # Index names are generated; they must stay exactly as
                # written so later migrations can refer to them.
                'indexes': [
                    models.Index(
                        fields=['task', 'website'],
                        name='crawler_cra_task_id_6244e7_idx',
                    ),
                    models.Index(
                        fields=['created_at'],
                        name='crawler_cra_created_a116d2_idx',
                    ),
                    models.Index(
                        fields=['publish_date'],
                        name='crawler_cra_publish_5b8ccc_idx',
                    ),
                ],
            },
        ),
    ]
|
||||
24
crawler/migrations/0002_crawledcontent_is_local_saved_and_more.py
Normal file
@@ -0,0 +1,24 @@
|
||||
# Generated by Django 5.2.6 on 2025-09-23 00:38
|
||||
|
||||
import crawler.models
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add local-save tracking fields to ``CrawledContent``.

    Adds two columns to the ``crawledcontent`` model:

    * ``is_local_saved`` -- boolean flag, ``False`` by default.
    * ``local_file`` -- optional file field whose storage path is produced
      by ``crawler.models.crawled_content_file_path``.
    """

    # Must run after the initial schema has been created.
    dependencies = [
        ('crawler', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='crawledcontent',
            name='is_local_saved',
            field=models.BooleanField(
                default=False,
                verbose_name='是否已本地保存',
            ),
        ),
        migrations.AddField(
            model_name='crawledcontent',
            name='local_file',
            field=models.FileField(
                blank=True,
                null=True,
                upload_to=crawler.models.crawled_content_file_path,
                verbose_name='本地文件',
            ),
        ),
    ]
|
||||
35
crawler/migrations/0003_mediafile.py
Normal file
35
crawler/migrations/0003_mediafile.py
Normal file
@@ -0,0 +1,35 @@
|
||||
# Generated by Django 5.2.6 on 2025-09-23 01:05
|
||||
|
||||
import crawler.models
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the ``MediaFile`` model.

    Each ``MediaFile`` row belongs to one ``CrawledContent`` row (reverse
    accessor ``media_files``) and is removed together with it.  The stored
    file path is produced by ``crawler.models.media_file_path``.
    """

    # Requires the CrawledContent changes from the previous migration.
    dependencies = [
        ('crawler', '0002_crawledcontent_is_local_saved_and_more'),
    ]

    operations = [
        migrations.CreateModel(
            name='MediaFile',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'media_type',
                    models.CharField(
                        choices=[
                            ('image', '图片'),
                            ('video', '视频'),
                            ('audio', '音频'),
                            ('document', '文档'),
                        ],
                        max_length=20,
                        verbose_name='媒体类型',
                    ),
                ),
                (
                    'original_url',
                    models.URLField(verbose_name='原始URL'),
                ),
                (
                    'local_file',
                    models.FileField(
                        upload_to=crawler.models.media_file_path,
                        verbose_name='本地文件',
                    ),
                ),
                (
                    'file_size',
                    models.BigIntegerField(
                        blank=True,
                        null=True,
                        verbose_name='文件大小(字节)',
                    ),
                ),
                (
                    'mime_type',
                    models.CharField(
                        blank=True,
                        max_length=100,
                        verbose_name='MIME类型',
                    ),
                ),
                (
                    'alt_text',
                    models.CharField(
                        blank=True,
                        max_length=500,
                        verbose_name='替代文本',
                    ),
                ),
                (
                    'created_at',
                    models.DateTimeField(
                        auto_now_add=True,
                        verbose_name='创建时间',
                    ),
                ),
                (
                    'content',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='media_files',
                        to='crawler.crawledcontent',
                        verbose_name='所属内容',
                    ),
                ),
            ],
            options={
                'verbose_name': '媒体文件',
                'verbose_name_plural': '媒体文件',
                'ordering': ['-created_at'],
                # Index names are generated; keep them exactly as written.
                'indexes': [
                    models.Index(
                        fields=['content', 'media_type'],
                        name='crawler_med_content_3a9468_idx',
                    ),
                    models.Index(
                        fields=['created_at'],
                        name='crawler_med_created_13ff00_idx',
                    ),
                ],
            },
        ),
    ]
|
||||
0
crawler/migrations/__init__.py
Normal file
0
crawler/migrations/__init__.py
Normal file
Reference in New Issue
Block a user