Support keyword
This commit is contained in:
@@ -16,9 +16,10 @@ from django.utils import timezone
|
||||
from django.db.models import Count, Q
|
||||
from django.core.cache import cache
|
||||
|
||||
from .models import Website, Article
|
||||
from .models import Website, Article, CrawlTask
|
||||
from .tasks import crawl_website, crawl_all_websites, cleanup_old_articles
|
||||
from .distributed_crawler import distributed_crawler
|
||||
from .task_executor import task_executor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -411,6 +412,419 @@ class ArticleAdmin(admin.ModelAdmin):
|
||||
actions_column.short_description = '操作'
|
||||
|
||||
|
||||
class CrawlTaskStatusFilter(SimpleListFilter):
    """Admin sidebar filter that narrows crawl tasks by their ``status`` value."""

    title = '任务状态'
    parameter_name = 'status'

    def lookups(self, request, model_admin):
        """Return the ``(query value, label)`` pairs offered by the filter."""
        status_choices = (
            ('pending', '等待中'),
            ('running', '运行中'),
            ('completed', '已完成'),
            ('failed', '失败'),
            ('cancelled', '已取消'),
        )
        return status_choices

    def queryset(self, request, queryset):
        """Restrict ``queryset`` to the selected status; pass it through when none is selected."""
        selected = self.value()
        if not selected:
            return queryset
        return queryset.filter(status=selected)
|
||||
|
||||
|
||||
class CrawlTaskTypeFilter(SimpleListFilter):
    """Admin sidebar filter that narrows crawl tasks by their ``task_type`` value."""

    title = '任务类型'
    parameter_name = 'task_type'

    def lookups(self, request, model_admin):
        """Return the ``(query value, label)`` pairs offered by the filter."""
        type_choices = (
            ('keyword', '关键词搜索'),
            ('historical', '历史文章'),
            ('full_site', '全站爬取'),
        )
        return type_choices

    def queryset(self, request, queryset):
        """Restrict ``queryset`` to the selected task type; pass it through when none is selected."""
        selected = self.value()
        if not selected:
            return queryset
        return queryset.filter(task_type=selected)
|
||||
|
||||
|
||||
class CrawlTaskAdmin(admin.ModelAdmin):
    """Admin configuration for ``CrawlTask``.

    Provides status/type sidebar filters, bulk start/cancel/cleanup actions,
    per-row action buttons, and extra admin URLs for creating keyword,
    historical and full-site crawl tasks, starting/cancelling a single task
    and viewing a task's results.
    """

    list_display = [
        'name', 'task_type', 'keyword', 'websites_display', 'status',
        'progress_display', 'created_at', 'duration_display', 'actions_column'
    ]
    list_filter = [CrawlTaskStatusFilter, CrawlTaskTypeFilter, 'created_at']
    search_fields = ['name', 'keyword', 'created_by']
    readonly_fields = [
        'status', 'progress', 'current_website', 'current_action',
        'total_articles', 'success_count', 'failed_count',
        'created_at', 'started_at', 'completed_at', 'error_message',
        'result_details', 'duration_display', 'progress_display'
    ]
    actions = ['start_tasks', 'cancel_tasks', 'delete_completed_tasks']

    class Media:
        # Script expected to define startTask / cancelTask / viewResults,
        # which the buttons rendered by actions_column() invoke.
        js = ('admin/js/crawl_task_actions.js',)

    fieldsets = (
        ('基本信息', {
            'fields': ('name', 'task_type', 'keyword')
        }),
        ('爬取配置', {
            'fields': ('websites', 'start_date', 'end_date', 'max_pages', 'max_articles')
        }),
        ('任务状态', {
            'fields': ('status', 'progress_display', 'current_website', 'current_action'),
            'classes': ('collapse',)
        }),
        ('统计信息', {
            'fields': ('total_articles', 'success_count', 'failed_count'),
            'classes': ('collapse',)
        }),
        ('时间信息', {
            'fields': ('created_at', 'started_at', 'completed_at', 'duration_display'),
            'classes': ('collapse',)
        }),
        ('错误信息', {
            'fields': ('error_message',),
            'classes': ('collapse',)
        }),
        ('结果详情', {
            'fields': ('result_details',),
            'classes': ('collapse',)
        }),
    )

    # Colour and label of the badge shown for each non-running status;
    # any status not listed here falls back to the grey "waiting" badge.
    _STATUS_BADGES = {
        'completed': ('green', '✓ 完成'),
        'failed': ('red', '✗ 失败'),
        'cancelled': ('orange', '⊘ 已取消'),
    }
    _DEFAULT_BADGE = ('gray', '⏳ 等待')

    # ------------------------------------------------------------------
    # Change-list columns
    # ------------------------------------------------------------------

    def websites_display(self, obj):
        """Return the task's target websites as rendered by the model."""
        return obj.get_websites_display()
    websites_display.short_description = '目标网站'

    def progress_display(self, obj):
        """Render a progress bar for running tasks, or a coloured status badge otherwise."""
        if obj.status == 'running':
            return format_html(
                '<div style="width: 100px; background-color: #f0f0f0; border-radius: 3px;">'
                '<div style="width: {}%; background-color: #4CAF50; height: 20px; border-radius: 3px; text-align: center; color: white; line-height: 20px;">{}%</div>'
                '</div>',
                obj.progress, obj.progress
            )
        colour, label = self._STATUS_BADGES.get(obj.status, self._DEFAULT_BADGE)
        # Always pass arguments: calling format_html() with a bare string is
        # deprecated since Django 5.0 and rejected from 6.0.
        return format_html('<span style="color: {};">{}</span>', colour, label)
    progress_display.short_description = '进度'

    def duration_display(self, obj):
        """Format the task's run time as hours/minutes/seconds, or ``"-"`` when unavailable.

        ``obj.get_duration()`` must return a falsy value or an object with
        ``total_seconds()`` (presumably a ``timedelta`` - confirm on the model).
        """
        duration = obj.get_duration()
        if not duration:
            return "-"
        hours, remainder = divmod(int(duration.total_seconds()), 3600)
        minutes, seconds = divmod(remainder, 60)
        if hours > 0:
            return f"{hours}小时{minutes}分钟"
        if minutes > 0:
            return f"{minutes}分钟{seconds}秒"
        return f"{seconds}秒"
    duration_display.short_description = '执行时长'

    def actions_column(self, obj):
        """Render the per-row buttons (start / cancel / view results) valid for the task's state."""
        from django.utils.html import format_html_join

        buttons = []
        if obj.status == 'pending':
            buttons.append(('startTask', obj.id, '开始'))
        if obj.can_cancel():
            buttons.append(('cancelTask', obj.id, '取消'))
        if obj.status == 'completed':
            buttons.append(('viewResults', obj.id, '查看结果'))

        # format_html_join escapes every argument and copes with an empty
        # list, unlike format_html(' '.join(...)) which is deprecated when
        # called without arguments.
        return format_html_join(
            ' ',
            '<a href="javascript:void(0)" onclick="{}({})" class="button">{}</a>',
            buttons,
        )
    actions_column.short_description = '操作'

    # ------------------------------------------------------------------
    # Bulk actions
    # ------------------------------------------------------------------

    def _run_bulk(self, request, tasks, operation, verb):
        """Apply ``operation(task.id)`` to every task and report the outcome to the user.

        ``operation`` must return a ``(success, message)`` pair. ``verb`` is
        the user-facing action word inserted into the flash messages.
        """
        done = 0
        for task in tasks:
            try:
                success, message = operation(task.id)
            except Exception as exc:
                logger.exception('Bulk operation failed for crawl task %s', task.id)
                self.message_user(request, f'{verb}任务 {task.name} 失败: {exc}', messages.ERROR)
                continue
            if success:
                done += 1
            else:
                self.message_user(request, f'{verb}任务 {task.name} 失败: {message}', messages.ERROR)

        if done > 0:
            self.message_user(request, f'成功{verb} {done} 个任务', messages.SUCCESS)

    def start_tasks(self, request, queryset):
        """Admin action: start every selected task that is still pending."""
        self._run_bulk(request, queryset.filter(status='pending'), task_executor.start_task, '启动')
    start_tasks.short_description = '启动选中的任务'

    def cancel_tasks(self, request, queryset):
        """Admin action: cancel every selected task that is pending or running."""
        self._run_bulk(
            request,
            queryset.filter(status__in=['pending', 'running']),
            task_executor.cancel_task,
            '取消',
        )
    cancel_tasks.short_description = '取消选中的任务'

    def delete_completed_tasks(self, request, queryset):
        """Admin action: delete the selected tasks that are completed, failed or cancelled."""
        finished = queryset.filter(status__in=['completed', 'failed', 'cancelled'])
        # Count first: QuerySet.delete() reports related rows as well, which
        # would inflate the number shown to the user.
        count = finished.count()
        finished.delete()

        if count > 0:
            self.message_user(request, f'成功删除 {count} 个已完成的任务', messages.SUCCESS)
    delete_completed_tasks.short_description = '删除已完成的任务'

    # ------------------------------------------------------------------
    # Custom URLs
    # ------------------------------------------------------------------

    def get_urls(self):
        """Return the admin URL patterns with the task-specific routes prepended."""
        urls = super().get_urls()
        custom_urls = [
            path(
                'create-keyword-task/',
                self.admin_site.admin_view(self.create_keyword_task_view),
                name='create_keyword_task',
            ),
            path(
                'create-historical-task/',
                self.admin_site.admin_view(self.create_historical_task_view),
                name='create_historical_task',
            ),
            path(
                'create-full-site-task/',
                self.admin_site.admin_view(self.create_full_site_task_view),
                name='create_full_site_task',
            ),
            path(
                '<int:task_id>/start/',
                self.admin_site.admin_view(self.start_task_view),
                name='start_task',
            ),
            path(
                '<int:task_id>/cancel/',
                self.admin_site.admin_view(self.cancel_task_view),
                name='cancel_task',
            ),
            path(
                '<int:task_id>/results/',
                self.admin_site.admin_view(self.view_results_view),
                name='view_results',
            ),
        ]
        # Custom routes go first so the stock admin patterns cannot shadow them.
        return custom_urls + urls

    # ------------------------------------------------------------------
    # Shared helpers for the custom views
    # ------------------------------------------------------------------

    @staticmethod
    def _post_int(request, field, default):
        """Read an integer POST field, using ``default`` when it is missing or blank.

        Raises ``ValueError`` for non-numeric input; callers surface that to
        the user through their own error handling.
        """
        raw = request.POST.get(field)
        if raw is None or not raw.strip():
            return default
        return int(raw)

    def _changelist_redirect(self):
        """Redirect to the crawl-task change list."""
        return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))

    def _save_task(self, request, website_names, **fields):
        """Create a ``CrawlTask`` from ``fields`` and attach the websites named in ``website_names``."""
        task = CrawlTask.objects.create(
            created_by=request.user.username if request.user.is_authenticated else 'admin',
            **fields,
        )
        if website_names:
            task.websites.set(Website.objects.filter(name__in=website_names))
        return task

    def _render_create_form(self, request, template_name, title):
        """Render one of the task-creation forms with the enabled websites as choices."""
        context = {
            'websites': Website.objects.filter(enabled=True),
            'title': title,
        }
        return self.render_create_task_template(request, template_name, context)

    def _run_single(self, request, task_id, operation, verb):
        """Run ``operation(task_id)``, flash the result and return to the change list."""
        try:
            success, message = operation(task_id)
        except Exception as exc:
            logger.exception('Operation failed for crawl task %s', task_id)
            self.message_user(request, f'{verb}任务失败: {exc}', messages.ERROR)
        else:
            if success:
                self.message_user(request, f'任务已{verb}: {message}', messages.SUCCESS)
            else:
                self.message_user(request, f'{verb}任务失败: {message}', messages.ERROR)
        return self._changelist_redirect()

    # ------------------------------------------------------------------
    # Task-creation views
    # ------------------------------------------------------------------

    def create_keyword_task_view(self, request):
        """Show the keyword-search task form (GET) or create the task from it (POST).

        On success the user is redirected to the new task's change page; on
        a validation failure to the change list; on any other error the form
        is shown again with an error message.
        """
        if request.method == 'POST':
            try:
                name = request.POST.get('name', '')
                keyword = request.POST.get('keyword', '')
                max_pages = self._post_int(request, 'max_pages', 10)
                max_articles = self._post_int(request, 'max_articles', 100)

                if not name or not keyword:
                    self.message_user(request, '任务名称和关键词不能为空', messages.ERROR)
                    return self._changelist_redirect()

                task = self._save_task(
                    request,
                    request.POST.getlist('websites'),
                    name=name,
                    task_type='keyword',
                    keyword=keyword,
                    start_date=request.POST.get('start_date') or None,
                    end_date=request.POST.get('end_date') or None,
                    max_pages=max_pages,
                    max_articles=max_articles,
                )

                self.message_user(request, f'关键词搜索任务 "{name}" 创建成功', messages.SUCCESS)
                return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))

            except Exception as e:
                logger.exception('Failed to create keyword crawl task')
                self.message_user(request, f'创建任务失败: {e}', messages.ERROR)

        return self._render_create_form(request, 'admin/create_keyword_task.html', '创建关键词搜索任务')

    def create_historical_task_view(self, request):
        """Show the historical-article task form (GET) or create the task from it (POST).

        Redirect and error behaviour matches ``create_keyword_task_view``.
        """
        if request.method == 'POST':
            try:
                name = request.POST.get('name', '')
                max_articles = self._post_int(request, 'max_articles', 50)

                if not name:
                    self.message_user(request, '任务名称不能为空', messages.ERROR)
                    return self._changelist_redirect()

                task = self._save_task(
                    request,
                    request.POST.getlist('websites'),
                    name=name,
                    task_type='historical',
                    keyword='历史文章',
                    start_date=request.POST.get('start_date') or None,
                    end_date=request.POST.get('end_date') or None,
                    max_articles=max_articles,
                )

                self.message_user(request, f'历史文章任务 "{name}" 创建成功', messages.SUCCESS)
                return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))

            except Exception as e:
                logger.exception('Failed to create historical crawl task')
                self.message_user(request, f'创建任务失败: {e}', messages.ERROR)

        return self._render_create_form(request, 'admin/create_historical_task.html', '创建历史文章任务')

    def create_full_site_task_view(self, request):
        """Show the full-site crawl task form (GET) or create the task from it (POST).

        Redirect and error behaviour matches ``create_keyword_task_view``.
        """
        if request.method == 'POST':
            try:
                name = request.POST.get('name', '')
                max_pages = self._post_int(request, 'max_pages', 500)

                if not name:
                    self.message_user(request, '任务名称不能为空', messages.ERROR)
                    return self._changelist_redirect()

                task = self._save_task(
                    request,
                    request.POST.getlist('websites'),
                    name=name,
                    task_type='full_site',
                    keyword='全站爬取',
                    max_pages=max_pages,
                )

                self.message_user(request, f'全站爬取任务 "{name}" 创建成功', messages.SUCCESS)
                return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))

            except Exception as e:
                logger.exception('Failed to create full-site crawl task')
                self.message_user(request, f'创建任务失败: {e}', messages.ERROR)

        return self._render_create_form(request, 'admin/create_full_site_task.html', '创建全站爬取任务')

    # ------------------------------------------------------------------
    # Single-task views
    # ------------------------------------------------------------------

    # NOTE(review): the two views below change state for any HTTP method.
    # Consider restricting them to POST once it is confirmed how
    # admin/js/crawl_task_actions.js calls them.

    def start_task_view(self, request, task_id):
        """Start one task through the task executor, then return to the change list."""
        return self._run_single(request, task_id, task_executor.start_task, '启动')

    def cancel_task_view(self, request, task_id):
        """Cancel one task through the task executor, then return to the change list."""
        return self._run_single(request, task_id, task_executor.cancel_task, '取消')

    def view_results_view(self, request, task_id):
        """Render the results page for a task, or redirect to the change list if it does not exist."""
        try:
            task = CrawlTask.objects.get(id=task_id)
        except CrawlTask.DoesNotExist:
            self.message_user(request, '任务不存在', messages.ERROR)
            return self._changelist_redirect()

        context = {
            'task': task,
            'title': f'任务结果 - {task.name}'
        }
        return self.render_create_task_template(request, 'admin/task_results.html', context)

    def render_create_task_template(self, request, template_name, context):
        """Render ``template_name`` as an admin page and return it as an ``HttpResponse``.

        The caller's ``context`` is left untouched; the admin site's standard
        context and the current user are layered on top of a copy of it.
        """
        from django.http import HttpResponse
        from django.template.loader import render_to_string

        page_context = {
            **context,
            # Site header/title, permission flag and app list as computed by
            # the admin site itself rather than hard-coded values.
            **self.admin_site.each_context(request),
            'user': request.user,
        }

        # Passing the request runs the context processors, which is what
        # makes {% csrf_token %} and the messages framework work in the
        # rendered form.
        html = render_to_string(template_name, page_context, request=request)
        return HttpResponse(html)
|
||||
|
||||
|
||||
#class CrawlerStatusAdmin(admin.ModelAdmin):
|
||||
# """爬虫状态管理"""
|
||||
# change_list_template = 'admin/crawler_status.html'
|
||||
@@ -448,6 +862,7 @@ class ArticleAdmin(admin.ModelAdmin):
|
||||
# Register each model with its admin class.
admin.site.register(Website, admin_class=WebsiteAdmin)
admin.site.register(Article, admin_class=ArticleAdmin)
admin.site.register(CrawlTask, admin_class=CrawlTaskAdmin)
|
||||
|
||||
|
||||
# 隐藏Celery Results管理功能
|
||||
|
||||
Reference in New Issue
Block a user