"""
|
||
Django Admin扩展
|
||
提供增强的管理界面功能
|
||
"""
|
||
|
||
import logging
from datetime import datetime, timedelta

from django.contrib import admin
from django.contrib.admin import SimpleListFilter
from django.contrib.admin.utils import model_format_dict
from django.contrib import messages
from django.http import HttpResponseRedirect
from django.urls import path, reverse
from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.utils import timezone
from django.db.models import Count, Q
from django.core.cache import cache

from .models import Website, Article, CrawlTask
from .tasks import crawl_website, crawl_all_websites, cleanup_old_articles
from .distributed_crawler import distributed_crawler
from .task_executor import task_executor

logger = logging.getLogger(__name__)

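
# User-facing hint for task-start failures caused by an unreachable Celery
# broker. "[Errno 61] Connection refused" is the message raised on macOS/BSD
# when Redis (or another broker) is not running.
CONNECTION_REFUSED_HINT = (
    'Connection refused: Redis or another dependent service may not be running. '
    'Please check the following:\n'
    '1. Is the Redis service running? (try running: redis-server)\n'
    '2. If you are using Docker, make sure the containers are running\n'
    '3. Check the CELERY_BROKER_URL setting in the Django configuration\n'
    "4. For local development, you can run 'python manage.py runserver' and "
    "'celery -A myproject worker -l info' to start the required services"
)


def format_task_error(exc):
    """Return a user-facing message for a failed task start.

    Shared by the crawl actions and views below so the troubleshooting hint
    is defined in one place.
    """
    error_msg = str(exc)
    if '[Errno 61] Connection refused' in error_msg:
        return CONNECTION_REFUSED_HINT
    return error_msg

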
class WebsiteStatusFilter(SimpleListFilter):
    """Filter websites by status."""

    title = 'Website status'
    parameter_name = 'status'

    def lookups(self, request, model_admin):
        return (
            ('enabled', 'Enabled'),
            ('disabled', 'Disabled'),
            ('no_articles', 'No articles'),
            ('recent_crawl', 'Crawled recently'),
        )

    def queryset(self, request, queryset):
        if self.value() == 'enabled':
            return queryset.filter(enabled=True)
        elif self.value() == 'disabled':
            return queryset.filter(enabled=False)
        elif self.value() == 'no_articles':
            return queryset.annotate(article_count=Count('article')).filter(article_count=0)
        elif self.value() == 'recent_crawl':
            week_ago = timezone.now() - timedelta(days=7)
            return queryset.filter(last_crawl__gte=week_ago)
        return queryset


class ArticleDateFilter(SimpleListFilter):
    """Filter articles by publication date."""

    title = 'Published'
    parameter_name = 'date_range'

    def lookups(self, request, model_admin):
        return (
            ('today', 'Today'),
            ('week', 'This week'),
            ('month', 'This month'),
            ('quarter', 'This quarter'),
        )

    def queryset(self, request, queryset):
        now = timezone.now()
        if self.value() == 'today':
            return queryset.filter(created_at__date=now.date())
        elif self.value() == 'week':
            week_start = now - timedelta(days=now.weekday())
            return queryset.filter(created_at__gte=week_start.replace(hour=0, minute=0, second=0))
        elif self.value() == 'month':
            return queryset.filter(created_at__year=now.year, created_at__month=now.month)
        elif self.value() == 'quarter':
            quarter = (now.month - 1) // 3
            quarter_start_month = quarter * 3 + 1
            return queryset.filter(
                created_at__year=now.year,
                created_at__month__gte=quarter_start_month,
                created_at__month__lt=quarter_start_month + 3
            )
        return queryset


class WebsiteAdmin(admin.ModelAdmin):
    """Admin for websites."""

    list_display = [
        'name', 'base_url', 'enabled', 'article_count',
        'last_crawl_display', 'status_indicator', 'actions_column'
    ]
    list_filter = [WebsiteStatusFilter, 'enabled']
    search_fields = ['name', 'base_url']
    readonly_fields = ['article_count']
    actions = ['enable_websites', 'disable_websites', 'crawl_selected', 'crawl_all']

    fieldsets = (
        ('Basic information', {
            'fields': ('name', 'base_url', 'enabled')
        }),
        ('Statistics', {
            'fields': ('article_count',),
            'classes': ('collapse',)
        }),
        ('Timestamps', {
            'fields': (),
            'classes': ('collapse',)
        }),
    )

    def get_websites(self, request):
        """Return all enabled websites, for the selection box in templates."""
        return Website.objects.filter(enabled=True)

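    # Changelist optimisation: compute the article count in the list query
    # itself rather than once per row (article_count() falls back to a direct
    # COUNT if the annotation is absent). Uses the same 'article' reverse
    # relation as WebsiteStatusFilter above.
    def get_queryset(self, request):
        return super().get_queryset(request).annotate(_article_count=Count('article'))
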
    def article_count(self, obj):
        """Number of articles."""
        # Prefer the annotation from get_queryset(); fall back to a query
        count = getattr(obj, '_article_count', None)
        return count if count is not None else obj.article_set.count()

    article_count.short_description = 'Articles'

    def last_crawl_display(self, obj):
        """Last crawl time."""
        return 'Not implemented'

    last_crawl_display.short_description = 'Last crawl'

    def status_indicator(self, obj):
        """Status indicator."""
        if obj.enabled:
            return format_html('<span style="color: green;">●</span> OK')
        else:
            return format_html('<span style="color: red;">●</span> Disabled')

    status_indicator.short_description = 'Status'

    def actions_column(self, obj):
        """Row actions."""
        return format_html(
            '<a href="{}" class="button">Crawl</a> '
            '<a href="{}" class="button">View articles</a>',
            reverse('admin:crawl_website', args=[obj.id]),
            reverse('admin:core_article_changelist') + f'?website__id__exact={obj.id}'
        )

    actions_column.short_description = 'Actions'

    def enable_websites(self, request, queryset):
        """Enable the selected websites."""
        updated = queryset.update(enabled=True)
        self.message_user(request, f'Successfully enabled {updated} website(s)')

    enable_websites.short_description = 'Enable selected websites'

    def disable_websites(self, request, queryset):
        """Disable the selected websites."""
        updated = queryset.update(enabled=False)
        self.message_user(request, f'Successfully disabled {updated} website(s)')

    disable_websites.short_description = 'Disable selected websites'

    def crawl_selected(self, request, queryset):
        """Crawl the selected websites."""
        for website in queryset:
            try:
                task = crawl_website.delay(website.id)
                self.message_user(
                    request,
                    f'Crawl task for website {website.name} started (task ID: {task.id})',
                    messages.SUCCESS
                )
            except Exception as e:
                self.message_user(
                    request,
                    f'Failed to start crawl task for website {website.name}: {format_task_error(e)}',
                    messages.ERROR
                )

    crawl_selected.short_description = 'Crawl selected websites'

    def crawl_all(self, request, queryset):
        """Crawl all websites (ignores the selection)."""
        try:
            task = crawl_all_websites.delay()
            self.message_user(
                request,
                f'Bulk crawl task started (task ID: {task.id})',
                messages.SUCCESS
            )
        except Exception as e:
            self.message_user(
                request,
                f'Failed to start bulk crawl task: {format_task_error(e)}',
                messages.ERROR
            )

    # crawl_all.short_description = 'Crawl all websites'

    def get_urls(self):
        """Add custom URLs."""
        urls = super().get_urls()
        custom_urls = [
            path(
                '<int:website_id>/crawl/',
                self.admin_site.admin_view(self.crawl_website_view),
                name='crawl_website',
            ),
            path(
                'run-crawler/',
                self.admin_site.admin_view(self.run_crawler_view),
                name='run_crawler',
            ),
        ]
        return custom_urls + urls

    def crawl_website_view(self, request, website_id):
        """Crawl a single website."""
        try:
            website = Website.objects.get(id=website_id)
            task = crawl_website.delay(website_id)
            self.message_user(
                request,
                f'Crawl task for website {website.name} started (task ID: {task.id})',
                messages.SUCCESS
            )
        except Website.DoesNotExist:
            self.message_user(request, 'Website does not exist', messages.ERROR)
        except Exception as e:
            self.message_user(request, f'Failed to start crawl task: {format_task_error(e)}', messages.ERROR)

        return HttpResponseRedirect(reverse('admin:core_website_changelist'))

    def run_crawler_view(self, request):
        """Run the crawler for all websites."""
        try:
            task = crawl_all_websites.delay()
            self.message_user(
                request,
                f'Bulk crawl task started (task ID: {task.id})',
                messages.SUCCESS
            )
        except Exception as e:
            self.message_user(
                request,
                f'Failed to start bulk crawl task: {format_task_error(e)}',
                messages.ERROR
            )

        return HttpResponseRedirect(reverse('admin:core_website_changelist'))


class ArticleAdmin(admin.ModelAdmin):
    """Admin for articles."""

    list_display = [
        'title', 'website', 'created_at',
        'media_count', 'actions_column'
    ]
    list_filter = [
        ArticleDateFilter, 'website', 'created_at'
    ]
    search_fields = ['title', 'content', 'url']
    readonly_fields = ['created_at', 'media_files_display']
    date_hierarchy = 'created_at'

    fieldsets = (
        ('Basic information', {
            'fields': ('title', 'url', 'website')
        }),
        ('Content', {
            'fields': ('content',)
        }),
        ('Media files', {
            'fields': ('media_files_display',),
            'classes': ('collapse',)
        }),
        ('Timestamps', {
            'fields': ('created_at',),
            'classes': ('collapse',)
        }),
    )

    # Action: export the selected articles
    actions = ['export_selected_articles']

    def export_selected_articles(self, request, queryset):
        """Export the selected articles as a ZIP file."""
        import os
        import re
        import zipfile
        from io import BytesIO

        from bs4 import BeautifulSoup
        from django.conf import settings
        from django.http import HttpResponse
        from docx import Document

        # Build the ZIP file in memory
        zip_buffer = BytesIO()

        with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
            # One folder per article, holding a Word document plus any media
            for article in queryset:
                # Strip characters that are invalid in file names
                safe_title = re.sub(r'[\\/:*?"<>|]', '_', article.title)
                article_folder = f"article_{article.id}_{safe_title}"

                # Create the Word document
                doc = Document()
                doc.add_heading(article.title, 0)

                # Article metadata
                doc.add_paragraph(f"Website: {article.website.name if article.website else ''}")
                doc.add_paragraph(f"URL: {article.url}")
                doc.add_paragraph(f"Published: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else ''}")
                doc.add_paragraph(f"Created: {article.created_at.strftime('%Y-%m-%d %H:%M:%S') if article.created_at else ''}")

                doc.add_heading('Content:', level=1)

                # Strip HTML tags from the content
                soup = BeautifulSoup(article.content, 'html.parser')
                doc.add_paragraph(soup.get_text())

                # Save the Word document to memory
                doc_buffer = BytesIO()
                doc.save(doc_buffer)
                doc_buffer.seek(0)

                # Add the Word document to the ZIP file (archive paths use '/')
                zip_file.writestr(f'{article_folder}/{safe_title}.docx', doc_buffer.getvalue())

                # Add the article's media files to the ZIP file
                if article.media_files:
                    for media_file in article.media_files:
                        try:
                            full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                            if os.path.exists(full_path):
                                zip_file.write(full_path, f'{article_folder}/media/{os.path.basename(media_file)}')
                        except Exception:
                            # Skip media files that cannot be added and keep going
                            logger.warning('Failed to add media file %s to export', media_file, exc_info=True)

        zip_buffer.seek(0)
        response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')
        response['Content-Disposition'] = 'attachment; filename=selected_articles.zip'

        return response

    export_selected_articles.short_description = 'Export selected articles as ZIP'

    def content_preview(self, obj):
        """Content preview."""
        return obj.content[:100] + '...' if len(obj.content) > 100 else obj.content

    content_preview.short_description = 'Content preview'

    def media_count(self, obj):
        """Number of media files."""
        if obj.media_files:
            return len(obj.media_files)
        return 0

    media_count.short_description = 'Media files'

    def media_files_display(self, obj):
        """Render the article's media files."""
        if not obj.media_files:
            return 'No media files'

        # Escape each URL individually; format_html() must not be called on a
        # pre-built string, since any brace in the data would break it.
        parts = []
        for media in obj.media_files:
            if media.get('type') == 'image':
                parts.append(format_html(
                    '<div style="margin: 10px 0;"><img src="{}" style="max-width: 200px; max-height: 150px;" /></div>',
                    media['url']
                ))
            elif media.get('type') == 'video':
                parts.append(format_html(
                    '<div style="margin: 10px 0;"><video controls style="max-width: 200px;">'
                    '<source src="{}" type="video/mp4"></video></div>',
                    media['url']
                ))
        return mark_safe(
            '<div style="max-height: 300px; overflow-y: auto;">' + ''.join(parts) + '</div>'
        )

    media_files_display.short_description = 'Media files'

    def actions_column(self, obj):
        """Row actions: link to the original page and the local detail view."""
        return format_html(
            '<a href="{}" target="_blank" class="button">View original</a> '
            '<a href="{}" target="_blank" class="button">View locally</a>',
            obj.url,
            reverse('article_detail', args=[obj.id])
        )

    actions_column.short_description = 'Actions'


class CrawlTaskStatusFilter(SimpleListFilter):
    """Filter crawl tasks by status."""

    title = 'Task status'
    parameter_name = 'status'

    def lookups(self, request, model_admin):
        return (
            ('pending', 'Pending'),
            ('running', 'Running'),
            ('completed', 'Completed'),
            ('failed', 'Failed'),
            ('cancelled', 'Cancelled'),
        )

    def queryset(self, request, queryset):
        if self.value():
            return queryset.filter(status=self.value())
        return queryset


class CrawlTaskTypeFilter(SimpleListFilter):
    """Filter crawl tasks by type."""

    title = 'Task type'
    parameter_name = 'task_type'

    def lookups(self, request, model_admin):
        return (
            ('keyword', 'Keyword search'),
            ('historical', 'Historical articles'),
            ('full_site', 'Full-site crawl'),
        )

    def queryset(self, request, queryset):
        if self.value():
            return queryset.filter(task_type=self.value())
        return queryset


class CrawlTaskAdmin(admin.ModelAdmin):
    """Admin for crawl tasks."""

    list_display = [
        'name', 'task_type', 'keyword', 'websites_display', 'status',
        'progress_display', 'created_at', 'duration_display', 'actions_column'
    ]
    list_filter = [CrawlTaskStatusFilter, CrawlTaskTypeFilter, 'created_at']
    search_fields = ['name', 'keyword', 'created_by']
    readonly_fields = [
        'status', 'progress', 'current_website', 'current_action',
        'total_articles', 'success_count', 'failed_count',
        'created_at', 'started_at', 'completed_at', 'error_message',
        'result_details', 'duration_display', 'progress_display'
    ]
    actions = ['start_tasks', 'cancel_tasks', 'delete_completed_tasks']

    class Media:
        # Defines the startTask/cancelTask/viewResults handlers used by actions_column
        js = ('admin/js/crawl_task_actions.js',)

    fieldsets = (
        ('Basic information', {
            'fields': ('name', 'task_type', 'keyword')
        }),
        ('Crawl configuration', {
            'fields': ('websites', 'start_date', 'end_date', 'max_pages', 'max_articles')
        }),
        ('Task status', {
            'fields': ('status', 'progress_display', 'current_website', 'current_action'),
            'classes': ('collapse',)
        }),
        ('Statistics', {
            'fields': ('total_articles', 'success_count', 'failed_count'),
            'classes': ('collapse',)
        }),
        ('Timestamps', {
            'fields': ('created_at', 'started_at', 'completed_at', 'duration_display'),
            'classes': ('collapse',)
        }),
        ('Error details', {
            'fields': ('error_message',),
            'classes': ('collapse',)
        }),
        ('Result details', {
            'fields': ('result_details',),
            'classes': ('collapse',)
        }),
    )

    def websites_display(self, obj):
        """Display the task's target websites."""
        return obj.get_websites_display()
    websites_display.short_description = 'Target websites'

    def progress_display(self, obj):
        """Display task progress."""
        if obj.status == 'running':
            return format_html(
                '<div style="width: 100px; background-color: #f0f0f0; border-radius: 3px;">'
                '<div style="width: {}%; background-color: #4CAF50; height: 20px; border-radius: 3px; text-align: center; color: white; line-height: 20px;">{}%</div>'
                '</div>',
                obj.progress, obj.progress
            )
        elif obj.status == 'completed':
            return format_html('<span style="color: green;">✓ Completed</span>')
        elif obj.status == 'failed':
            return format_html('<span style="color: red;">✗ Failed</span>')
        elif obj.status == 'cancelled':
            return format_html('<span style="color: orange;">⊘ Cancelled</span>')
        else:
            return format_html('<span style="color: gray;">⏳ Pending</span>')
    progress_display.short_description = 'Progress'

    def duration_display(self, obj):
        """Display execution duration."""
        duration = obj.get_duration()
        if duration:
            total_seconds = int(duration.total_seconds())
            hours = total_seconds // 3600
            minutes = (total_seconds % 3600) // 60
            seconds = total_seconds % 60
            if hours > 0:
                return f"{hours}h {minutes}m"
            elif minutes > 0:
                return f"{minutes}m {seconds}s"
            else:
                return f"{seconds}s"
        return "-"
    duration_display.short_description = 'Duration'

    def actions_column(self, obj):
        """Row actions."""
        actions = []

        if obj.status == 'pending':
            actions.append(f'<a href="javascript:void(0)" onclick="startTask({obj.id})" class="button">Start</a>')

        if obj.can_cancel():
            actions.append(f'<a href="javascript:void(0)" onclick="cancelTask({obj.id})" class="button">Cancel</a>')

        if obj.status == 'completed':
            actions.append(f'<a href="javascript:void(0)" onclick="viewResults({obj.id})" class="button">View results</a>')

        # The fragments above contain no user-controlled data, so joining and
        # marking them safe is acceptable here.
        return mark_safe(' '.join(actions))
    actions_column.short_description = 'Actions'

    def start_tasks(self, request, queryset):
        """Start the selected tasks."""
        started_count = 0
        for task in queryset.filter(status='pending'):
            try:
                success, message = task_executor.start_task(task.id)
                if success:
                    started_count += 1
                else:
                    self.message_user(request, f'Failed to start task {task.name}: {message}', messages.ERROR)
            except Exception as e:
                self.message_user(request, f'Failed to start task {task.name}: {e}', messages.ERROR)

        if started_count > 0:
            self.message_user(request, f'Successfully started {started_count} task(s)', messages.SUCCESS)
    start_tasks.short_description = 'Start selected tasks'

    def cancel_tasks(self, request, queryset):
        """Cancel the selected tasks."""
        cancelled_count = 0
        for task in queryset.filter(status__in=['pending', 'running']):
            try:
                success, message = task_executor.cancel_task(task.id)
                if success:
                    cancelled_count += 1
                else:
                    self.message_user(request, f'Failed to cancel task {task.name}: {message}', messages.ERROR)
            except Exception as e:
                self.message_user(request, f'Failed to cancel task {task.name}: {e}', messages.ERROR)

        if cancelled_count > 0:
            self.message_user(request, f'Successfully cancelled {cancelled_count} task(s)', messages.SUCCESS)
    cancel_tasks.short_description = 'Cancel selected tasks'

    def delete_completed_tasks(self, request, queryset):
        """Delete finished (completed, failed or cancelled) tasks."""
        completed_tasks = queryset.filter(status__in=['completed', 'failed', 'cancelled'])
        count = completed_tasks.count()
        completed_tasks.delete()

        if count > 0:
            self.message_user(request, f'Successfully deleted {count} finished task(s)', messages.SUCCESS)
    delete_completed_tasks.short_description = 'Delete finished tasks'

    def get_urls(self):
        """Add custom URLs."""
        urls = super().get_urls()
        custom_urls = [
            path(
                'create-keyword-task/',
                self.admin_site.admin_view(self.create_keyword_task_view),
                name='create_keyword_task',
            ),
            path(
                'create-historical-task/',
                self.admin_site.admin_view(self.create_historical_task_view),
                name='create_historical_task',
            ),
            path(
                'create-full-site-task/',
                self.admin_site.admin_view(self.create_full_site_task_view),
                name='create_full_site_task',
            ),
            path(
                '<int:task_id>/start/',
                self.admin_site.admin_view(self.start_task_view),
                name='start_task',
            ),
            path(
                '<int:task_id>/cancel/',
                self.admin_site.admin_view(self.cancel_task_view),
                name='cancel_task',
            ),
            path(
                '<int:task_id>/results/',
                self.admin_site.admin_view(self.view_results_view),
                name='view_results',
            ),
        ]
        return custom_urls + urls

    def create_keyword_task_view(self, request):
        """Create a keyword-search task."""
        if request.method == 'POST':
            try:
                name = request.POST.get('name', '')
                keyword = request.POST.get('keyword', '')
                websites = request.POST.getlist('websites')
                start_date = request.POST.get('start_date')
                end_date = request.POST.get('end_date')
                max_pages = int(request.POST.get('max_pages', 10))
                max_articles = int(request.POST.get('max_articles', 100))

                if not name or not keyword:
                    self.message_user(request, 'Task name and keyword must not be empty', messages.ERROR)
                    return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))

                # Create the task
                task = CrawlTask.objects.create(
                    name=name,
                    task_type='keyword',
                    keyword=keyword,
                    start_date=start_date if start_date else None,
                    end_date=end_date if end_date else None,
                    max_pages=max_pages,
                    max_articles=max_articles,
                    created_by=request.user.username if request.user.is_authenticated else 'admin'
                )

                # Attach the selected websites
                if websites:
                    website_objects = Website.objects.filter(name__in=websites)
                    task.websites.set(website_objects)

                self.message_user(request, f'Keyword-search task "{name}" created', messages.SUCCESS)
                return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))

            except Exception as e:
                self.message_user(request, f'Failed to create task: {e}', messages.ERROR)

        # GET request: show the creation form
        context = {
            'websites': Website.objects.filter(enabled=True),
            'title': 'Create keyword-search task'
        }
        return self.render_create_task_template(request, 'admin/create_keyword_task.html', context)

    def create_historical_task_view(self, request):
        """Create a historical-articles task."""
        if request.method == 'POST':
            try:
                name = request.POST.get('name', '')
                websites = request.POST.getlist('websites')
                start_date = request.POST.get('start_date')
                end_date = request.POST.get('end_date')
                max_articles = int(request.POST.get('max_articles', 50))

                if not name:
                    self.message_user(request, 'The task name must not be empty', messages.ERROR)
                    return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))

                # Create the task
                task = CrawlTask.objects.create(
                    name=name,
                    task_type='historical',
                    keyword='Historical articles',
                    start_date=start_date if start_date else None,
                    end_date=end_date if end_date else None,
                    max_articles=max_articles,
                    created_by=request.user.username if request.user.is_authenticated else 'admin'
                )

                # Attach the selected websites
                if websites:
                    website_objects = Website.objects.filter(name__in=websites)
                    task.websites.set(website_objects)

                self.message_user(request, f'Historical-articles task "{name}" created', messages.SUCCESS)
                return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))

            except Exception as e:
                self.message_user(request, f'Failed to create task: {e}', messages.ERROR)

        # GET request: show the creation form
        context = {
            'websites': Website.objects.filter(enabled=True),
            'title': 'Create historical-articles task'
        }
        return self.render_create_task_template(request, 'admin/create_historical_task.html', context)

    def create_full_site_task_view(self, request):
        """Create a full-site crawl task."""
        if request.method == 'POST':
            try:
                name = request.POST.get('name', '')
                websites = request.POST.getlist('websites')
                max_pages = int(request.POST.get('max_pages', 500))

                if not name:
                    self.message_user(request, 'The task name must not be empty', messages.ERROR)
                    return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))

                # Create the task
                task = CrawlTask.objects.create(
                    name=name,
                    task_type='full_site',
                    keyword='Full-site crawl',
                    max_pages=max_pages,
                    created_by=request.user.username if request.user.is_authenticated else 'admin'
                )

                # Attach the selected websites
                if websites:
                    website_objects = Website.objects.filter(name__in=websites)
                    task.websites.set(website_objects)

                self.message_user(request, f'Full-site crawl task "{name}" created', messages.SUCCESS)
                return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))

            except Exception as e:
                self.message_user(request, f'Failed to create task: {e}', messages.ERROR)

        # GET request: show the creation form
        context = {
            'websites': Website.objects.filter(enabled=True),
            'title': 'Create full-site crawl task'
        }
        return self.render_create_task_template(request, 'admin/create_full_site_task.html', context)

    def start_task_view(self, request, task_id):
        """Start a task."""
        try:
            success, message = task_executor.start_task(task_id)
            if success:
                self.message_user(request, f'Task started: {message}', messages.SUCCESS)
            else:
                self.message_user(request, f'Failed to start task: {message}', messages.ERROR)
        except Exception as e:
            self.message_user(request, f'Failed to start task: {e}', messages.ERROR)

        return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))

    def cancel_task_view(self, request, task_id):
        """Cancel a task."""
        try:
            success, message = task_executor.cancel_task(task_id)
            if success:
                self.message_user(request, f'Task cancelled: {message}', messages.SUCCESS)
            else:
                self.message_user(request, f'Failed to cancel task: {message}', messages.ERROR)
        except Exception as e:
            self.message_user(request, f'Failed to cancel task: {e}', messages.ERROR)

        return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))

    def view_results_view(self, request, task_id):
        """Show a task's results."""
        try:
            task = CrawlTask.objects.get(id=task_id)
            context = {
                'task': task,
                'title': f'Task results - {task.name}'
            }
            return self.render_create_task_template(request, 'admin/task_results.html', context)
        except CrawlTask.DoesNotExist:
            self.message_user(request, 'Task does not exist', messages.ERROR)
            return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))

    def render_create_task_template(self, request, template_name, context):
        """Render a task-creation template."""
        from django.http import HttpResponse
        from django.template.loader import render_to_string

        context.update({
            'site_header': admin.site.site_header,
            'site_title': admin.site.site_title,
            'has_permission': True,
            'user': request.user,
        })

        # Pass the request so context processors (e.g. CSRF) are applied
        html = render_to_string(template_name, context, request=request)
        return HttpResponse(html)


# class CrawlerStatusAdmin(admin.ModelAdmin):
#     """Crawler status admin."""
#     change_list_template = 'admin/crawler_status.html'
#
#     def changelist_view(self, request, extra_context=None):
#         """Crawler status view."""
#         # Collect the distributed crawler's node statuses
#         nodes = distributed_crawler.get_available_nodes()
#         node_statuses = []
#
#         for node_id in nodes:
#             status = distributed_crawler.get_node_status(node_id)
#             node_statuses.append(status)
#
#         # Most recent batches
#         batches = distributed_crawler.get_all_batches()[:10]
#
#         # Task statistics
#         task_stats = {
#             'active_tasks': len([n for n in node_statuses if n['active_tasks'] > 0]),
#             'total_nodes': len(nodes),
#             'total_batches': len(batches),
#         }
#
#         extra_context = extra_context or {}
#         extra_context.update({
#             'nodes': node_statuses,
#             'batches': batches,
#             'task_stats': task_stats,
#         })
#
#         return super().changelist_view(request, extra_context)

# Register the admin classes
admin.site.register(Website, WebsiteAdmin)
admin.site.register(Article, ArticleAdmin)
admin.site.register(CrawlTask, CrawlTaskAdmin)

# Hide the Celery Results admin:
# undo django_celery_results' automatic registration
try:
    from django_celery_results.models import TaskResult, GroupResult

    admin.site.unregister(TaskResult)
    admin.site.unregister(GroupResult)
except (ImportError, admin.sites.NotRegistered):
    pass

# Hide the Celery Beat periodic-task admin:
# undo django_celery_beat's automatic registration
try:
    from django_celery_beat.models import PeriodicTask, ClockedSchedule, CrontabSchedule, SolarSchedule, IntervalSchedule

    admin.site.unregister(PeriodicTask)
    admin.site.unregister(ClockedSchedule)
    admin.site.unregister(CrontabSchedule)
    admin.site.unregister(SolarSchedule)
    admin.site.unregister(IntervalSchedule)
except (ImportError, admin.sites.NotRegistered):
    pass

# Custom admin site titles
admin.site.site_header = 'Green Classroom Admin'
admin.site.site_title = 'Green Classroom'
admin.site.index_title = 'Welcome to the Green Classroom admin'