green_classroom/core/admin_extended.py

"""
Django Admin扩展
提供增强的管理界面功能
"""
import logging
from datetime import datetime, timedelta
from django.contrib import admin
from django.contrib.admin import SimpleListFilter
from django.contrib.admin.utils import model_format_dict
from django.contrib import messages
from django.http import HttpResponseRedirect
from django.urls import path, reverse
from django.utils.html import format_html
from django.utils import timezone
from django.db.models import Count, Q
from django.core.cache import cache
from .models import Website, Article, CrawlTask, SiteConfig
from .tasks import crawl_website, crawl_all_websites, cleanup_old_articles
from .distributed_crawler import distributed_crawler
from .task_executor import task_executor
logger = logging.getLogger(__name__)
class WebsiteStatusFilter(SimpleListFilter):
"""网站状态过滤器"""
title = '网站状态'
parameter_name = 'status'
def lookups(self, request, model_admin):
return (
('enabled', '已启用'),
('disabled', '已禁用'),
('no_articles', '无文章'),
('recent_crawl', '最近爬取'),
)
def queryset(self, request, queryset):
if self.value() == 'enabled':
return queryset.filter(enabled=True)
elif self.value() == 'disabled':
return queryset.filter(enabled=False)
elif self.value() == 'no_articles':
return queryset.annotate(article_count=Count('article')).filter(article_count=0)
elif self.value() == 'recent_crawl':
week_ago = timezone.now() - timedelta(days=7)
return queryset.filter(last_crawl__gte=week_ago)
return queryset
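
# How WebsiteStatusFilter manifests in practice: the chosen lookup value
# arrives via the changelist query string, e.g.
#
#   /admin/core/website/?status=no_articles
#
# (the /admin/ prefix assumes the default admin mount point); self.value()
# then returns 'no_articles', and queryset() keeps only websites whose
# annotated article count is zero.
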
class ArticleDateFilter(SimpleListFilter):
"""文章日期过滤器"""
title = '发布时间'
parameter_name = 'date_range'
def lookups(self, request, model_admin):
return (
('today', '今天'),
('week', '本周'),
('month', '本月'),
('quarter', '本季度'),
)
def queryset(self, request, queryset):
now = timezone.now()
if self.value() == 'today':
return queryset.filter(created_at__date=now.date())
elif self.value() == 'week':
week_start = now - timedelta(days=now.weekday())
            return queryset.filter(created_at__gte=week_start.replace(hour=0, minute=0, second=0, microsecond=0))
elif self.value() == 'month':
return queryset.filter(created_at__year=now.year, created_at__month=now.month)
elif self.value() == 'quarter':
quarter = (now.month - 1) // 3
quarter_start_month = quarter * 3 + 1
return queryset.filter(
created_at__year=now.year,
created_at__month__gte=quarter_start_month,
created_at__month__lt=quarter_start_month + 3
)
return queryset
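
# Worked example for the quarter branch: in August (month 8),
# quarter = (8 - 1) // 3 = 2 and quarter_start_month = 2 * 3 + 1 = 7,
# so the filter keeps articles created in months 7-9 of the current year.
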
class WebsiteAdmin(admin.ModelAdmin):
"""网站管理"""
list_display = [
'name', 'base_url', 'enabled', 'article_count',
'last_crawl_display', 'status_indicator', 'actions_column'
]
list_filter = [WebsiteStatusFilter, 'enabled']
search_fields = ['name', 'base_url']
readonly_fields = ['article_count']
actions = ['enable_websites', 'disable_websites', 'crawl_selected', 'crawl_all']
fieldsets = (
('基本信息', {
'fields': ('name', 'base_url', 'enabled')
}),
('统计信息', {
'fields': ('article_count',),
'classes': ('collapse',)
}),
)
    # Expose enabled websites for the website-selection widget used in templates.
    def get_websites(self, request):
        """Return all enabled websites, for use in template select boxes."""
return Website.objects.filter(enabled=True)
def article_count(self, obj):
"""文章数量"""
return obj.article_set.count()
article_count.short_description = '文章数量'
def last_crawl_display(self, obj):
"""最后爬取时间显示"""
return '未实现'
last_crawl_display.short_description = '最后爬取'
def status_indicator(self, obj):
"""状态指示器"""
if obj.enabled:
return format_html('<span style="color: green;">●</span> 正常')
else:
return format_html('<span style="color: red;">●</span> 禁用')
status_indicator.short_description = '状态'
def actions_column(self, obj):
"""操作列"""
return format_html(
'<a href="{}" class="button">爬取</a> '
'<a href="{}" class="button">查看文章</a>',
reverse('admin:crawl_website', args=[obj.id]),
reverse('admin:core_article_changelist') + f'?website__id__exact={obj.id}'
)
actions_column.short_description = '操作'
def enable_websites(self, request, queryset):
"""启用选中的网站"""
updated = queryset.update(enabled=True)
self.message_user(request, f'成功启用 {updated} 个网站')
enable_websites.short_description = '启用选中的网站'
def disable_websites(self, request, queryset):
"""禁用选中的网站"""
updated = queryset.update(enabled=False)
self.message_user(request, f'成功禁用 {updated} 个网站')
disable_websites.short_description = '禁用选中的网站'
def crawl_selected(self, request, queryset):
"""爬取选中的网站"""
for website in queryset:
try:
task = crawl_website.delay(website.id)
self.message_user(
request,
f'网站 {website.name} 爬取任务已启动 (任务ID: {task.id})',
messages.SUCCESS
)
except Exception as e:
error_msg = str(e)
if "[Errno 61] Connection refused" in error_msg:
detailed_msg = "连接被拒绝可能是Redis或其他依赖服务未启动。请检查以下几点\n1. Redis服务是否运行 (尝试运行: redis-server)\n2. 如果使用Docker请确保容器正在运行\n3. 检查Django配置中的CELERY_BROKER_URL设置\n4. 在本地开发环境中,可以运行 'python manage.py runserver''celery -A myproject worker -l info' 来启动必要的服务"
else:
detailed_msg = error_msg
self.message_user(
request,
f'网站 {website.name} 爬取任务启动失败: {detailed_msg}',
messages.ERROR
)
crawl_selected.short_description = '爬取选中的网站'
    def crawl_all(self, request, queryset):
        """Queue a crawl of all websites."""
try:
task = crawl_all_websites.delay()
self.message_user(
request,
f'批量爬取任务已启动 (任务ID: {task.id})',
messages.SUCCESS
)
except Exception as e:
error_msg = str(e)
if "[Errno 61] Connection refused" in error_msg:
detailed_msg = "连接被拒绝可能是Redis或其他依赖服务未启动。请检查以下几点\n1. Redis服务是否运行 (尝试运行: redis-server)\n2. 如果使用Docker请确保容器正在运行\n3. 检查Django配置中的CELERY_BROKER_URL设置\n4. 在本地开发环境中,可以运行 'python manage.py runserver''celery -A myproject worker -l info' 来启动必要的服务"
else:
detailed_msg = error_msg
self.message_user(
request,
f'批量爬取任务启动失败: {detailed_msg}',
messages.ERROR
)
    crawl_all.short_description = '爬取所有网站'
def get_urls(self):
"""添加自定义URL"""
urls = super().get_urls()
custom_urls = [
path(
'<int:website_id>/crawl/',
self.admin_site.admin_view(self.crawl_website_view),
name='crawl_website',
),
path(
'run-crawler/',
self.admin_site.admin_view(self.run_crawler_view),
name='run_crawler',
),
]
return custom_urls + urls
def crawl_website_view(self, request, website_id):
"""爬取单个网站视图"""
try:
website = Website.objects.get(id=website_id)
task = crawl_website.delay(website_id)
self.message_user(
request,
f'网站 {website.name} 爬取任务已启动 (任务ID: {task.id})',
messages.SUCCESS
)
except Website.DoesNotExist:
self.message_user(request, '网站不存在', messages.ERROR)
except Exception as e:
error_msg = str(e)
if "[Errno 61] Connection refused" in error_msg:
detailed_msg = "连接被拒绝可能是Redis或其他依赖服务未启动。请检查以下几点\n1. Redis服务是否运行 (尝试运行: redis-server)\n2. 如果使用Docker请确保容器正在运行\n3. 检查Django配置中的CELERY_BROKER_URL设置\n4. 在本地开发环境中,可以运行 'python manage.py runserver''celery -A myproject worker -l info' 来启动必要的服务"
else:
detailed_msg = error_msg
self.message_user(request, f'爬取任务启动失败: {detailed_msg}', messages.ERROR)
return HttpResponseRedirect(reverse('admin:core_website_changelist'))
def run_crawler_view(self, request):
"""运行爬虫视图"""
try:
task = crawl_all_websites.delay()
self.message_user(
request,
f'批量爬取任务已启动 (任务ID: {task.id})',
messages.SUCCESS
)
except Exception as e:
error_msg = str(e)
if "[Errno 61] Connection refused" in error_msg:
detailed_msg = "连接被拒绝可能是Redis或其他依赖服务未启动。请检查以下几点\n1. Redis服务是否运行 (尝试运行: redis-server)\n2. 如果使用Docker请确保容器正在运行\n3. 检查Django配置中的CELERY_BROKER_URL设置\n4. 在本地开发环境中,可以运行 'python manage.py runserver''celery -A myproject worker -l info' 来启动必要的服务"
else:
detailed_msg = error_msg
self.message_user(
request,
f'批量爬取任务启动失败: {detailed_msg}',
messages.ERROR
)
return HttpResponseRedirect(reverse('admin:core_website_changelist'))
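
# A minimal sketch for exercising the crawl actions above without a running
# Redis broker: Celery's eager mode runs .delay() calls synchronously
# in-process. CELERY_TASK_ALWAYS_EAGER and CELERY_TASK_EAGER_PROPAGATES are
# standard Celery settings; the settings module name is hypothetical.
#
#   # settings_test.py (hypothetical)
#   CELERY_TASK_ALWAYS_EAGER = True
#   CELERY_TASK_EAGER_PROPAGATES = True
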
class ArticleAdmin(admin.ModelAdmin):
"""文章管理"""
list_display = [
'title', 'website', 'created_at',
'media_count', 'actions_column'
]
list_filter = [
ArticleDateFilter, 'website', 'created_at'
]
search_fields = ['title', 'content', 'url']
readonly_fields = ['created_at', 'media_files_display']
date_hierarchy = 'created_at'
fieldsets = (
('基本信息', {
'fields': ('title', 'url', 'website')
}),
('内容', {
'fields': ('content',)
}),
('媒体文件', {
'fields': ('media_files_display',),
'classes': ('collapse',)
}),
('时间信息', {
'fields': ('created_at',),
'classes': ('collapse',)
}),
)
    # Action for exporting the selected articles.
    actions = ['export_selected_articles']

    def export_selected_articles(self, request, queryset):
        """Export the selected articles as a ZIP archive."""
import zipfile
from django.http import HttpResponse
from io import BytesIO
from django.conf import settings
import os
from bs4 import BeautifulSoup
from docx import Document
        import re

        # Build the ZIP archive in memory.
        zip_buffer = BytesIO()
        with zipfile.ZipFile(zip_buffer, 'w') as zip_file:
            # Create one folder per article.
            for article in queryset:
                # Sanitize the title: replace characters that are invalid in file names.
                safe_title = re.sub(r'[\\/:*?"<>|]', '_', article.title)
                article_folder = f"article_{article.id}_{safe_title}"
                # Build a Word document for the article.
                doc = Document()
                doc.add_heading(article.title, 0)
                # Article metadata.
                doc.add_paragraph(f"网站: {article.website.name if article.website else ''}")
                doc.add_paragraph(f"URL: {article.url}")
                doc.add_paragraph(f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else ''}")
                doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S') if article.created_at else ''}")
                # Content heading.
                doc.add_heading('内容:', level=1)
                # Strip HTML tags and keep the plain text.
                soup = BeautifulSoup(article.content, 'html.parser')
                content_text = soup.get_text()
                doc.add_paragraph(content_text)
                # Save the document into an in-memory buffer.
                doc_buffer = BytesIO()
                doc.save(doc_buffer)
                doc_buffer.seek(0)
                # Add the document to the archive (ZIP entry names use '/').
                zip_file.writestr(f'{article_folder}/{safe_title}.docx', doc_buffer.getvalue())
                # Add the article's media files to the archive; entries are
                # assumed to be paths relative to MEDIA_ROOT.
                if article.media_files:
                    for media_file in article.media_files:
                        try:
                            full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                            if os.path.exists(full_path):
                                zip_file.write(full_path, f'{article_folder}/media/{os.path.basename(media_file)}')
                        except Exception:
                            # Skip files that cannot be added and keep going.
                            logger.exception('Failed to add media file %s to the export', media_file)
        # Build the HTTP response.
zip_buffer.seek(0)
response = HttpResponse(zip_buffer.getvalue(), content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename=selected_articles.zip'
return response
export_selected_articles.short_description = "导出所选的文章为ZIP"
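
    # Layout of the archive produced above (derived from the writestr/write
    # calls; <id> and <title> come from each exported article):
    #
    #   selected_articles.zip
    #   └── article_<id>_<sanitized title>/
    #       ├── <sanitized title>.docx
    #       └── media/
    #           └── <one entry per media file>
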
def content_preview(self, obj):
"""内容预览"""
return obj.content[:100] + '...' if len(obj.content) > 100 else obj.content
content_preview.short_description = '内容预览'
def media_count(self, obj):
"""媒体文件数量"""
if obj.media_files:
return len(obj.media_files)
return 0
media_count.short_description = '媒体文件'
    def media_files_display(self, obj):
        """Render previews of the article's media files."""
        if not obj.media_files:
            return '无媒体文件'
        items = []
        for media in obj.media_files:
            if media.get('type') == 'image':
                items.append(format_html(
                    '<div style="margin: 10px 0;"><img src="{}" style="max-width: 200px; max-height: 150px;" /></div>',
                    media['url'],
                ))
            elif media.get('type') == 'video':
                items.append(format_html(
                    '<div style="margin: 10px 0;"><video controls style="max-width: 200px;">'
                    '<source src="{}" type="video/mp4"></video></div>',
                    media['url'],
                ))
        # Each item was escaped by format_html, so joining them is safe.
        return format_html(
            '<div style="max-height: 300px; overflow-y: auto;">{}</div>',
            mark_safe(''.join(items)),
        )
    media_files_display.short_description = '媒体文件'
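
    # Expected shape of Article.media_files as consumed here (inferred from
    # the .get('type') / ['url'] lookups above; illustrative values). Note the
    # export action instead treats entries as bare MEDIA_ROOT-relative paths,
    # so the two call sites assume different representations:
    #
    #   [
    #       {"type": "image", "url": "/media/articles/1/cover.jpg"},
    #       {"type": "video", "url": "/media/articles/1/clip.mp4"},
    #   ]
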
def actions_column(self, obj):
"""操作列"""
# 修改: 添加跳转到本地文章详情页的链接
return format_html(
'<a href="{}" target="_blank" class="button">查看原文</a> '
'<a href="{}" target="_blank" class="button">本地查看</a>',
obj.url,
reverse('article_detail', args=[obj.id])
)
actions_column.short_description = '操作'
class CrawlTaskStatusFilter(SimpleListFilter):
"""爬取任务状态过滤器"""
title = '任务状态'
parameter_name = 'status'
def lookups(self, request, model_admin):
return (
('pending', '等待中'),
('running', '运行中'),
('completed', '已完成'),
('failed', '失败'),
('cancelled', '已取消'),
)
def queryset(self, request, queryset):
if self.value():
return queryset.filter(status=self.value())
return queryset
class CrawlTaskTypeFilter(SimpleListFilter):
"""爬取任务类型过滤器"""
title = '任务类型'
parameter_name = 'task_type'
def lookups(self, request, model_admin):
return (
('keyword', '关键词搜索'),
('historical', '历史文章'),
('full_site', '全站爬取'),
)
def queryset(self, request, queryset):
if self.value():
return queryset.filter(task_type=self.value())
return queryset
class CrawlTaskAdmin(admin.ModelAdmin):
"""爬取任务管理"""
list_display = [
'name', 'task_type', 'keyword', 'websites_display', 'status',
'progress_display', 'created_at', 'duration_display', 'actions_column'
]
list_filter = [CrawlTaskStatusFilter, CrawlTaskTypeFilter, 'created_at']
search_fields = ['name', 'keyword', 'created_by']
readonly_fields = [
'status', 'progress', 'current_website', 'current_action',
'total_articles', 'success_count', 'failed_count',
'created_at', 'started_at', 'completed_at', 'error_message',
'result_details', 'duration_display', 'progress_display',
'execution_count', 'last_execution_at', 'execution_summary'
]
actions = ['start_tasks', 'rerun_tasks', 'cancel_tasks', 'delete_completed_tasks']
class Media:
js = ('admin/js/crawl_task_actions.js',)
fieldsets = (
('基本信息', {
'fields': ('name', 'task_type', 'keyword')
}),
('爬取配置', {
'fields': ('websites', 'start_date', 'end_date', 'max_pages', 'max_articles')
}),
('任务状态', {
'fields': ('status', 'progress_display', 'current_website', 'current_action'),
'classes': ('collapse',)
}),
('统计信息', {
'fields': ('total_articles', 'success_count', 'failed_count'),
'classes': ('collapse',)
}),
('时间信息', {
'fields': ('created_at', 'started_at', 'completed_at', 'duration_display'),
'classes': ('collapse',)
}),
('执行历史', {
'fields': ('execution_count', 'last_execution_at', 'execution_summary'),
'classes': ('collapse',)
}),
('错误信息', {
'fields': ('error_message',),
'classes': ('collapse',)
}),
('结果详情', {
'fields': ('result_details',),
'classes': ('collapse',)
}),
)
def websites_display(self, obj):
"""网站列表显示"""
return obj.get_websites_display()
websites_display.short_description = '目标网站'
def progress_display(self, obj):
"""进度显示"""
if obj.status == 'running':
return format_html(
'<div style="width: 100px; background-color: #f0f0f0; border-radius: 3px;">'
'<div style="width: {}%; background-color: #4CAF50; height: 20px; border-radius: 3px; text-align: center; color: white; line-height: 20px;">{}%</div>'
'</div>',
obj.progress, obj.progress
)
elif obj.status == 'completed':
return format_html('<span style="color: green;">✓ 完成</span>')
elif obj.status == 'failed':
return format_html('<span style="color: red;">✗ 失败</span>')
elif obj.status == 'cancelled':
return format_html('<span style="color: orange;">⊘ 已取消</span>')
else:
return format_html('<span style="color: gray;">⏳ 等待</span>')
progress_display.short_description = '进度'
    def duration_display(self, obj):
        """Display the task's execution duration."""
        duration = obj.get_duration()
        if duration:
            total_seconds = int(duration.total_seconds())
            hours = total_seconds // 3600
            minutes = (total_seconds % 3600) // 60
            seconds = total_seconds % 60
            if hours > 0:
                return f"{hours}小时{minutes}分钟"
            elif minutes > 0:
                return f"{minutes}分钟{seconds}秒"
            else:
                return f"{seconds}秒"
        return "-"
    duration_display.short_description = '执行时长'
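
    # Worked example: a duration of 3725 seconds gives hours=1, minutes=2,
    # seconds=5 and renders as "1小时2分钟"; seconds are shown only for
    # durations under an hour.
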
def execution_summary(self, obj):
"""执行摘要显示"""
return obj.get_execution_summary()
execution_summary.short_description = '执行摘要'
    def actions_column(self, obj):
        """Row-level action links, depending on task state."""
        actions = []
        if obj.status == 'pending':
            actions.append(f'<a href="javascript:void(0)" onclick="startTask({obj.id})" class="button">开始</a>')
        if obj.can_cancel():
            actions.append(f'<a href="javascript:void(0)" onclick="cancelTask({obj.id})" class="button">取消</a>')
        if obj.status == 'completed':
            actions.append(f'<a href="javascript:void(0)" onclick="viewResults({obj.id})" class="button">查看结果</a>')
            actions.append(f'<a href="javascript:void(0)" onclick="rerunTask({obj.id})" class="button" style="background-color: #28a745;">重新执行</a>')
        if obj.status in ['failed', 'cancelled']:
            actions.append(f'<a href="javascript:void(0)" onclick="rerunTask({obj.id})" class="button" style="background-color: #28a745;">重新执行</a>')
        # obj.id is an integer, so the joined markup contains no unsafe input.
        return mark_safe(' '.join(actions))
    actions_column.short_description = '操作'
def start_tasks(self, request, queryset):
"""启动选中的任务"""
started_count = 0
for task in queryset.filter(status='pending'):
try:
success, message = task_executor.start_task(task.id)
if success:
started_count += 1
else:
self.message_user(request, f'启动任务 {task.name} 失败: {message}', messages.ERROR)
except Exception as e:
self.message_user(request, f'启动任务 {task.name} 失败: {e}', messages.ERROR)
if started_count > 0:
self.message_user(request, f'成功启动 {started_count} 个任务', messages.SUCCESS)
start_tasks.short_description = '启动选中的任务'
def rerun_tasks(self, request, queryset):
"""重新执行选中的任务"""
rerun_count = 0
for task in queryset.filter(status__in=['completed', 'failed', 'cancelled']):
try:
success, message = task_executor.rerun_task(task.id)
if success:
rerun_count += 1
else:
self.message_user(request, f'重新执行任务 {task.name} 失败: {message}', messages.ERROR)
except Exception as e:
self.message_user(request, f'重新执行任务 {task.name} 失败: {e}', messages.ERROR)
if rerun_count > 0:
self.message_user(request, f'成功重新执行 {rerun_count} 个任务', messages.SUCCESS)
rerun_tasks.short_description = '重新执行选中的任务'
def cancel_tasks(self, request, queryset):
"""取消选中的任务"""
cancelled_count = 0
for task in queryset.filter(status__in=['pending', 'running']):
try:
success, message = task_executor.cancel_task(task.id)
if success:
cancelled_count += 1
else:
self.message_user(request, f'取消任务 {task.name} 失败: {message}', messages.ERROR)
except Exception as e:
self.message_user(request, f'取消任务 {task.name} 失败: {e}', messages.ERROR)
if cancelled_count > 0:
self.message_user(request, f'成功取消 {cancelled_count} 个任务', messages.SUCCESS)
        elif queryset.filter(status__in=['pending', 'running']).exists():
            # Eligible tasks existed, but none were cancelled successfully.
            self.message_user(request, '没有成功取消任何任务', messages.WARNING)
cancel_tasks.short_description = '取消选中的任务'
def delete_completed_tasks(self, request, queryset):
"""删除已完成的任务"""
completed_tasks = queryset.filter(status__in=['completed', 'failed', 'cancelled'])
count = completed_tasks.count()
completed_tasks.delete()
if count > 0:
self.message_user(request, f'成功删除 {count} 个已完成的任务', messages.SUCCESS)
delete_completed_tasks.short_description = '删除已完成的任务'
def get_urls(self):
"""添加自定义URL"""
urls = super().get_urls()
custom_urls = [
path(
'create-keyword-task/',
self.admin_site.admin_view(self.create_keyword_task_view),
name='create_keyword_task',
),
path(
'create-historical-task/',
self.admin_site.admin_view(self.create_historical_task_view),
name='create_historical_task',
),
path(
'create-full-site-task/',
self.admin_site.admin_view(self.create_full_site_task_view),
name='create_full_site_task',
),
path(
'<int:task_id>/start/',
self.admin_site.admin_view(self.start_task_view),
name='start_task',
),
path(
'<int:task_id>/cancel/',
self.admin_site.admin_view(self.cancel_task_view),
name='cancel_task',
),
path(
'<int:task_id>/rerun/',
self.admin_site.admin_view(self.rerun_task_view),
name='rerun_task',
),
path(
'<int:task_id>/results/',
self.admin_site.admin_view(self.view_results_view),
name='view_results',
),
]
return custom_urls + urls
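
    # The routes above are namespaced under the admin site and mounted below
    # this ModelAdmin's changelist, so they reverse as, e.g. (the /admin/
    # prefix assumes the default mount point):
    #
    #   reverse('admin:create_keyword_task')    -> /admin/core/crawltask/create-keyword-task/
    #   reverse('admin:start_task', args=[42])  -> /admin/core/crawltask/42/start/
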
def create_keyword_task_view(self, request):
"""创建关键词搜索任务视图"""
if request.method == 'POST':
try:
from .utils import WEBSITE_CRAWL_CONFIGS
name = request.POST.get('name', '')
keyword = request.POST.get('keyword', '')
websites = request.POST.getlist('websites')
start_date = request.POST.get('start_date')
end_date = request.POST.get('end_date')
max_pages = int(request.POST.get('max_pages', 10))
max_articles = int(request.POST.get('max_articles', 100))
if not name or not keyword:
self.message_user(request, '任务名称和关键词不能为空', messages.ERROR)
return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))
                # Create the task.
task = CrawlTask.objects.create(
name=name,
task_type='keyword',
keyword=keyword,
start_date=start_date if start_date else None,
end_date=end_date if end_date else None,
max_pages=max_pages,
max_articles=max_articles,
created_by=request.user.username if request.user.is_authenticated else 'admin'
)
                # Attach the selected websites.
if websites:
website_objects = Website.objects.filter(name__in=websites)
task.websites.set(website_objects)
self.message_user(request, f'关键词搜索任务 "{name}" 创建成功', messages.SUCCESS)
return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))
except Exception as e:
self.message_user(request, f'创建任务失败: {e}', messages.ERROR)
        # GET: render the creation form.
        context = {
            'websites': Website.objects.filter(enabled=True),
            'title': '创建关键词搜索任务'
        }
        return self.render_create_task_template(request, 'admin/create_keyword_task.html', context)
def create_historical_task_view(self, request):
"""创建历史文章任务视图"""
if request.method == 'POST':
try:
from .utils import WEBSITE_CRAWL_CONFIGS
name = request.POST.get('name', '')
websites = request.POST.getlist('websites')
start_date = request.POST.get('start_date')
end_date = request.POST.get('end_date')
max_articles = int(request.POST.get('max_articles', 50))
if not name:
self.message_user(request, '任务名称不能为空', messages.ERROR)
return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))
                # Create the task.
task = CrawlTask.objects.create(
name=name,
task_type='historical',
keyword='历史文章',
start_date=start_date if start_date else None,
end_date=end_date if end_date else None,
max_articles=max_articles,
created_by=request.user.username if request.user.is_authenticated else 'admin'
)
                # Attach the selected websites.
if websites:
website_objects = Website.objects.filter(name__in=websites)
task.websites.set(website_objects)
self.message_user(request, f'历史文章任务 "{name}" 创建成功', messages.SUCCESS)
return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))
except Exception as e:
self.message_user(request, f'创建任务失败: {e}', messages.ERROR)
        # GET: render the creation form.
        context = {
            'websites': Website.objects.filter(enabled=True),
            'title': '创建历史文章任务'
        }
        return self.render_create_task_template(request, 'admin/create_historical_task.html', context)
def create_full_site_task_view(self, request):
"""创建全站爬取任务视图"""
if request.method == 'POST':
try:
from .utils import WEBSITE_CRAWL_CONFIGS
name = request.POST.get('name', '')
websites = request.POST.getlist('websites')
max_pages = int(request.POST.get('max_pages', 500))
if not name:
self.message_user(request, '任务名称不能为空', messages.ERROR)
return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))
                # Create the task.
task = CrawlTask.objects.create(
name=name,
task_type='full_site',
keyword='全站爬取',
max_pages=max_pages,
created_by=request.user.username if request.user.is_authenticated else 'admin'
)
                # Attach the selected websites.
if websites:
website_objects = Website.objects.filter(name__in=websites)
task.websites.set(website_objects)
self.message_user(request, f'全站爬取任务 "{name}" 创建成功', messages.SUCCESS)
return HttpResponseRedirect(reverse('admin:core_crawltask_change', args=[task.id]))
except Exception as e:
self.message_user(request, f'创建任务失败: {e}', messages.ERROR)
        # GET: render the creation form.
        context = {
            'websites': Website.objects.filter(enabled=True),
            'title': '创建全站爬取任务'
        }
        return self.render_create_task_template(request, 'admin/create_full_site_task.html', context)
def start_task_view(self, request, task_id):
"""启动任务视图"""
try:
success, message = task_executor.start_task(task_id)
if success:
self.message_user(request, f'任务已启动: {message}', messages.SUCCESS)
else:
self.message_user(request, f'启动任务失败: {message}', messages.ERROR)
except Exception as e:
self.message_user(request, f'启动任务失败: {e}', messages.ERROR)
return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))
def rerun_task_view(self, request, task_id):
"""重新执行任务视图"""
try:
success, message = task_executor.rerun_task(task_id)
if success:
self.message_user(request, f'任务已重新执行: {message}', messages.SUCCESS)
else:
self.message_user(request, f'重新执行任务失败: {message}', messages.ERROR)
except Exception as e:
self.message_user(request, f'重新执行任务失败: {e}', messages.ERROR)
return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))
def cancel_task_view(self, request, task_id):
"""取消任务视图"""
try:
success, message = task_executor.cancel_task(task_id)
if success:
self.message_user(request, f'任务已取消: {message}', messages.SUCCESS)
else:
self.message_user(request, f'取消任务失败: {message}', messages.ERROR)
except Exception as e:
self.message_user(request, f'取消任务失败: {e}', messages.ERROR)
return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))
def view_results_view(self, request, task_id):
"""查看结果视图"""
try:
task = CrawlTask.objects.get(id=task_id)
context = {
'task': task,
'title': f'任务结果 - {task.name}'
}
            return self.render_create_task_template(request, 'admin/task_results.html', context)
except CrawlTask.DoesNotExist:
self.message_user(request, '任务不存在', messages.ERROR)
return HttpResponseRedirect(reverse('admin:core_crawltask_changelist'))
def render_create_task_template(self, request, template_name, context):
"""渲染创建任务模板"""
from django.template.loader import render_to_string
from django.http import HttpResponse
context.update({
'site_header': admin.site.site_header,
'site_title': admin.site.site_title,
'has_permission': True,
'user': request.user,
})
html = render_to_string(template_name, context)
return HttpResponse(html)
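
    # A largely equivalent, more conventional sketch using Django's render
    # shortcut (django.shortcuts.render is the standard API; it additionally
    # applies the configured context processors):
    #
    #   from django.shortcuts import render
    #   return render(request, template_name, context)
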
# class CrawlerStatusAdmin(admin.ModelAdmin):
#     """Crawler status admin (if restored, re-add
#     `from .distributed_crawler import distributed_crawler` to the imports)."""
#     change_list_template = 'admin/crawler_status.html'
#
#     def changelist_view(self, request, extra_context=None):
#         """Crawler status view."""
#         # Collect the status of each distributed crawler node.
#         nodes = distributed_crawler.get_available_nodes()
#         node_statuses = []
#
#         for node_id in nodes:
#             status = distributed_crawler.get_node_status(node_id)
#             node_statuses.append(status)
#
#         # Most recent batches.
#         batches = distributed_crawler.get_all_batches()[:10]
#
#         # Task statistics.
#         task_stats = {
#             'active_tasks': len([n for n in node_statuses if n['active_tasks'] > 0]),
#             'total_nodes': len(nodes),
#             'total_batches': len(batches),
#         }
#
#         extra_context = extra_context or {}
#         extra_context.update({
#             'nodes': node_statuses,
#             'batches': batches,
#             'task_stats': task_stats,
#         })
#
#         return super().changelist_view(request, extra_context)
#
class SiteConfigAdmin(admin.ModelAdmin):
"""网站配置管理"""
list_display = ['site_title', 'show_title', 'header_background_color', 'header_background_size', 'header_background_position', 'header_height', 'created_at', 'updated_at']
readonly_fields = ['created_at', 'updated_at']
fieldsets = (
('基本信息', {
'fields': ('site_title', 'show_title')
}),
('版头设置', {
'fields': ('header_background_image', 'header_background_color', 'header_background_size', 'header_background_position', 'header_height'),
'description': '上传背景图片后,可以调整图片的显示大小、位置和版头高度'
}),
('时间信息', {
'fields': ('created_at', 'updated_at'),
'classes': ('collapse',)
}),
)
def has_add_permission(self, request):
"""只允许有一个配置实例"""
return not SiteConfig.objects.exists()
def has_delete_permission(self, request, obj=None):
"""不允许删除配置"""
return False
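
# With add and delete disabled above, SiteConfig acts as a singleton; a
# typical accessor elsewhere might look like this (a hypothetical helper,
# not defined in this module):
#
#   def get_site_config():
#       return SiteConfig.objects.first()
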
# Register the admin classes.
admin.site.register(SiteConfig, SiteConfigAdmin)
admin.site.register(Website, WebsiteAdmin)
admin.site.register(Article, ArticleAdmin)
admin.site.register(CrawlTask, CrawlTaskAdmin)
# Hide the Celery Results admin:
# unregister the models auto-registered by django_celery_results.
try:
    from django_celery_results.models import TaskResult, GroupResult
    admin.site.unregister(TaskResult)
    admin.site.unregister(GroupResult)
except (ImportError, admin.sites.NotRegistered):
    pass
# Hide the Celery Beat periodic-task admin:
# unregister the models auto-registered by django_celery_beat.
try:
    from django_celery_beat.models import (
        PeriodicTask, ClockedSchedule, CrontabSchedule, SolarSchedule, IntervalSchedule,
    )
    admin.site.unregister(PeriodicTask)
    admin.site.unregister(ClockedSchedule)
    admin.site.unregister(CrontabSchedule)
    admin.site.unregister(SolarSchedule)
    admin.site.unregister(IntervalSchedule)
except (ImportError, admin.sites.NotRegistered):
    pass
# Customize the admin site titles.
admin.site.site_header = 'Green Classroom 管理系统'
admin.site.site_title = 'Green Classroom'
admin.site.index_title = '欢迎使用 Green Classroom 管理系统'