Files
icac/crawler/admin.py

199 lines
7.9 KiB
Python

from django.contrib import admin
from django.utils.html import format_html
from django.urls import reverse
from django.utils.safestring import mark_safe
from .models import Website, CrawlTask, CrawledContent, CrawlLog, SearchKeyword, MediaFile
from .tasks import crawl_websites_task
@admin.register(Website)
class WebsiteAdmin(admin.ModelAdmin):
    """Admin configuration for the Website model.

    Purely declarative: it selects the change-list columns, the sidebar
    filters, the searchable fields and the default ordering, and makes
    ``is_active`` editable directly from the change list.
    """

    # Default sort: by region first, then by name within a region.
    ordering = ['region', 'name']

    # Columns shown on the change list.
    list_display = [
        'name',
        'region',
        'url',
        'is_active',
        'created_at',
    ]

    # ``is_active`` can be toggled inline without opening the record.
    list_editable = ['is_active']

    # Sidebar filters and fields covered by the search box.
    list_filter = [
        'region',
        'is_active',
        'created_at',
    ]
    search_fields = [
        'name',
        'url',
        'region',
    ]
@admin.register(CrawlTask)
class CrawlTaskAdmin(admin.ModelAdmin):
    """Admin configuration for the CrawlTask model.

    Adds a colored, read-only progress column and a bulk action that resets
    the selected tasks to ``pending`` and enqueues them for crawling.
    """

    # Text color used by ``progress_display`` for each known status; any
    # other status is rendered in gray.
    _STATUS_COLORS = {
        'completed': 'green',
        'failed': 'red',
        'running': 'orange',
    }

    list_display = [
        'name',
        'status',
        'created_by',
        'progress_display',
        'created_at',
        'completed_at',
    ]
    list_filter = ['status', 'created_by', 'created_at']
    search_fields = ['name', 'keywords']
    readonly_fields = [
        'created_at',
        'started_at',
        'completed_at',
        'progress_display',
    ]
    filter_horizontal = ['websites']
    actions = ['execute_crawl_task']

    def progress_display(self, obj):
        """Render the task progress as a colored HTML snippet.

        Args:
            obj: The CrawlTask being displayed. Its ``status``,
                ``progress_percentage``, ``crawled_pages`` and
                ``total_pages`` attributes are read.

        Returns:
            Safe HTML of the form ``<percentage>% (<crawled>/<total>)`` with
            the percentage shown to one decimal place, colored by status.
        """
        color = self._STATUS_COLORS.get(obj.status, 'gray')
        # The percent sign belongs directly after the percentage; previously
        # it was appended after the page counts ("50.0 (5/10)%").
        return format_html(
            '<span style="color: {};">{}% ({}/{})</span>',
            color,
            f'{obj.progress_percentage:.1f}',
            obj.crawled_pages,
            obj.total_pages,
        )
    progress_display.short_description = '进度'

    def execute_crawl_task(self, request, queryset):
        """Admin action: reset the selected tasks and enqueue them.

        Each selected task has its status set to ``pending`` and saved, then
        its id is passed to ``crawl_websites_task.delay``. A message with the
        number of enqueued tasks is shown to the user afterwards.

        Args:
            request: The current admin request (used for the user message).
            queryset: The CrawlTask rows selected in the change list.
        """
        started = 0
        for task in queryset:
            # Mark the task as pending before handing it off.
            task.status = 'pending'
            task.save()
            # Enqueue the crawl; ``.delay`` suggests an asynchronous task
            # queue (presumably Celery - confirm against .tasks).
            crawl_websites_task.delay(task.id)
            started += 1
        self.message_user(request, f"已启动 {started} 个爬取任务。")
    execute_crawl_task.short_description = "执行选中的爬取任务"
@admin.register(CrawledContent)
class CrawledContentAdmin(admin.ModelAdmin):
    """Admin configuration for the CrawledContent model.

    Besides the declarative list/filter/search setup, it provides computed
    columns for a shortened title and the number of attached media files,
    plus read-only detail fields that preview the content and list the
    attached media files.
    """

    list_display = [
        'title_short',
        'website',
        'task',
        'keywords_matched',
        'media_count',
        'publish_date',
        'is_local_saved',
        'created_at',
    ]
    list_filter = [
        'website',
        'task',
        'created_at',
        'publish_date',
        'is_local_saved',
    ]
    search_fields = ['title', 'content', 'keywords_matched']
    readonly_fields = ['created_at', 'preview_content', 'media_files_display']
    ordering = ['-created_at']

    def title_short(self, obj):
        """Return the title, cut to 50 characters plus '...' when longer."""
        return obj.title[:50] + '...' if len(obj.title) > 50 else obj.title
    title_short.short_description = '标题'

    def media_count(self, obj):
        """Return the number of related media files.

        A positive count is rendered as bold green HTML; zero is returned as
        the plain string "0".
        """
        count = obj.media_files.count()
        if count > 0:
            return format_html(
                '<span style="color: green; font-weight: bold;">{}</span>',
                count,
            )
        return "0"
    media_count.short_description = '媒体文件'

    def preview_content(self, obj):
        """Return a preview of the crawled content for the detail page.

        Locally saved content gets a link (opened in a new tab) to the
        ``preview_crawled_content`` URL. Otherwise, if there is content, the
        result of ``obj.get_preview_content(500)`` is shown in a scrollable
        box. With neither, a placeholder string is returned.
        """
        if obj.is_local_saved:
            url = reverse('preview_crawled_content', args=[obj.id])
            return format_html(
                '<a href="{}" target="_blank" class="button">预览文章</a>',
                url,
            )
        if obj.content:
            return format_html(
                '<div style="max-height: 200px; overflow-y: auto; border: 1px solid #ddd; padding: 10px;">{}</div>',
                obj.get_preview_content(500),
            )
        return "无内容"
    preview_content.short_description = '内容预览'

    def _render_media_file(self, media_file):
        """Return the escaped HTML card for a single media file.

        Images get an inline thumbnail, videos an inline player, and every
        other media type a download link. All interpolated values go through
        ``format_html`` and are therefore HTML-escaped.
        """
        if media_file.media_type == 'image':
            return format_html(
                '<div style="margin-bottom: 10px; border: 1px solid #ddd; padding: 5px;">'
                '<strong>图片:</strong> {}<br>'
                '<img src="/media/{}" style="max-width: 150px; max-height: 150px;" /><br>'
                '<small>大小: {}</small>'
                '</div>',
                media_file.alt_text or '无标题',
                media_file.local_file.name,
                media_file.file_size_display,
            )
        if media_file.media_type == 'video':
            return format_html(
                '<div style="margin-bottom: 10px; border: 1px solid #ddd; padding: 5px;">'
                '<strong>视频:</strong><br>'
                '<video controls style="max-width: 200px; max-height: 150px;">'
                '<source src="/media/{}" type="{}">'
                '</video><br>'
                '<small>大小: {}</small>'
                '</div>',
                media_file.local_file.name,
                media_file.mime_type,
                media_file.file_size_display,
            )
        return format_html(
            '<div style="margin-bottom: 10px; border: 1px solid #ddd; padding: 5px;">'
            '<strong>{}:</strong> <a href="/media/{}" download>下载</a><br>'
            '<small>大小: {}</small>'
            '</div>',
            media_file.get_media_type_display(),
            media_file.local_file.name,
            media_file.file_size_display,
        )

    def media_files_display(self, obj):
        """Return an HTML list of the media files attached to this content.

        Args:
            obj: The CrawledContent instance shown on the detail page.

        Returns:
            A scrollable HTML block with one card per media file, or a
            placeholder string when there are none.
        """
        # An instance without a primary key cannot have related rows, and
        # Django 4.1+ raises ValueError when its reverse relation is used.
        if obj is None or obj.pk is None:
            return "无媒体文件"

        media_files = obj.media_files.all()
        if not media_files:
            return "无媒体文件"

        # Each card is already escaped by format_html, so the joined result
        # is safe to mark as such. The previous ``format_html(html)`` call
        # with no arguments ran str.format over the rendered HTML, which
        # fails as soon as any value contains "{" or "}".
        cards = mark_safe(
            ''.join(self._render_media_file(item) for item in media_files)
        )
        return format_html(
            "<div style='max-height: 300px; overflow-y: auto;'>{}</div>",
            cards,
        )
    media_files_display.short_description = '媒体文件'
@admin.register(CrawlLog)
class CrawlLogAdmin(admin.ModelAdmin):
    """Admin configuration for the CrawlLog model.

    Lists log entries newest first and shows a shortened message column so
    long messages do not blow up the change list.
    """

    ordering = ['-created_at']
    readonly_fields = ['created_at']
    search_fields = ['message']
    list_filter = [
        'level',
        'website',
        'task',
        'created_at',
    ]
    list_display = [
        'level',
        'message_short',
        'website',
        'task',
        'created_at',
    ]

    def message_short(self, obj):
        """Return the log message, cut to 100 characters plus '...' when longer."""
        if len(obj.message) > 100:
            return obj.message[:100] + '...'
        return obj.message
    message_short.short_description = '消息'
@admin.register(MediaFile)
class MediaFileAdmin(admin.ModelAdmin):
    """Admin configuration for the MediaFile model.

    Adds a read-only inline preview (image, video player or audio player)
    for media that has a local file.
    """

    ordering = ['-created_at']
    list_display = [
        'content',
        'media_type',
        'file_size_display',
        'mime_type',
        'created_at',
    ]
    list_filter = ['media_type', 'created_at']
    search_fields = ['content__title', 'original_url', 'alt_text']
    readonly_fields = ['created_at', 'file_size_display', 'media_preview']

    def media_preview(self, obj):
        """Return an inline HTML preview for the media file.

        Image, video and audio files that have a local file are rendered
        with the matching HTML element pointing below ``/media/``. Any other
        media type, or a missing local file, yields a placeholder string.
        """
        kind = obj.media_type
        # Only the three previewable types ever look at the local file.
        if kind not in ('image', 'video', 'audio') or not obj.local_file:
            return "无预览"

        file_name = obj.local_file.name
        if kind == 'image':
            return format_html(
                '<img src="/media/{}" style="max-width: 200px; max-height: 200px;" />',
                file_name,
            )
        if kind == 'video':
            return format_html(
                '<video controls style="max-width: 200px; max-height: 200px;"><source src="/media/{}" type="{}"></video>',
                file_name,
                obj.mime_type,
            )
        return format_html(
            '<audio controls><source src="/media/{}" type="{}"></audio>',
            file_name,
            obj.mime_type,
        )
    media_preview.short_description = '预览'
@admin.register(SearchKeyword)
class SearchKeywordAdmin(admin.ModelAdmin):
    """Admin configuration for the SearchKeyword model.

    Purely declarative: keywords are listed by most recent use, then by
    creation time, and ``is_active`` can be toggled from the change list.
    """

    # Most recently used first; creation time breaks ties.
    ordering = ['-last_used', '-created_at']

    list_display = [
        'keyword',
        'is_active',
        'created_at',
        'last_used',
    ]

    # ``is_active`` is editable inline on the change list.
    list_editable = ['is_active']

    list_filter = [
        'is_active',
        'created_at',
        'last_used',
    ]
    search_fields = ['keyword']