2025-08-13 21:35:11 +08:00
parent c618528a0a
commit 31d0525cd0
10 changed files with 243 additions and 897 deletions


@@ -1,9 +1,6 @@
from django.contrib import admin
from django.contrib.admin import AdminSite
from .models import Website, Article
# Imports used by admin actions
from django.contrib import messages
from django.http import HttpResponseRedirect
# Imports needed for the export features
import csv
from django.http import HttpResponse
@@ -12,17 +9,7 @@ import json
from django.shortcuts import render, redirect
from django.urls import path
from django.contrib import admin
from django.http import JsonResponse
from django.views.decorators.http import require_http_methods
from django.core.management import call_command
import threading
import uuid
from django.utils import timezone
# Create a custom admin site
# Instantiate the admin site
# View function that runs a crawler
def run_crawler_view(request):
@@ -36,21 +23,24 @@ def run_crawler_view(request):
return redirect('admin:core_article_changelist')
try:
# Determine which crawler command to run from the website name
if website_name == 'crawl_xinhua':
crawler_name = 'crawl_xinhua'
elif website_name == 'crawl_dongfangyancao':
crawler_name = 'crawl_dongfangyancao'
elif website_name == 'crawl_articles':
crawler_name = 'crawl_articles'
else:
# For any other website, fall back to the generic crawler command
crawler_name = 'crawl_articles'
# Look up the Website object dynamically
website = Website.objects.get(name=website_name)
# Determine the crawler command from the website object.
# The generic fallback crawler is removed; every website must configure its own crawler command.
crawler_name = getattr(website, 'crawler_command', None)
# Fail with an error if the website has no crawler command configured
if not crawler_name:
messages.error(request, f'No crawler command configured for website {website_name}')
return redirect('admin:core_article_changelist')
# Run the crawler command, passing website_name as an argument
call_command(crawler_name)
# Run the crawler command, passing the website name
call_command(crawler_name, website_name)
messages.success(request, f'Crawler ran successfully: {crawler_name}')
messages.success(request, f'Crawler ran successfully: {website_name}')
except Website.DoesNotExist:
messages.error(request, f'Website does not exist: {website_name}')
except Exception as e:
messages.error(request, f'Failed to run crawler: {str(e)}')
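
Since the new code calls call_command(crawler_name, website_name), each per-site crawler command now has to accept the website name as a positional argument. A minimal sketch of such a management command, assuming a core app; the module path, option handling, and crawl body are illustrative, not part of this commit:

# core/management/commands/crawl_xinhua.py  (path assumed for illustration)
from django.core.management.base import BaseCommand, CommandError
from core.models import Website

class Command(BaseCommand):
    help = 'Crawl articles for a single configured website'

    def add_arguments(self, parser):
        # Matches the positional argument passed by run_crawler_view.
        parser.add_argument('website_name', type=str)

    def handle(self, *args, **options):
        try:
            website = Website.objects.get(name=options['website_name'])
        except Website.DoesNotExist:
            raise CommandError(f"Unknown website: {options['website_name']}")
        # ... fetch and parse pages for `website`, save Article rows here ...
        self.stdout.write(self.style.SUCCESS(f'Crawled {website.name}'))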
@@ -71,6 +61,10 @@ class ArticleAdmin(admin.ModelAdmin):
actions = ['delete_selected_articles', 'export_as_csv', 'export_as_json',
'export_as_word', 'export_with_media']
def get_websites(self):
"""获取所有启用的网站"""
return Website.objects.filter(enabled=True)
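
Both get_websites (filtering on enabled) and run_crawler_view (reading crawler_command) imply a particular shape for the Website model. A minimal sketch of what it presumably looks like; only the field names name, crawler_command, and enabled come from this diff, while the types and lengths are guesses:

from django.db import models

class Website(models.Model):
    name = models.CharField(max_length=100, unique=True)
    # Management command to run for this site; required now that the
    # generic fallback crawler has been removed.
    crawler_command = models.CharField(max_length=100, blank=True)
    enabled = models.BooleanField(default=True)

    def __str__(self):
        return self.name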
# Override get_urls to add custom URLs
def get_urls(self):
urls = super().get_urls()
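
The hunk ends here mid-method; the usual completion of this pattern wires run_crawler_view into the admin URLconf. The route and URL name below are assumptions, not taken from the commit:

def get_urls(self):
    urls = super().get_urls()
    custom_urls = [
        # admin_view wraps the view with the admin's permission checks.
        path('run-crawler/', self.admin_site.admin_view(run_crawler_view),
             name='run_crawler'),
    ]
    # Custom routes must precede the default admin URLs to match first.
    return custom_urls + urls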