diff --git a/core/admin.py b/core/admin.py
index ccefc8b..bfdd200 100644
--- a/core/admin.py
+++ b/core/admin.py
@@ -9,33 +9,39 @@
 import csv
 from django.http import HttpResponse
 import json
+
 # Create the custom admin sites
 class NewsCnAdminSite(AdminSite):
     site_header = "新华网管理后台"
     site_title = "新华网管理"
     index_title = "新华网内容管理"
 
+
 class DongfangyancaoAdminSite(AdminSite):
     site_header = "东方烟草报管理后台"
     site_title = "东方烟草报管理"
     index_title = "东方烟草报内容管理"
 
+
 # Instantiate the admin sites
 news_cn_admin = NewsCnAdminSite(name='news_cn_admin')
 dongfangyancao_admin = DongfangyancaoAdminSite(name='dongfangyancao_admin')
 
+
 @admin.register(Website)
 class WebsiteAdmin(admin.ModelAdmin):
     list_display = ('name', 'base_url', 'enabled')
 
+
 # Custom actions for ArticleAdmin
 @admin.register(Article)
 class ArticleAdmin(admin.ModelAdmin):
     list_display = ('title', 'website', 'pub_date')
     search_fields = ('title', 'content')
     # Action options
-    actions = ['delete_selected_articles', 'delete_dongfangyancao_articles', 'export_as_csv', 'export_as_json', 'export_as_word']
-
+    actions = ['delete_selected_articles', 'delete_dongfangyancao_articles', 'export_as_csv', 'export_as_json',
+               'export_as_word']
+
     def delete_dongfangyancao_articles(self, request, queryset):
         """Delete all 东方烟草报 articles in one click"""
         # Look up the 东方烟草报 Website object
@@ -46,33 +52,34 @@ class ArticleAdmin(admin.ModelAdmin):
             self.message_user(request, f"成功删除 {deleted_count} 篇东方烟草报文章", messages.SUCCESS)
         except Website.DoesNotExist:
             self.message_user(request, "未找到东方烟草报网站配置", messages.ERROR)
-
+
     # Display name for the action
     delete_dongfangyancao_articles.short_description = "删除所有东方烟草报文章"
-
+
     def export_as_csv(self, request, queryset):
         """Export the selected articles as CSV"""
         meta = self.model._meta
         field_names = [field.name for field in meta.fields]
-
+
         response = HttpResponse(content_type='text/csv')
         response['Content-Disposition'] = 'attachment; filename={}.csv'.format(meta)
         writer = csv.writer(response)
-
+
         writer.writerow(field_names)
         for obj in queryset:
-            row = [getattr(obj, field)() if callable(getattr(obj, field)) else getattr(obj, field) for field in field_names]
+            row = [getattr(obj, field)() if callable(getattr(obj, field)) else getattr(obj, field) for field in
+                   field_names]
             writer.writerow(row)
-
+
         return response
-
+
     export_as_csv.short_description = "导出选中文章为CSV格式"
-
+
     def export_as_json(self, request, queryset):
         """Export the selected articles as JSON"""
         response = HttpResponse(content_type='application/json')
         response['Content-Disposition'] = 'attachment; filename=articles.json'
-
+
         # Build the data to export
         articles_data = []
         for article in queryset:
@@ -86,11 +93,11 @@ class ArticleAdmin(admin.ModelAdmin):
                 'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                 'media_files': article.media_files
             })
-
+
         # Write the JSON payload
         response.write(json.dumps(articles_data, ensure_ascii=False, indent=2))
         return response
-
+
     export_as_json.short_description = "导出选中文章为JSON格式"
 
     def export_as_word(self, request, queryset):
@@ -110,19 +117,20 @@ class ArticleAdmin(admin.ModelAdmin):
         for article in queryset:
             # Add the article title
             doc.add_heading(article.title, level=1)
-
+
             # Add the article metadata
             doc.add_paragraph(f"网站: {article.website.name}")
             doc.add_paragraph(f"URL: {article.url}")
-            doc.add_paragraph(f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
+            doc.add_paragraph(
+                f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
             doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}")
-
+
             # Add the article body
             doc.add_heading('内容', level=2)
             # Lightly process the HTML content: strip tags and handle images
             from bs4 import BeautifulSoup
             soup = BeautifulSoup(article.content, 'html.parser')
-
+
             # Handle images embedded in the content
             for img in soup.find_all('img'):
                 src = img.get('src', '')
@@ -133,7 +141,7 @@ class ArticleAdmin(admin.ModelAdmin):
                         from django.conf import settings
                         import requests
                         from io import BytesIO
-
+
                         # Build the full image path
                         if src.startswith('http'):
                             # Remote image
@@ -148,13 +156,13 @@ class ArticleAdmin(admin.ModelAdmin):
                     except Exception as e:
                         # If embedding the image fails, fall back to its URL as text
                         doc.add_paragraph(f"[图片: {src}]")
-
+
                 # Remove the original img tag
                 img.decompose()
-
+
             content_text = soup.get_text()
             doc.add_paragraph(content_text)
-
+
             # Add media file information
             if article.media_files:
                 doc.add_heading('媒体文件', level=2)
@@ -164,7 +172,7 @@ class ArticleAdmin(admin.ModelAdmin):
                         from django.conf import settings
                         from io import BytesIO
                         import requests
-
+
                         full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                         if os.path.exists(full_path):
                             # Embed the image in the document
@@ -179,44 +187,46 @@ class ArticleAdmin(admin.ModelAdmin):
                             doc.add_paragraph(media_file)
                     except Exception as e:
                         doc.add_paragraph(media_file)
-
+
             # Insert a page break
             doc.add_page_break()
-
+
         # Save to an in-memory buffer
         buffer = BytesIO()
         doc.save(buffer)
         buffer.seek(0)
-
+
         # Build the HttpResponse
         from django.http import HttpResponse
-        response = HttpResponse(buffer.getvalue(), content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
+        response = HttpResponse(buffer.getvalue(),
+                                content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
         response['Content-Disposition'] = 'attachment; filename=articles.docx'
         return response
-
+
     export_as_word.short_description = "导出选中文章为Word格式"
 
+
 # Dedicated article admin classes per site
 class NewsCnArticleAdmin(admin.ModelAdmin):
     list_display = ('title', 'pub_date')
     search_fields = ('title', 'content')
     list_filter = ('pub_date',)
     actions = ['export_as_csv', 'export_as_json']
-
+
     def get_queryset(self, request):
         qs = super().get_queryset(request)
         # Only show articles from www.news.cn
         return qs.filter(website__name='www.news.cn')
-
+
     def export_as_csv(self, request, queryset):
         """Export the selected articles as CSV"""
         meta = self.model._meta
         field_names = [field.name for field in meta.fields if field.name != 'content']  # Exclude content to keep the CSV small
-
+
         response = HttpResponse(content_type='text/csv')
         response['Content-Disposition'] = 'attachment; filename=news_cn_articles.csv'
         writer = csv.writer(response)
-
+
         writer.writerow(field_names)
         for obj in queryset:
             row = []
@@ -228,16 +238,16 @@ class NewsCnArticleAdmin(admin.ModelAdmin):
                     value = value.name
                 row.append(value)
             writer.writerow(row)
-
+
         return response
-
+
     export_as_csv.short_description = "导出选中文章为CSV格式"
-
+
     def export_as_json(self, request, queryset):
         """Export the selected articles as JSON"""
         response = HttpResponse(content_type='application/json')
         response['Content-Disposition'] = 'attachment; filename=news_cn_articles.json'
-
+
         # Build the data to export
         articles_data = []
         for article in queryset:
@@ -251,43 +261,44 @@ class NewsCnArticleAdmin(admin.ModelAdmin):
                 'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                 'media_files': article.media_files
             })
-
+
         # Write the JSON payload
         response.write(json.dumps(articles_data, ensure_ascii=False, indent=2))
         return response
-
+
     export_as_json.short_description = "导出选中文章为JSON格式"
 
+
 class DongfangyancaoArticleAdmin(admin.ModelAdmin):
     list_display = ('title', 'pub_date')
     search_fields = ('title', 'content')
     list_filter = ('pub_date',)
     # Action options
     actions = ['delete_selected_articles', 'delete_all_articles', 'export_as_csv', 'export_as_json']
-
+
     def get_queryset(self, request):
         qs = super().get_queryset(request)
         # Only show articles from 东方烟草报
         return qs.filter(website__name='东方烟草报')
-
+
     def delete_all_articles(self, request, queryset):
         """Delete every article in the current filter (all 东方烟草报 articles)"""
         # Delete all 东方烟草报 articles
         deleted_count = self.get_queryset(request).delete()[0]
         self.message_user(request, f"成功删除 {deleted_count} 篇文章", messages.SUCCESS)
-
+
     # Display name for the action
     delete_all_articles.short_description = "删除所有当前筛选的文章"
-
+
     def export_as_csv(self, request, queryset):
         """Export the selected articles as CSV"""
         meta = self.model._meta
         field_names = [field.name for field in meta.fields if field.name != 'content']  # Exclude content to keep the CSV small
-
+
         response = HttpResponse(content_type='text/csv')
         response['Content-Disposition'] = 'attachment; filename=dongfangyancao_articles.csv'
         writer = csv.writer(response)
-
+
         writer.writerow(field_names)
         for obj in queryset:
             row = []
@@ -299,16 +310,16 @@ class DongfangyancaoArticleAdmin(admin.ModelAdmin):
                     value = value.name
                 row.append(value)
             writer.writerow(row)
-
+
         return response
-
+
     export_as_csv.short_description = "导出选中文章为CSV格式"
-
+
     def export_as_json(self, request, queryset):
         """Export the selected articles as JSON"""
         response = HttpResponse(content_type='application/json')
         response['Content-Disposition'] = 'attachment; filename=dongfangyancao_articles.json'
-
+
         # Build the data to export
         articles_data = []
         for article in queryset:
@@ -322,16 +333,17 @@ class DongfangyancaoArticleAdmin(admin.ModelAdmin):
                 'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                 'media_files': article.media_files
             })
-
+
         # Write the JSON payload
         response.write(json.dumps(articles_data, ensure_ascii=False, indent=2))
         return response
-
+
     export_as_json.short_description = "导出选中文章为JSON格式"
 
+
 # Register the models on their respective admin sites
 news_cn_admin.register(Website, WebsiteAdmin)
 news_cn_admin.register(Article, NewsCnArticleAdmin)
 dongfangyancao_admin.register(Website, WebsiteAdmin)
-dongfangyancao_admin.register(Article, DongfangyancaoArticleAdmin)
\ No newline at end of file
+dongfangyancao_admin.register(Article, DongfangyancaoArticleAdmin)
diff --git a/core/management/commands/crawl_dongfangyancao.py b/core/management/commands/crawl_dongfangyancao.py
index 2cb43e9..2f50a7d 100644
--- a/core/management/commands/crawl_dongfangyancao.py
+++ b/core/management/commands/crawl_dongfangyancao.py
@@ -1,4 +1,3 @@
-# core/management/commands/crawl_dongfangyancao.py
 from django.core.management.base import BaseCommand
 from core.models import Website
 from core.utils import full_site_crawler
@@ -18,4 +17,4 @@ class Command(BaseCommand):
         start_url = "https://www.eastobacco.com/"
         self.stdout.write(f"开始全站爬取: {start_url}")
         full_site_crawler(start_url, website, max_pages=500)
-        self.stdout.write("爬取完成")
\ No newline at end of file
+        self.stdout.write("爬取完成")
diff --git a/core/management/commands/crawl_xinhua.py b/core/management/commands/crawl_xinhua.py
index 4dc2d9b..ba9d57f 100644
--- a/core/management/commands/crawl_xinhua.py
+++ b/core/management/commands/crawl_xinhua.py
@@ -1,4 +1,3 @@
-# core/management/commands/crawl_xinhua.py
 from django.core.management.base import BaseCommand
 from core.models import Website
 from core.utils import full_site_crawler
diff --git a/core/management/commands/export_articles.py b/core/management/commands/export_articles.py
index 16eac9b..7f6912e 100644
--- a/core/management/commands/export_articles.py
+++ b/core/management/commands/export_articles.py
@@ -23,7 +23,7 @@ class Command(BaseCommand):
         website_name = options['website']
         output_path = options['output']
         include_media = options['include_media']
-
+
         # Build the article queryset
         articles = Article.objects.all()
         if website_name:
@@ -33,15 +33,15 @@
             except Website.DoesNotExist:
                 self.stdout.write(self.style.ERROR(f'网站 "{website_name}" 不存在'))
                 return
-
+
         if not articles.exists():
             self.stdout.write(self.style.WARNING('没有找到文章'))
             return
-
+
         # Prepare the export data
         articles_data = []
         media_files = []
-
+
         for article in articles:
             article_data = {
                 'id': article.id,
@@ -54,14 +54,14 @@ class Command(BaseCommand):
                 'media_files': article.media_files
             }
             articles_data.append(article_data)
-
+
             # Collect media file paths
             if include_media:
                 for media_path in article.media_files:
                     full_path = os.path.join(settings.MEDIA_ROOT, media_path)
                     if os.path.exists(full_path):
                         media_files.append(full_path)
-
+
         # Work out the output path
         if not output_path:
             timestamp = timezone.now().strftime('%Y%m%d_%H%M%S')
@@ -69,7 +69,7 @@
                 output_path = f'articles_export_{timestamp}.zip'
             else:
                 output_path = f'articles_export_{timestamp}.{format_type}'
-
+
         # Run the export
         if include_media:
             self.export_with_media(articles_data, media_files, output_path, format_type)
@@ -84,7 +84,7 @@
             else:
                 self.stdout.write(self.style.ERROR('不支持的格式,仅支持 json、csv 或 docx'))
                 return
-
+
         self.stdout.write(self.style.SUCCESS(f'成功导出 {len(articles_data)} 篇文章到 {output_path}'))
 
     def export_as_json(self, articles_data, output_path):
@@ -94,16 +94,17 @@
     def export_as_csv(self, articles_data, output_path):
         if not articles_data:
             return
-
+
         # Open the CSV file
         with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
             fieldnames = ['id', 'title', 'website', 'url', 'pub_date', 'content', 'created_at', 'media_files']
             writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
-
+
             writer.writeheader()
             for article_data in articles_data:
                 # Join the list into a string so it can be stored in the CSV
-                article_data['media_files'] = ';'.join(article_data['media_files']) if article_data['media_files'] else ''
+                article_data['media_files'] = ';'.join(article_data['media_files']) if article_data[
+                    'media_files'] else ''
                 writer.writerow(article_data)
 
     # Word-format export method
@@ -122,19 +123,19 @@
         for article_data in articles_data:
             # Add the article title
             doc.add_heading(article_data['title'], level=1)
-
+
             # Add the article metadata
             doc.add_paragraph(f"网站: {article_data['website']}")
             doc.add_paragraph(f"URL: {article_data['url']}")
             doc.add_paragraph(f"发布时间: {article_data['pub_date']}")
             doc.add_paragraph(f"创建时间: {article_data['created_at']}")
-
+
             # Add the article body
             doc.add_heading('内容', level=2)
             # Lightly process the HTML content and strip tags
             from bs4 import BeautifulSoup
             soup = BeautifulSoup(article_data['content'], 'html.parser')
-
+
             # Handle images embedded in the content
             for img in soup.find_all('img'):
                 src = img.get('src', '')
@@ -145,7 +146,7 @@
                         from django.conf import settings
                         import requests
                         from io import BytesIO
-
+
                         # Build the full image path
                         if src.startswith('http'):
                             # Remote image
@@ -160,13 +161,13 @@
                     except Exception as e:
                         # If embedding the image fails, fall back to its URL as text
                         doc.add_paragraph(f"[图片: {src}]")
-
+
                 # Remove the original img tag
                 img.decompose()
-
+
             content_text = soup.get_text()
             doc.add_paragraph(content_text)
-
+
             # Add media file information
             if article_data['media_files']:
                 doc.add_heading('媒体文件', level=2)
@@ -176,7 +177,7 @@
                         from django.conf import settings
                         from io import BytesIO
                         import requests
-
+
                         full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                         if os.path.exists(full_path):
                             # Embed the image in the document
@@ -191,10 +192,10 @@
                             doc.add_paragraph(media_file)
                     except Exception as e:
                         doc.add_paragraph(media_file)
-
+
             # Insert a page break
             doc.add_page_break()
-
+
         # Save the document
         doc.save(output_path)
 
@@ -215,7 +216,8 @@
             writer = csv.DictWriter(csv_buffer, fieldnames=fieldnames)
             writer.writeheader()
             for article_data in articles_data:
-                article_data['media_files'] = ';'.join(article_data['media_files']) if article_data['media_files'] else ''
+                article_data['media_files'] = ';'.join(article_data['media_files']) if article_data[
+                    'media_files'] else ''
                 writer.writerow(article_data)
             zipf.writestr(data_filename, csv_buffer.getvalue())
         # Word format support
@@ -225,7 +227,7 @@
                 from docx import Document
                 from docx.shared import Inches
                 from io import BytesIO
-
+
                 doc = Document()
                 doc.add_heading('文章导出', 0)
 
@@ -235,11 +237,11 @@
                     doc.add_paragraph(f"URL: {article_data['url']}")
                     doc.add_paragraph(f"发布时间: {article_data['pub_date']}")
                     doc.add_paragraph(f"创建时间: {article_data['created_at']}")
-
+
                     doc.add_heading('内容', level=2)
                     from bs4 import BeautifulSoup
                     soup = BeautifulSoup(article_data['content'], 'html.parser')
-
+
                     # Handle images embedded in the content
                     for img in soup.find_all('img'):
                         src = img.get('src', '')
@@ -249,7 +251,7 @@
                                 import os
                                 from django.conf import settings
                                 import requests
-
+
                                 # Build the full image path
                                 if src.startswith('http'):
                                     # Remote image
@@ -264,20 +266,20 @@
                             except Exception as e:
                                 # If embedding the image fails, fall back to its URL as text
                                 doc.add_paragraph(f"[图片: {src}]")
-
+
                         # Remove the original img tag
                         img.decompose()
-
+
                     content_text = soup.get_text()
                     doc.add_paragraph(content_text)
-
+
                    if article_data['media_files']:
                         doc.add_heading('媒体文件', level=2)
                         for media_file in article_data['media_files']:
                             try:
                                 import os
                                 from django.conf import settings
-
+
                                 full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                                 if os.path.exists(full_path):
                                     # Embed the image in the document
@@ -292,9 +294,9 @@
                                     doc.add_paragraph(media_file)
                             except Exception as e:
                                 doc.add_paragraph(media_file)
-
+
                     doc.add_page_break()
-
+
                 # Save the document to memory, then write it into the ZIP
                 doc_buffer = BytesIO()
                 doc.save(doc_buffer)
@@ -302,8 +304,8 @@
                 zipf.writestr(data_filename, doc_buffer.read())
             except ImportError:
                 zipf.writestr(data_filename, "错误:缺少python-docx库,无法生成Word文档")
-
+
         # Attach the media files
         for media_path in media_files:
             arcname = os.path.join('media', os.path.relpath(media_path, settings.MEDIA_ROOT))
-            zipf.write(media_path, arcname)
\ No newline at end of file
+            zipf.write(media_path, arcname)
diff --git a/core/templates/core/article_detail.html b/core/templates/core/article_detail.html
index 155b24b..649e54b 100644
--- a/core/templates/core/article_detail.html
+++ b/core/templates/core/article_detail.html
@@ -13,39 +13,46 @@
             padding: 20px;
             background-color: #f8f9fa;
         }
+
         .article-container {
             background: white;
             border-radius: 8px;
-            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
+            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
             padding: 30px;
             margin-bottom: 20px;
         }
+
         h1 {
             color: #2c3e50;
             border-bottom: 2px solid #3498db;
             padding-bottom: 10px;
             margin-top: 0;
         }
+
         .meta {
             color: #7f8c8d;
             font-size: 0.9em;
             margin-bottom: 20px;
         }
+
         hr {
             border: 0;
             height: 1px;
             background: #ecf0f1;
             margin: 20px 0;
         }
+
         .content {
             font-size: 16px;
         }
+
         .content img {
             max-width: 100%;
             height: auto;
             border-radius: 4px;
             margin: 10px 0;
         }
+
         .back-link {
             display: inline-block;
             padding: 10px 20px;
@@ -55,23 +62,24 @@
             border-radius: 4px;
             transition: background-color 0.3s;
         }
+
         .back-link:hover {
             background-color: #2980b9;
         }
-
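
Note: this diff creates two standalone `AdminSite` instances (`news_cn_admin` and `dongfangyancao_admin`) but does not include the URLconf change that mounts them. A minimal sketch of that wiring, assuming a root `urls.py` with the default admin kept available — the URL prefixes and any existing patterns are assumptions, since they are not part of the hunks shown here:

    # Root URLconf sketch (hypothetical paths): mount the default admin plus
    # the two custom sites defined in core/admin.py.
    from django.contrib import admin
    from django.urls import path

    from core.admin import dongfangyancao_admin, news_cn_admin

    urlpatterns = [
        path('admin/', admin.site.urls),  # default admin (models registered via @admin.register)
        path('news-cn-admin/', news_cn_admin.urls),  # 新华网 back office
        path('dongfangyancao-admin/', dongfangyancao_admin.urls),  # 东方烟草报 back office
    ]

The export command could then be driven with something like `python manage.py export_articles --website 东方烟草报 --format docx --include-media`; the exact flag spellings are an assumption, since `add_arguments` falls outside the hunks in this diff.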