Compare commits
5 Commits
969d46b070
...
958b087f54
| Author | SHA1 | Date | |
|---|---|---|---|
| 958b087f54 | |||
| b6bbb90703 | |||
| bfd1604872 | |||
| d9d2ea9d99 | |||
| 6d80326a4e |
338
core/admin.py
338
core/admin.py
@@ -1,11 +1,349 @@
|
||||
from django.contrib import admin
|
||||
from django.contrib.admin import AdminSite
|
||||
from .models import Website, Article
|
||||
# 添加actions相关的导入
|
||||
from django.contrib import messages
|
||||
from django.http import HttpResponseRedirect
|
||||
# 添加导出功能所需导入
|
||||
import csv
|
||||
from django.http import HttpResponse
|
||||
import json
|
||||
|
||||
|
||||
# 创建自定义管理站点
|
||||
class NewsCnAdminSite(AdminSite):
    """Dedicated admin site for 新华网 (news.cn) content management."""

    index_title = "新华网内容管理"
    site_title = "新华网管理"
    site_header = "新华网管理后台"
|
||||
|
||||
|
||||
class DongfangyancaoAdminSite(AdminSite):
    """Dedicated admin site for 东方烟草报 content management."""

    index_title = "东方烟草报内容管理"
    site_title = "东方烟草报管理"
    site_header = "东方烟草报管理后台"
|
||||
|
||||
|
||||
# Instantiate the per-site admin sites; each `name` becomes its own URL namespace.
news_cn_admin = NewsCnAdminSite(name='news_cn_admin')
dongfangyancao_admin = DongfangyancaoAdminSite(name='dongfangyancao_admin')
|
||||
|
||||
|
||||
@admin.register(Website)
class WebsiteAdmin(admin.ModelAdmin):
    """Admin configuration for crawled Website records."""

    list_display = ('name', 'base_url', 'enabled')
|
||||
|
||||
|
||||
# Default Article admin with bulk delete and export actions.
@admin.register(Article)
class ArticleAdmin(admin.ModelAdmin):
    """Admin for Article: list/search config plus bulk delete and CSV/JSON/Word export actions."""

    list_display = ('title', 'website', 'pub_date')
    search_fields = ('title', 'content')
    # FIX: the original listed 'delete_selected_articles', which is defined
    # nowhere — Django's built-in bulk delete action is named 'delete_selected'.
    actions = ['delete_selected', 'delete_dongfangyancao_articles', 'export_as_csv',
               'export_as_json', 'export_as_word']

    def delete_dongfangyancao_articles(self, request, queryset):
        """Delete every 东方烟草报 article in one click.

        NOTE: ignores the admin selection (`queryset`) on purpose and deletes
        ALL articles belonging to the 东方烟草报 website.
        """
        try:
            dongfangyancao_website = Website.objects.get(name='东方烟草报')
            # .delete() returns (total_count, per-model dict); keep the total.
            deleted_count = Article.objects.filter(website=dongfangyancao_website).delete()[0]
            self.message_user(request, f"成功删除 {deleted_count} 篇东方烟草报文章", messages.SUCCESS)
        except Website.DoesNotExist:
            self.message_user(request, "未找到东方烟草报网站配置", messages.ERROR)

    # Display name shown in the admin actions dropdown.
    delete_dongfangyancao_articles.short_description = "删除所有东方烟草报文章"

    def export_as_csv(self, request, queryset):
        """Export the selected articles as a CSV attachment (all model fields)."""
        meta = self.model._meta
        field_names = [field.name for field in meta.fields]

        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename={}.csv'.format(meta)
        writer = csv.writer(response)

        writer.writerow(field_names)
        for obj in queryset:
            # Call zero-arg callables (e.g. methods) so their value is exported.
            row = [getattr(obj, field)() if callable(getattr(obj, field)) else getattr(obj, field)
                   for field in field_names]
            writer.writerow(row)

        return response

    export_as_csv.short_description = "导出选中文章为CSV格式"

    def export_as_json(self, request, queryset):
        """Export the selected articles as a pretty-printed JSON attachment."""
        response = HttpResponse(content_type='application/json')
        response['Content-Disposition'] = 'attachment; filename=articles.json'

        articles_data = []
        for article in queryset:
            articles_data.append({
                'id': article.id,
                'title': article.title,
                'website': article.website.name,
                'url': article.url,
                'pub_date': article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else None,
                'content': article.content,
                'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                'media_files': article.media_files
            })

        response.write(json.dumps(articles_data, ensure_ascii=False, indent=2))
        return response

    export_as_json.short_description = "导出选中文章为JSON格式"

    def export_as_word(self, request, queryset):
        """Export the selected articles as a Word (.docx) attachment.

        Embeds images referenced in the article HTML and listed media files;
        failed image fetches degrade to a plain-text placeholder.
        """
        try:
            from docx import Document
            from docx.shared import Inches
        except ImportError:
            self.message_user(request, "缺少python-docx库,请安装: pip install python-docx", messages.ERROR)
            return

        # FIX: hoisted — the original re-imported these inside every loop iteration.
        import os
        import requests
        from io import BytesIO
        from django.conf import settings
        from bs4 import BeautifulSoup

        doc = Document()
        doc.add_heading('文章导出', 0)

        for article in queryset:
            doc.add_heading(article.title, level=1)

            # Article metadata block.
            doc.add_paragraph(f"网站: {article.website.name}")
            doc.add_paragraph(f"URL: {article.url}")
            doc.add_paragraph(
                f"发布时间: {article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else 'N/A'}")
            doc.add_paragraph(f"创建时间: {article.created_at.strftime('%Y-%m-%d %H:%M:%S')}")

            doc.add_heading('内容', level=2)
            # Strip HTML tags; images are embedded separately below.
            soup = BeautifulSoup(article.content, 'html.parser')

            for img in soup.find_all('img'):
                src = img.get('src', '')
                if src:
                    try:
                        if src.startswith('http'):
                            # Remote image: download into memory and embed.
                            response = requests.get(src, timeout=10)
                            doc.add_picture(BytesIO(response.content), width=Inches(4.0))
                        else:
                            # Local image under MEDIA_ROOT.
                            full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
                            if os.path.exists(full_path):
                                doc.add_picture(full_path, width=Inches(4.0))
                    except Exception:
                        # Embedding failed — fall back to a textual placeholder.
                        doc.add_paragraph(f"[图片: {src}]")
                # Remove the tag so it doesn't leak into get_text().
                img.decompose()

            doc.add_paragraph(soup.get_text())

            # Attached media files (paths or URLs stored on the article).
            if article.media_files:
                doc.add_heading('媒体文件', level=2)
                for media_file in article.media_files:
                    try:
                        full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                        if os.path.exists(full_path):
                            doc.add_picture(full_path, width=Inches(4.0))
                        elif media_file.startswith('http'):
                            response = requests.get(media_file, timeout=10)
                            doc.add_picture(BytesIO(response.content), width=Inches(4.0))
                        else:
                            doc.add_paragraph(media_file)
                    except Exception:
                        doc.add_paragraph(media_file)

            doc.add_page_break()

        # Serialize the document into memory and return it as a download.
        buffer = BytesIO()
        doc.save(buffer)
        buffer.seek(0)

        response = HttpResponse(
            buffer.getvalue(),
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
        response['Content-Disposition'] = 'attachment; filename=articles.docx'
        return response

    export_as_word.short_description = "导出选中文章为Word格式"
|
||||
|
||||
|
||||
# Site-specific article admin: only shows articles crawled from www.news.cn.
class NewsCnArticleAdmin(admin.ModelAdmin):
    """Article admin restricted to the 新华网 (www.news.cn) website."""

    list_display = ('title', 'pub_date')
    search_fields = ('title', 'content')
    list_filter = ('pub_date',)
    actions = ['export_as_csv', 'export_as_json']

    def get_queryset(self, request):
        """Limit the change list to articles whose website is www.news.cn."""
        base = super().get_queryset(request)
        return base.filter(website__name='www.news.cn')

    def export_as_csv(self, request, queryset):
        """Export the selected articles as CSV (content column omitted for size)."""
        opts = self.model._meta
        columns = [f.name for f in opts.fields if f.name != 'content']

        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename=news_cn_articles.csv'
        writer = csv.writer(response)
        writer.writerow(columns)

        for article in queryset:
            cells = []
            for name in columns:
                cell = getattr(article, name)
                if callable(cell):
                    cell = cell()
                if name == 'website':
                    # Serialize the related Website by its display name.
                    cell = cell.name
                cells.append(cell)
            writer.writerow(cells)

        return response

    export_as_csv.short_description = "导出选中文章为CSV格式"

    def export_as_json(self, request, queryset):
        """Export the selected articles as pretty-printed JSON."""
        payload = [
            {
                'id': article.id,
                'title': article.title,
                'website': article.website.name,
                'url': article.url,
                'pub_date': article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else None,
                'content': article.content,
                'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                'media_files': article.media_files
            }
            for article in queryset
        ]

        response = HttpResponse(content_type='application/json')
        response['Content-Disposition'] = 'attachment; filename=news_cn_articles.json'
        response.write(json.dumps(payload, ensure_ascii=False, indent=2))
        return response

    export_as_json.short_description = "导出选中文章为JSON格式"
|
||||
|
||||
|
||||
class DongfangyancaoArticleAdmin(admin.ModelAdmin):
    """Article admin restricted to the 东方烟草报 website, with bulk delete and export actions."""

    list_display = ('title', 'pub_date')
    search_fields = ('title', 'content')
    list_filter = ('pub_date',)
    # FIX: the original listed 'delete_selected_articles', which is defined
    # nowhere — Django's built-in bulk delete action is named 'delete_selected'.
    actions = ['delete_selected', 'delete_all_articles', 'export_as_csv', 'export_as_json']

    def get_queryset(self, request):
        """Limit the change list to articles whose website is 东方烟草报."""
        qs = super().get_queryset(request)
        return qs.filter(website__name='东方烟草报')

    def delete_all_articles(self, request, queryset):
        """Delete every article in the current filtered queryset (all 东方烟草报 articles).

        NOTE: ignores the admin selection on purpose and deletes the whole
        filtered set returned by get_queryset().
        """
        deleted_count = self.get_queryset(request).delete()[0]
        self.message_user(request, f"成功删除 {deleted_count} 篇文章", messages.SUCCESS)

    # Display name shown in the admin actions dropdown.
    delete_all_articles.short_description = "删除所有当前筛选的文章"

    def export_as_csv(self, request, queryset):
        """Export the selected articles as CSV (content column omitted for size)."""
        meta = self.model._meta
        field_names = [field.name for field in meta.fields if field.name != 'content']

        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename=dongfangyancao_articles.csv'
        writer = csv.writer(response)

        writer.writerow(field_names)
        for obj in queryset:
            row = []
            for field in field_names:
                value = getattr(obj, field)
                if callable(value):
                    value = value()
                if field == 'website':
                    # Serialize the related Website by its display name.
                    value = value.name
                row.append(value)
            writer.writerow(row)

        return response

    export_as_csv.short_description = "导出选中文章为CSV格式"

    def export_as_json(self, request, queryset):
        """Export the selected articles as pretty-printed JSON."""
        response = HttpResponse(content_type='application/json')
        response['Content-Disposition'] = 'attachment; filename=dongfangyancao_articles.json'

        articles_data = []
        for article in queryset:
            articles_data.append({
                'id': article.id,
                'title': article.title,
                'website': article.website.name,
                'url': article.url,
                'pub_date': article.pub_date.strftime('%Y-%m-%d %H:%M:%S') if article.pub_date else None,
                'content': article.content,
                'created_at': article.created_at.strftime('%Y-%m-%d %H:%M:%S'),
                'media_files': article.media_files
            })

        response.write(json.dumps(articles_data, ensure_ascii=False, indent=2))
        return response

    export_as_json.short_description = "导出选中文章为JSON格式"
|
||||
|
||||
|
||||
# Register the models on each site-specific admin instance.
news_cn_admin.register(Website, WebsiteAdmin)
news_cn_admin.register(Article, NewsCnArticleAdmin)

dongfangyancao_admin.register(Website, WebsiteAdmin)
dongfangyancao_admin.register(Article, DongfangyancaoArticleAdmin)
|
||||
|
||||
20
core/management/commands/crawl_dongfangyancao.py
Normal file
20
core/management/commands/crawl_dongfangyancao.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from core.models import Website
|
||||
from core.utils import full_site_crawler
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Full-site recursive crawl of 东方烟草报 (eastobacco.com)."""

    help = "全站递归爬取 东方烟草报"

    def handle(self, *args, **kwargs):
        """Ensure the Website row exists, then crawl the whole site (up to 500 pages)."""
        # FIX: the `created` flag was captured but never used.
        website, _ = Website.objects.get_or_create(
            name="东方烟草报",
            defaults={
                'article_list_url': 'https://www.eastobacco.com/',
                'article_selector': 'a'
            }
        )
        start_url = "https://www.eastobacco.com/"
        self.stdout.write(f"开始全站爬取: {start_url}")
        full_site_crawler(start_url, website, max_pages=500)
        # Consistency: the sibling crawl commands style their completion message.
        self.stdout.write(self.style.SUCCESS("爬取完成"))
|
||||
@@ -1,18 +1,20 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from core.models import Website
|
||||
from core.utils import crawl_xinhua_list
|
||||
from core.utils import full_site_crawler
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Full-site recursive crawl of www.news.cn.

    FIX: the rendered text contained two `help` assignments and two `handle`
    definitions (old list-page crawler + new full-site crawler merged by the
    diff); the first of each was dead code shadowed by the later definition.
    Only the surviving full-site version is kept.
    """

    help = "全站递归爬取 www.news.cn"

    def handle(self, *args, **kwargs):
        """Ensure the Website row exists, then crawl the whole site (up to 500 pages)."""
        website, _ = Website.objects.get_or_create(
            name="www.news.cn",
            defaults={
                'article_list_url': 'https://www.news.cn/',
                'article_selector': 'a'
            }
        )
        start_url = "https://www.news.cn/"
        self.stdout.write(f"开始全站爬取: {start_url}")
        full_site_crawler(start_url, website, max_pages=500)
        self.stdout.write("爬取完成")
|
||||
|
||||
21
core/management/commands/crawl_xinhua_bak.py
Normal file
21
core/management/commands/crawl_xinhua_bak.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from core.models import Website
|
||||
from core.utils import crawl_xinhua_list
|
||||
|
||||
class Command(BaseCommand):
    """Legacy list-page crawler for 新华网, kept as a backup command."""

    help = '批量爬取新华网文章'

    def handle(self, *args, **options):
        # Usage marker so logs reveal whether this backup command still runs.
        self.stdout.write(self.style.WARNING("crawl_xinhua command is being used"))

        index_url = "https://www.news.cn/legal/index.html"
        try:
            site = Website.objects.get(base_url="https://www.news.cn/")
        except Website.DoesNotExist:
            self.stdout.write(self.style.ERROR("网站 https://www.news.cn/ 不存在,请先后台添加"))
            return

        self.stdout.write(f"开始爬取文章列表页: {index_url}")
        crawl_xinhua_list(index_url, site)
        self.stdout.write(self.style.SUCCESS("批量爬取完成"))
|
||||
311
core/management/commands/export_articles.py
Normal file
311
core/management/commands/export_articles.py
Normal file
@@ -0,0 +1,311 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from core.models import Article, Website
|
||||
import json
|
||||
import csv
|
||||
import os
|
||||
from django.conf import settings
|
||||
from django.core.files.storage import default_storage
|
||||
import zipfile
|
||||
from django.utils import timezone
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Export articles (optionally with their media files) to JSON/CSV/Word/ZIP."""

    help = '导出文章及相关的媒体文件(图片、视频等)'

    def add_arguments(self, parser):
        parser.add_argument('--format', type=str, default='json', help='导出格式: json 或 csv')
        parser.add_argument('--website', type=str, help='指定网站名称导出特定网站的文章')
        parser.add_argument('--output', type=str, default='', help='输出文件路径')
        parser.add_argument('--include-media', action='store_true', help='包含媒体文件')

    def handle(self, *args, **options):
        """Collect the requested articles and dispatch to the chosen exporter."""
        format_type = options['format'].lower()
        website_name = options['website']
        output_path = options['output']
        include_media = options['include_media']

        articles = Article.objects.all()
        if website_name:
            try:
                website = Website.objects.get(name=website_name)
            except Website.DoesNotExist:
                self.stdout.write(self.style.ERROR(f'网站 "{website_name}" 不存在'))
                return
            articles = articles.filter(website=website)

        if not articles.exists():
            self.stdout.write(self.style.WARNING('没有找到文章'))
            return

        # Build plain-dict export records (and, optionally, the list of
        # local media files to bundle into the ZIP).
        articles_data = []
        media_files = []
        for article in articles:
            articles_data.append({
                'id': article.id,
                'title': article.title,
                'website': article.website.name,
                'url': article.url,
                'pub_date': article.pub_date.isoformat() if article.pub_date else None,
                'content': article.content,
                'created_at': article.created_at.isoformat(),
                'media_files': article.media_files
            })
            if include_media:
                for media_path in article.media_files:
                    full_path = os.path.join(settings.MEDIA_ROOT, media_path)
                    if os.path.exists(full_path):
                        media_files.append(full_path)

        # Default output name: timestamped; ZIP when media is bundled.
        if not output_path:
            timestamp = timezone.now().strftime('%Y%m%d_%H%M%S')
            if include_media:
                output_path = f'articles_export_{timestamp}.zip'
            else:
                output_path = f'articles_export_{timestamp}.{format_type}'

        if include_media:
            self.export_with_media(articles_data, media_files, output_path, format_type)
        else:
            if format_type == 'json':
                self.export_as_json(articles_data, output_path)
            elif format_type == 'csv':
                self.export_as_csv(articles_data, output_path)
            elif format_type == 'docx':
                self.export_as_word(articles_data, output_path)
            else:
                self.stdout.write(self.style.ERROR('不支持的格式,仅支持 json、csv 或 docx'))
                return

        self.stdout.write(self.style.SUCCESS(f'成功导出 {len(articles_data)} 篇文章到 {output_path}'))

    def export_as_json(self, articles_data, output_path):
        """Write the article dicts to *output_path* as pretty-printed JSON."""
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(articles_data, f, ensure_ascii=False, indent=2)

    def export_as_csv(self, articles_data, output_path):
        """Write the article dicts to *output_path* as CSV.

        NOTE: rewrites each record's 'media_files' list into a ';'-joined
        string so it fits in a single CSV cell (mutates the input dicts).
        """
        if not articles_data:
            return

        with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['id', 'title', 'website', 'url', 'pub_date', 'content', 'created_at', 'media_files']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for article_data in articles_data:
                article_data['media_files'] = ';'.join(article_data['media_files']) \
                    if article_data['media_files'] else ''
                writer.writerow(article_data)

    def _build_word_document(self, articles_data):
        """Build and return a python-docx Document for *articles_data*.

        Shared by export_as_word and export_with_media (the original
        duplicated this ~80-line builder in both places). Raises ImportError
        when python-docx is missing — callers handle it.

        FIX: imports are hoisted here once. The original re-imported
        os/settings/requests/BytesIO on every loop iteration, and the
        media-file loop inside export_with_media used `requests` without
        importing it at all — the resulting NameError was swallowed by the
        broad `except`, so remote media files were silently written as plain
        text instead of being embedded.
        """
        from docx import Document
        from docx.shared import Inches
        import requests
        from io import BytesIO
        from bs4 import BeautifulSoup

        doc = Document()
        doc.add_heading('文章导出', 0)

        for article_data in articles_data:
            doc.add_heading(article_data['title'], level=1)

            # Article metadata block.
            doc.add_paragraph(f"网站: {article_data['website']}")
            doc.add_paragraph(f"URL: {article_data['url']}")
            doc.add_paragraph(f"发布时间: {article_data['pub_date']}")
            doc.add_paragraph(f"创建时间: {article_data['created_at']}")

            doc.add_heading('内容', level=2)
            # Strip HTML tags; images are embedded separately below.
            soup = BeautifulSoup(article_data['content'], 'html.parser')

            for img in soup.find_all('img'):
                src = img.get('src', '')
                if src:
                    try:
                        if src.startswith('http'):
                            # Remote image: download into memory and embed.
                            response = requests.get(src, timeout=10)
                            doc.add_picture(BytesIO(response.content), width=Inches(4.0))
                        else:
                            # Local image under MEDIA_ROOT.
                            full_path = os.path.join(settings.MEDIA_ROOT, src.lstrip('/'))
                            if os.path.exists(full_path):
                                doc.add_picture(full_path, width=Inches(4.0))
                    except Exception:
                        # Embedding failed — fall back to a textual placeholder.
                        doc.add_paragraph(f"[图片: {src}]")
                # Remove the tag so it doesn't leak into get_text().
                img.decompose()

            doc.add_paragraph(soup.get_text())

            # Attached media files (paths or URLs stored on the article).
            if article_data['media_files']:
                doc.add_heading('媒体文件', level=2)
                for media_file in article_data['media_files']:
                    try:
                        full_path = os.path.join(settings.MEDIA_ROOT, media_file)
                        if os.path.exists(full_path):
                            doc.add_picture(full_path, width=Inches(4.0))
                        elif media_file.startswith('http'):
                            response = requests.get(media_file, timeout=10)
                            doc.add_picture(BytesIO(response.content), width=Inches(4.0))
                        else:
                            doc.add_paragraph(media_file)
                    except Exception:
                        doc.add_paragraph(media_file)

            doc.add_page_break()

        return doc

    def export_as_word(self, articles_data, output_path):
        """Write the article dicts to *output_path* as a Word (.docx) document."""
        try:
            doc = self._build_word_document(articles_data)
        except ImportError:
            self.stdout.write(self.style.ERROR('缺少python-docx库,请安装: pip install python-docx'))
            return
        doc.save(output_path)

    def export_with_media(self, articles_data, media_files, output_path, format_type):
        """Write a ZIP containing the data file plus all local media files."""
        with zipfile.ZipFile(output_path, 'w') as zipf:
            data_filename = f'articles.{format_type}'
            if format_type == 'json':
                json_data = json.dumps(articles_data, ensure_ascii=False, indent=2)
                zipf.writestr(data_filename, json_data)
            elif format_type == 'csv':
                if articles_data:
                    import io
                    csv_buffer = io.StringIO()
                    fieldnames = ['id', 'title', 'website', 'url', 'pub_date', 'content', 'created_at', 'media_files']
                    writer = csv.DictWriter(csv_buffer, fieldnames=fieldnames)
                    writer.writeheader()
                    for article_data in articles_data:
                        article_data['media_files'] = ';'.join(article_data['media_files']) \
                            if article_data['media_files'] else ''
                        writer.writerow(article_data)
                    zipf.writestr(data_filename, csv_buffer.getvalue())
            elif format_type == 'docx':
                try:
                    from io import BytesIO
                    doc = self._build_word_document(articles_data)
                    # Serialize into memory, then store inside the ZIP.
                    doc_buffer = BytesIO()
                    doc.save(doc_buffer)
                    doc_buffer.seek(0)
                    zipf.writestr(data_filename, doc_buffer.read())
                except ImportError:
                    zipf.writestr(data_filename, "错误:缺少python-docx库,无法生成Word文档")

            # Bundle the local media files under media/ preserving relative paths.
            for media_path in media_files:
                arcname = os.path.join('media', os.path.relpath(media_path, settings.MEDIA_ROOT))
                zipf.write(media_path, arcname)
|
||||
@@ -1,5 +1,6 @@
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Website(models.Model):
|
||||
name = models.CharField(max_length=100, unique=True)
|
||||
base_url = models.URLField()
|
||||
|
||||
@@ -1,17 +1,85 @@
|
||||
<!DOCTYPE html>
<html lang="zh">
<head>
    <!-- FIX: diff residue removed — the rendered text contained both the old
         and new versions of the meta tag, the <hr> separators, and the
         back-to-list link; only the post-diff versions are kept. -->
    <meta charset="UTF-8"/>
    <title>{{ article.title }}</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1200px; /* keep in sync with the list page width */
            margin: 0 auto;
            padding: 20px;
            background-color: #f8f9fa;
        }

        .article-container {
            background: white;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
            padding: 30px;
            margin-bottom: 20px;
        }

        h1 {
            color: #2c3e50;
            border-bottom: 2px solid #3498db;
            padding-bottom: 10px;
            margin-top: 0;
        }

        .meta {
            color: #7f8c8d;
            font-size: 0.9em;
            margin-bottom: 20px;
        }

        hr {
            border: 0;
            height: 1px;
            background: #ecf0f1;
            margin: 20px 0;
        }

        .content {
            font-size: 16px;
        }

        .content img {
            max-width: 100%;
            height: auto;
            border-radius: 4px;
            margin: 10px 0;
        }

        .back-link {
            display: inline-block;
            padding: 10px 20px;
            background-color: #3498db;
            color: white;
            text-decoration: none;
            border-radius: 4px;
            transition: background-color 0.3s;
        }

        .back-link:hover {
            background-color: #2980b9;
        }
    </style>
</head>
<body>
<div class="article-container">
    <h1>{{ article.title }}</h1>
    <div class="meta">
        <p>发布时间: {{ article.pub_date|date:"Y-m-d H:i" }}</p>
    </div>
    <hr/>
    <div class="content">
        {{ article.content|safe }}
    </div>
    <hr/>
    <p><a href="{% url 'article_list' %}" class="back-link">← 返回列表</a></p>
</div>
</body>
</html>
|
||||
@@ -1,17 +1,200 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta charset="UTF-8"/>
|
||||
<title>绿色课堂文章列表</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
max-width: 1200px; /* 修改:增加页面最大宽度 */
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
background-color: #f8f9fa;
|
||||
}
|
||||
|
||||
.container {
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
|
||||
padding: 30px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
h1 {
|
||||
color: #2c3e50;
|
||||
border-bottom: 2px solid #3498db;
|
||||
padding-bottom: 10px;
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
.filters {
|
||||
margin-bottom: 20px;
|
||||
padding: 15px;
|
||||
background-color: #f1f8ff;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
.filters a {
|
||||
display: inline-block;
|
||||
padding: 5px 10px;
|
||||
margin: 0 5px 5px 0;
|
||||
background-color: #e1e8ed;
|
||||
color: #333;
|
||||
text-decoration: none;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.filters a.active {
|
||||
background-color: #3498db;
|
||||
color: white;
|
||||
}
|
||||
|
||||
ul {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
li {
|
||||
padding: 10px 0;
|
||||
border-bottom: 1px solid #ecf0f1;
|
||||
}
|
||||
|
||||
li:last-child {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
a {
|
||||
color: #3498db;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #2980b9;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.meta {
|
||||
color: #7f8c8d;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
.pagination {
|
||||
margin-top: 30px;
|
||||
text-align: center;
|
||||
padding: 20px 0;
|
||||
}
|
||||
|
||||
.pagination a {
|
||||
display: inline-block;
|
||||
padding: 8px 16px;
|
||||
background-color: #3498db;
|
||||
color: white;
|
||||
text-decoration: none;
|
||||
border-radius: 4px;
|
||||
margin: 0 2px; /* 修改:调整页码间距 */
|
||||
}
|
||||
|
||||
.pagination a:hover {
|
||||
background-color: #2980b9;
|
||||
}
|
||||
|
||||
.pagination span {
|
||||
margin: 0 10px;
|
||||
color: #7f8c8d;
|
||||
}
|
||||
|
||||
/* 新增:当前页码样式 */
|
||||
.pagination .current {
|
||||
background-color: #2980b9;
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
/* 新增:省略号样式 */
|
||||
.pagination .ellipsis {
|
||||
display: inline-block;
|
||||
padding: 8px 4px;
|
||||
color: #7f8c8d;
|
||||
}
|
||||
|
||||
/* 新增:搜索框样式 */
|
||||
.search-form {
|
||||
margin-bottom: 20px;
|
||||
padding: 15px;
|
||||
background-color: #f1f8ff;
|
||||
border-radius: 5px;
|
||||
}
|
||||
|
||||
.search-form input[type="text"] {
|
||||
padding: 8px 12px;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
width: 300px;
|
||||
margin-right: 10px;
|
||||
}
|
||||
|
||||
.search-form input[type="submit"] {
|
||||
padding: 8px 16px;
|
||||
background-color: #3498db;
|
||||
color: white;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.search-form input[type="submit"]:hover {
|
||||
background-color: #2980b9;
|
||||
}
|
||||
|
||||
.search-info {
|
||||
color: #7f8c8d;
|
||||
font-size: 0.9em;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>绿色课堂文章列表</h1>
|
||||
|
||||
<!-- 新增:返回首页链接 -->
|
||||
<div style="margin-bottom: 20px;">
|
||||
<a href="{% url 'article_list' %}" style="color: #3498db; text-decoration: none;">← 返回首页</a>
|
||||
</div>
|
||||
|
||||
<!-- 新增:搜索表单 -->
|
||||
<div class="search-form">
|
||||
<form method="get">
|
||||
<input type="text" name="q" placeholder="输入关键词搜索文章..." value="{{ search_query }}">
|
||||
{% if selected_website %}
|
||||
<input type="hidden" name="website" value="{{ selected_website.id }}">
|
||||
{% endif %}
|
||||
<input type="submit" value="搜索">
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="filters">
|
||||
<strong>按网站筛选:</strong>
|
||||
<a href="{% url 'article_list' %}{% if search_query %}?q={{ search_query }}{% endif %}" {% if not selected_website %}class="active" {% endif %}>全部</a>
|
||||
{% for website in websites %}
|
||||
<a href="?website={{ website.id }}{% if search_query %}&q={{ search_query }}{% endif %}" {% if selected_website and selected_website.id == website.id %}class="active" {% endif %}>{{ website.name }}</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
<!-- 新增:搜索结果信息 -->
|
||||
{% if search_query %}
|
||||
<div class="search-info">
|
||||
搜索 "{{ search_query }}" 找到 {{ page_obj.paginator.count }} 篇文章
|
||||
<a href="{% if selected_website %}?website={{ selected_website.id }}{% else %}{% url 'article_list' %}{% endif %}">清除搜索</a>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<ul>
|
||||
{% for article in page_obj %}
|
||||
<li>
|
||||
<a href="{% url 'article_detail' article.id %}">{{ article.title }}</a>
|
||||
({{ article.created_at|date:"Y-m-d" }})
|
||||
<div class="meta">({{ article.website.name }} - {{ article.created_at|date:"Y-m-d" }})</div>
|
||||
</li>
|
||||
{% empty %}
|
||||
<li>暂无文章</li>
|
||||
@@ -20,14 +203,50 @@
|
||||
|
||||
<div class="pagination">
|
||||
{% if page_obj.has_previous %}
|
||||
<a href="?page={{ page_obj.previous_page_number }}">上一页</a>
|
||||
{% if selected_website %}
|
||||
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page=1">« 首页</a>
|
||||
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.previous_page_number }}">上一页</a>
|
||||
{% else %}
|
||||
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page=1">« 首页</a>
|
||||
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.previous_page_number }}">上一页</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
<span>第 {{ page_obj.number }} 页,共 {{ page_obj.paginator.num_pages }} 页</span>
|
||||
|
||||
<!-- 修改:优化页码显示逻辑 -->
|
||||
{% with page_obj.paginator as paginator %}
|
||||
{% for num in paginator.page_range %}
|
||||
{% if page_obj.number == num %}
|
||||
<a href="#" class="current">{{ num }}</a>
|
||||
{% elif num > page_obj.number|add:'-3' and num < page_obj.number|add:'3' %}
|
||||
{% if selected_website %}
|
||||
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
|
||||
{% else %}
|
||||
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
|
||||
{% endif %}
|
||||
{% elif num == 1 or num == paginator.num_pages %}
|
||||
{% if selected_website %}
|
||||
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ num }}">{{ num }}</a>
|
||||
{% else %}
|
||||
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ num }}">{{ num }}</a>
|
||||
{% endif %}
|
||||
{% elif num == page_obj.number|add:'-3' or num == page_obj.number|add:'3' %}
|
||||
<span class="ellipsis">...</span>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% endwith %}
|
||||
|
||||
{% if page_obj.has_next %}
|
||||
<a href="?page={{ page_obj.next_page_number }}">下一页</a>
|
||||
{% if selected_website %}
|
||||
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.next_page_number }}">下一页</a>
|
||||
<a href="?website={{ selected_website.id }}{% if search_query %}&q={{ search_query }}{% endif %}&page={{ page_obj.paginator.num_pages }}">末页 »</a>
|
||||
{% else %}
|
||||
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.next_page_number }}">下一页</a>
|
||||
<a href="?{% if search_query %}q={{ search_query }}&{% endif %}page={{ page_obj.paginator.num_pages }}">末页 »</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
169
core/utils.py
169
core/utils.py
@@ -1,20 +1,50 @@
|
||||
# core/utils.py
|
||||
import os
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin, urlparse
|
||||
from collections import deque
|
||||
from django.utils import timezone
|
||||
from django.conf import settings
|
||||
from core.models import Article
|
||||
import re
|
||||
|
||||
|
||||
def download_media(url, save_dir):
|
||||
try:
|
||||
resp = requests.get(url, timeout=15)
|
||||
# 添加请求头以避免403 Forbidden错误
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||
"Referer": urljoin(url, "/")
|
||||
}
|
||||
resp = requests.get(url, timeout=15, headers=headers)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
print(f"下载失败:{url},错误:{e}")
|
||||
return None
|
||||
|
||||
filename = url.split("/")[-1].split("?")[0]
|
||||
# 更安全地处理文件名,去除查询参数并处理特殊字符
|
||||
parsed_url = urlparse(url)
|
||||
filename = os.path.basename(parsed_url.path)
|
||||
if not filename or '.' not in filename:
|
||||
# 如果URL路径中没有有效的文件名,使用默认名称
|
||||
filename = 'media_file'
|
||||
|
||||
# 清理文件名中的特殊字符
|
||||
filename = re.sub(r'[^\w\-_\.]', '_', filename)
|
||||
|
||||
# 确保文件有扩展名
|
||||
if '.' not in filename:
|
||||
content_type = resp.headers.get('content-type', '')
|
||||
if 'image/jpeg' in content_type:
|
||||
filename += '.jpg'
|
||||
elif 'image/png' in content_type:
|
||||
filename += '.png'
|
||||
elif 'image/gif' in content_type:
|
||||
filename += '.gif'
|
||||
else:
|
||||
filename += '.bin' # 默认二进制扩展名
|
||||
|
||||
os.makedirs(save_dir, exist_ok=True)
|
||||
filepath = os.path.join(save_dir, filename)
|
||||
|
||||
@@ -27,22 +57,51 @@ def download_media(url, save_dir):
|
||||
|
||||
with open(filepath, "wb") as f:
|
||||
f.write(resp.content)
|
||||
return filepath
|
||||
|
||||
# 返回相对路径,方便存数据库和展示
|
||||
return os.path.relpath(filepath, settings.MEDIA_ROOT).replace("\\", "/")
|
||||
|
||||
def crawl_xinhua_article(url, website):
|
||||
def process_article(url, website):
|
||||
if Article.objects.filter(url=url).exists():
|
||||
print(f"文章已存在,跳过: {url}")
|
||||
return
|
||||
|
||||
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
resp = requests.get(url, headers=headers)
|
||||
resp.encoding = 'utf-8'
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
|
||||
# 处理不同网站的文章结构
|
||||
if website.name == "www.news.cn":
|
||||
title_tag = soup.find("span", class_="title")
|
||||
content_tag = soup.find("span", id="detailContent")
|
||||
elif website.name == "东方烟草报":
|
||||
# 优化东方烟草报的标题提取逻辑,按优先级尝试多种选择器
|
||||
title_tag = (
|
||||
soup.find("h1", id="title") or # 特别针对带id="title"的h1标签
|
||||
soup.find("h1") or # 主要标题标签
|
||||
soup.find("title") or # 页面title标签
|
||||
soup.find("div", class_="title") or # 某些页面可能使用div.title
|
||||
soup.find("h2") # 备选标题标签
|
||||
)
|
||||
content_tag = soup.find("div", class_="content") # 东方烟草报的内容通常在div.content中
|
||||
# 增加对另一种内容结构的支持
|
||||
if not content_tag:
|
||||
content_tag = soup.find("div", id="gallery")
|
||||
# 再增加对新内容结构的支持
|
||||
if not content_tag:
|
||||
content_tag = soup.find("div", id="ContentText")
|
||||
else:
|
||||
# 默认处理方式
|
||||
title_tag = soup.find("h1") or soup.find("title")
|
||||
content_tag = soup.find("div", class_="content") or soup.find("div", id="content")
|
||||
|
||||
title = title_tag.get_text(strip=True) if title_tag else "无标题"
|
||||
|
||||
content_tag = soup.find("span", id="detailContent")
|
||||
# 对标题进行额外处理,去除可能的多余空白字符
|
||||
title = title.strip() if title else "无标题"
|
||||
|
||||
if not content_tag:
|
||||
print(f"没有找到正文,跳过文章: {url}")
|
||||
print("没有找到正文,跳过:", url)
|
||||
return
|
||||
|
||||
imgs = content_tag.find_all("img")
|
||||
@@ -56,22 +115,16 @@ def crawl_xinhua_article(url, website):
|
||||
src = img.get("src")
|
||||
if not src:
|
||||
continue
|
||||
|
||||
# 这里用文章URL作为基准拼接相对路径,避免错误
|
||||
if not src.startswith("http"):
|
||||
src = urljoin(url, src)
|
||||
|
||||
local_rel_path = download_media(src, save_dir)
|
||||
if local_rel_path:
|
||||
img["src"] = settings.MEDIA_URL + local_rel_path
|
||||
media_files.append(local_rel_path)
|
||||
local_path = download_media(src, save_dir)
|
||||
if local_path:
|
||||
rel_path = os.path.relpath(local_path, settings.MEDIA_ROOT)
|
||||
img["src"] = settings.MEDIA_URL + rel_path.replace("\\", "/")
|
||||
media_files.append(rel_path.replace("\\", "/"))
|
||||
|
||||
content_html = str(content_tag)
|
||||
|
||||
if Article.objects.filter(url=url).exists():
|
||||
print(f"文章已存在,跳过: {url}")
|
||||
return
|
||||
|
||||
article = Article.objects.create(
|
||||
website=website,
|
||||
title=title,
|
||||
@@ -82,22 +135,74 @@ def crawl_xinhua_article(url, website):
|
||||
)
|
||||
print(f"已保存文章及图片:{title}")
|
||||
|
||||
def crawl_xinhua_list(list_url, website):
|
||||
|
||||
def is_valid_url(url, base_netloc):
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
if parsed.scheme not in ("http", "https"):
|
||||
return False
|
||||
if parsed.netloc != base_netloc:
|
||||
return False
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def full_site_crawler(start_url, website, max_pages=1000):
|
||||
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
resp = requests.get(list_url, headers=headers)
|
||||
visited = set()
|
||||
queue = deque([start_url])
|
||||
|
||||
base_netloc = urlparse(start_url).netloc
|
||||
|
||||
pages_crawled = 0
|
||||
|
||||
while queue and pages_crawled < max_pages:
|
||||
url = queue.popleft()
|
||||
if url in visited:
|
||||
continue
|
||||
print(f"正在爬取:{url}")
|
||||
visited.add(url)
|
||||
|
||||
try:
|
||||
resp = requests.get(url, headers=headers, timeout=15)
|
||||
resp.raise_for_status()
|
||||
except Exception as e:
|
||||
print(f"请求失败:{url},错误:{e}")
|
||||
continue
|
||||
|
||||
resp.encoding = 'utf-8'
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
|
||||
article_urls = set()
|
||||
# 根据不同网站判断文章页面
|
||||
is_article_page = False
|
||||
if website.name == "www.news.cn":
|
||||
is_article_page = soup.find("span", id="detailContent") is not None
|
||||
elif website.name == "东方烟草报":
|
||||
# 对于东方烟草报,我们增加基于URL模式的判断
|
||||
# 东方烟草报的文章URL通常包含/content/和日期格式
|
||||
parsed_url = urlparse(url)
|
||||
path = parsed_url.path
|
||||
is_article_page = (
|
||||
soup.find("div", class_="content") is not None or
|
||||
soup.find("div", id="gallery") is not None or
|
||||
soup.find("div", id="ContentText") is not None or
|
||||
("/content/" in path and len(path) > 20)
|
||||
)
|
||||
else:
|
||||
# 默认判断逻辑
|
||||
is_article_page = (
|
||||
soup.find("div", class_="content") is not None or
|
||||
soup.find("div", id="content") is not None
|
||||
)
|
||||
|
||||
# 如果是文章页面,则调用文章处理
|
||||
if is_article_page:
|
||||
process_article(url, website)
|
||||
pages_crawled += 1
|
||||
|
||||
# 扩展队列,发现新链接
|
||||
for link in soup.find_all("a", href=True):
|
||||
href = link["href"]
|
||||
if href.startswith("https://www.news.cn/legal/") and href.endswith("c.html"):
|
||||
article_urls.add(href)
|
||||
|
||||
print(f"在列表页找到 {len(article_urls)} 篇文章链接")
|
||||
for url in article_urls:
|
||||
print("文章链接:", url)
|
||||
|
||||
from core.utils import crawl_xinhua_article
|
||||
for article_url in article_urls:
|
||||
crawl_xinhua_article(article_url, website)
|
||||
href = urljoin(url, link["href"])
|
||||
if href not in visited and is_valid_url(href, base_netloc):
|
||||
queue.append(href)
|
||||
|
||||
@@ -1,28 +1,44 @@
|
||||
from django.shortcuts import render, get_object_or_404
|
||||
from django.shortcuts import render
|
||||
from django.core.paginator import Paginator
|
||||
from .models import Article
|
||||
from .models import Article, Website
|
||||
|
||||
def article_list(request):
|
||||
"""
|
||||
显示文章列表的视图函数
|
||||
"""
|
||||
articles = Article.objects.all().order_by('-created_at')
|
||||
paginator = Paginator(articles, 20) # 每页显示10篇文章
|
||||
# 获取所有启用的网站
|
||||
websites = Website.objects.filter(enabled=True)
|
||||
|
||||
# 获取筛选网站
|
||||
selected_website = None
|
||||
articles = Article.objects.all()
|
||||
|
||||
website_id = request.GET.get('website')
|
||||
if website_id:
|
||||
try:
|
||||
selected_website = Website.objects.get(id=website_id)
|
||||
articles = articles.filter(website=selected_website)
|
||||
except Website.DoesNotExist:
|
||||
pass
|
||||
|
||||
# 新增:处理关键词搜索
|
||||
search_query = request.GET.get('q')
|
||||
if search_query:
|
||||
articles = articles.filter(title__icontains=search_query)
|
||||
|
||||
# 按创建时间倒序排列
|
||||
articles = articles.order_by('-created_at')
|
||||
|
||||
# 分页
|
||||
paginator = Paginator(articles, 10) # 每页显示10篇文章
|
||||
page_number = request.GET.get('page')
|
||||
page_obj = paginator.get_page(page_number)
|
||||
|
||||
return render(request, 'core/article_list.html', {
|
||||
'page_obj': page_obj
|
||||
'page_obj': page_obj,
|
||||
'websites': websites,
|
||||
'selected_website': selected_website,
|
||||
# 新增:传递搜索关键词到模板
|
||||
'search_query': search_query
|
||||
})
|
||||
|
||||
def article_detail(request, article_id):
|
||||
"""
|
||||
显示文章详情的视图函数
|
||||
"""
|
||||
article = get_object_or_404(Article, id=article_id)
|
||||
return render(request, 'core/article_detail.html', {
|
||||
'article': article
|
||||
})
|
||||
|
||||
# Create your views here.
|
||||
article = Article.objects.get(id=article_id)
|
||||
return render(request, 'core/article_detail.html', {'article': article})
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
from django.contrib import admin
|
||||
from django.urls import path, include
|
||||
from django.conf import settings
|
||||
from django.conf.urls.static import static
|
||||
from django.contrib import admin
|
||||
from django.urls import path, include
|
||||
|
||||
# 需要导入自定义的管理站点实例
|
||||
from core.admin import news_cn_admin, dongfangyancao_admin
|
||||
|
||||
urlpatterns = [
|
||||
path('admin/', admin.site.urls),
|
||||
path('news_cn_admin/', news_cn_admin.urls),
|
||||
path('dongfangyancao_admin/', dongfangyancao_admin.urls),
|
||||
# 以后前台访问放 core app 的 urls
|
||||
path('', include('core.urls')),
|
||||
]
|
||||
|
||||
31
requirements.txt
Normal file
31
requirements.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
asgiref==3.9.1
|
||||
asttokens==3.0.0
|
||||
beautifulsoup4==4.13.4
|
||||
bs4==0.0.2
|
||||
certifi==2025.8.3
|
||||
charset-normalizer==3.4.3
|
||||
decorator==5.2.1
|
||||
Django==5.1
|
||||
executing==2.2.0
|
||||
idna==3.10
|
||||
ipython==9.4.0
|
||||
ipython_pygments_lexers==1.1.1
|
||||
jedi==0.19.2
|
||||
lxml==6.0.0
|
||||
matplotlib-inline==0.1.7
|
||||
parso==0.8.4
|
||||
pexpect==4.9.0
|
||||
prompt_toolkit==3.0.51
|
||||
ptyprocess==0.7.0
|
||||
pure_eval==0.2.3
|
||||
Pygments==2.19.2
|
||||
python-docx==1.2.0
|
||||
requests==2.32.4
|
||||
soupsieve==2.7
|
||||
sqlparse==0.5.3
|
||||
stack-data==0.6.3
|
||||
traitlets==5.14.3
|
||||
typing_extensions==4.14.1
|
||||
urllib3==2.5.0
|
||||
uv==0.8.8
|
||||
wcwidth==0.2.13
|
||||
Reference in New Issue
Block a user