diff --git a/.idea/misc.xml b/.idea/misc.xml
index 3232bb5..e4b6925 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,7 @@
+
+
+
\ No newline at end of file
diff --git a/src/green_classroom/apps/collector/admin.py b/src/green_classroom/apps/collector/admin.py
index 8c38f3f..4a8b5c9 100644
--- a/src/green_classroom/apps/collector/admin.py
+++ b/src/green_classroom/apps/collector/admin.py
@@ -1,3 +1,13 @@
from django.contrib import admin
+from .models import Article, SourceSite
-# Register your models here.
+@admin.register(Article)
+class ArticleAdmin(admin.ModelAdmin):
+    """Admin changelist configuration for ``Article`` records."""
+
+    # Columns shown on the changelist page.
+    list_display = (
+        'title',
+        'category',
+        'publish_date',
+        'source',
+        'is_verified',
+    )
+    # Sidebar filters offered on the changelist page.
+    list_filter = (
+        'category',
+        'source',
+        'is_verified',
+    )
+    # Fields matched by the admin search box.
+    search_fields = (
+        'title',
+        'content',
+    )
+
+@admin.register(SourceSite)
+class SourceSiteAdmin(admin.ModelAdmin):
+    """Admin changelist configuration for ``SourceSite`` records."""
+
+    # Columns shown on the changelist page.
+    list_display = (
+        'name',
+        'url',
+        'is_active',
+    )
+    # Fields matched by the admin search box.
+    search_fields = (
+        'name',
+        'url',
+    )
diff --git a/src/green_classroom/apps/collector/apps.py b/src/green_classroom/apps/collector/apps.py
index 0117b57..c1e37fc 100644
--- a/src/green_classroom/apps/collector/apps.py
+++ b/src/green_classroom/apps/collector/apps.py
@@ -3,4 +3,4 @@ from django.apps import AppConfig
class CollectorConfig(AppConfig):
default_auto_field = 'django.db.models.BigAutoField'
- name = 'collector'
+ name = 'green_classroom.apps.collector' # Changed to the full dotted path
\ No newline at end of file
diff --git a/src/green_classroom/apps/collector/crawler/__init__.py b/src/green_classroom/apps/collector/crawler/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/green_classroom/apps/collector/crawler/xinhua.py b/src/green_classroom/apps/collector/crawler/xinhua.py
new file mode 100644
index 0000000..9faaaf4
--- /dev/null
+++ b/src/green_classroom/apps/collector/crawler/xinhua.py
@@ -0,0 +1,57 @@
+# xinhua.py
+import requests
+from bs4 import BeautifulSoup
+
+def parse_xinhua_article(url: str):
+    """
+    Fetch a Xinhua article page and extract its title and body text.
+
+    Args:
+        url: Address of the article page to download.
+
+    Returns:
+        A dict with the keys ``url``, ``title`` and ``content``, or ``None``
+        when the request fails (including HTTP error statuses), when the
+        ``detailContent`` element is missing, or when the extracted body is
+        shorter than 50 characters.
+    """
+    headers = {
+        "User-Agent": "Mozilla/5.0"
+    }
+
+    try:
+        resp = requests.get(url, headers=headers, timeout=10)
+        # Treat 4xx/5xx responses as failures instead of parsing the error
+        # page; HTTPError is a RequestException, so it is handled below.
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        print(f"❌ 请求失败:{e}")
+        return None
+
+    # Decode the body as UTF-8 regardless of the charset the server declared.
+    resp.encoding = 'utf-8'
+    soup = BeautifulSoup(resp.text, "html.parser")
+
+    # Extract the title, falling back to a placeholder when the tag is absent.
+    title_tag = soup.find("span", class_="title")
+    title = title_tag.get_text(strip=True) if title_tag else "无标题"
+
+    # Extract the body container; without it there is nothing to return.
+    content_tag = soup.find("span", id="detailContent")
+    if not content_tag:
+        print(f"❌ 没找到 detailContent: {url}")
+        return None
+
+    # One output line per <p> element inside the body container.
+    paragraphs = content_tag.find_all("p")
+    content = "\n".join(p.get_text(strip=True) for p in paragraphs)
+
+    # Validity check: reject pages whose extracted body is too short.
+    if len(content.strip()) < 50:
+        print(f"⚠️ 内容过短:{url}")
+        return None
+
+    return {
+        "url": url,
+        "title": title,
+        "content": content
+    }
+
+def crawl_xinhua_green():
+    """
+    Entry point that starts the Xinhua green-content crawler.
+
+    Parses a single hard-coded example URL and prints whether it succeeded.
+    """
+    print("✅ 开始爬取新华网绿色内容...")
+    # Example URL only; extend this to the real set of pages as needed.
+    article = parse_xinhua_article("https://www.xinhua.net.cn/example-article")
+    if not article:
+        print("❌ 爬取失败")
+        return
+    print("✅ 爬取成功:", article['title'])
\ No newline at end of file
diff --git a/src/green_classroom/apps/collector/management/__init__.py b/src/green_classroom/apps/collector/management/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/green_classroom/apps/collector/management/commands/__init__.py b/src/green_classroom/apps/collector/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/green_classroom/apps/collector/management/commands/crawl_xinhua.py b/src/green_classroom/apps/collector/management/commands/crawl_xinhua.py
new file mode 100644
index 0000000..7b34c27
--- /dev/null
+++ b/src/green_classroom/apps/collector/management/commands/crawl_xinhua.py
@@ -0,0 +1,10 @@
+# src/green_classroom/apps/collector/management/commands/crawl_xinhua.py
+
+from django.core.management.base import BaseCommand
+from green_classroom.apps.collector.crawler.xinhua import crawl_xinhua_green
+
+class Command(BaseCommand):
+    """Management command that runs the Xinhua green-content crawler."""
+
+    help = "抓取新华网绿色发展相关资料"
+
+    def handle(self, *args, **options):
+        """Run ``crawl_xinhua_green``; positional and keyword arguments are unused."""
+        crawl_xinhua_green()
diff --git a/src/green_classroom/apps/collector/migrations/0001_initial.py b/src/green_classroom/apps/collector/migrations/0001_initial.py
new file mode 100644
index 0000000..442cc2e
--- /dev/null
+++ b/src/green_classroom/apps/collector/migrations/0001_initial.py
@@ -0,0 +1,39 @@
+# Generated by Django 5.1 on 2025-07-21 12:00
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ """Initial migration for the collector app: creates SourceSite and Article."""
+
+ initial = True
+
+ dependencies = [
+ ]
+
+ operations = [
+ # SourceSite is created first because Article holds a foreign key to it.
+ migrations.CreateModel(
+ name='SourceSite',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('name', models.CharField(max_length=100)),
+ ('url', models.URLField()),
+ ('is_active', models.BooleanField(default=True)),
+ ('remarks', models.TextField(blank=True)),
+ ],
+ ),
+ # Article: url is unique; source becomes NULL when its SourceSite is deleted.
+ migrations.CreateModel(
+ name='Article',
+ fields=[
+ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('title', models.CharField(max_length=300)),
+ ('url', models.URLField(unique=True)),
+ ('publish_date', models.DateField(blank=True, null=True)),
+ ('content', models.TextField()),
+ ('category', models.CharField(choices=[('政策', '政策'), ('案例', '案例'), ('新闻', '新闻'), ('科研', '科研')], max_length=100)),
+ ('created_at', models.DateTimeField(auto_now_add=True)),
+ ('is_verified', models.BooleanField(default=False)),
+ ('source', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='collector.sourcesite')),
+ ],
+ ),
+ ]
diff --git a/src/green_classroom/apps/collector/models.py b/src/green_classroom/apps/collector/models.py
index 71a8362..872e01f 100644
--- a/src/green_classroom/apps/collector/models.py
+++ b/src/green_classroom/apps/collector/models.py
@@ -1,3 +1,30 @@
from django.db import models
-# Create your models here.
+class SourceSite(models.Model):
+ """A named site with a URL that ``Article`` rows can reference as their source."""
+
+ # Display name; also used as the string representation.
+ name = models.CharField(max_length=100)
+ url = models.URLField()
+ # Defaults to True. NOTE(review): presumably marks sites that should be crawled - confirm.
+ is_active = models.BooleanField(default=True)
+ # Optional free-text notes; may be left blank in forms.
+ remarks = models.TextField(blank=True)
+
+ def __str__(self):
+ """Return the site's name."""
+ return self.name
+
+class Article(models.Model):
+ """A collected article: title, unique URL, body text, category and source."""
+
+ # Allowed category values; the stored value and the display label are identical.
+ CATEGORY_CHOICES = [
+ ('政策', '政策'),
+ ('案例', '案例'),
+ ('新闻', '新闻'),
+ ('科研', '科研'),
+ ]
+
+ title = models.CharField(max_length=300)
+ # Unique, so the same URL cannot be stored twice.
+ url = models.URLField(unique=True)
+ # Optional: may be NULL in the database and empty in forms.
+ publish_date = models.DateField(null=True, blank=True)
+ content = models.TextField()
+ category = models.CharField(max_length=100, choices=CATEGORY_CHOICES)
+ # Set to NULL (rather than deleting the article) when the SourceSite is deleted.
+ source = models.ForeignKey(SourceSite, on_delete=models.SET_NULL, null=True)
+ # Filled in automatically when the row is first created.
+ created_at = models.DateTimeField(auto_now_add=True)
+ # Defaults to False; the public views only show rows where this is True.
+ is_verified = models.BooleanField(default=False)
+
+ def __str__(self):
+ """Return the article's title."""
+ return self.title
diff --git a/src/green_classroom/apps/collector/templates/collector/article_detail.html b/src/green_classroom/apps/collector/templates/collector/article_detail.html
new file mode 100644
index 0000000..956b661
--- /dev/null
+++ b/src/green_classroom/apps/collector/templates/collector/article_detail.html
@@ -0,0 +1,16 @@
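+<!DOCTYPE html>
+<html>
+<head>
+    <title>{{ article.title }}</title>
+</head>
+<body>
+    <h1>{{ article.title }}</h1>
+    <p>来源:{{ article.source.name }}</p>
+    <p>分类:{{ article.category }}</p>
+    <p>发布时间:{{ article.publish_date }}</p>
+    <hr>
+    {{ article.content|linebreaks }}
+    <hr>
+    <a href="{% url 'collector:article_list' %}">返回列表</a>
+</body>
+</html>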
diff --git a/src/green_classroom/apps/collector/templates/collector/article_list.html b/src/green_classroom/apps/collector/templates/collector/article_list.html
new file mode 100644
index 0000000..b41bd1f
--- /dev/null
+++ b/src/green_classroom/apps/collector/templates/collector/article_list.html
@@ -0,0 +1,24 @@
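+<!DOCTYPE html>
+<html>
+<head>
+    <title>绿色课堂资料库</title>
+</head>
+<body>
+    <h1>绿色课堂资料库</h1>
+    <form method="get">
+        <input type="search" name="q">
+        <input type="submit">
+    </form>
+    <ul>
+    {% for article in articles %}
+        <li>
+            <a href="{{ article.pk }}/">{{ article.title }}</a>
+            ({{ article.category }} | {{ article.publish_date }})
+        </li>
+    {% empty %}
+        <li>暂无内容</li>
+    {% endfor %}
+    </ul>
+</body>
+</html>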
diff --git a/src/green_classroom/apps/collector/urls.py b/src/green_classroom/apps/collector/urls.py
new file mode 100644
index 0000000..d220e8b
--- /dev/null
+++ b/src/green_classroom/apps/collector/urls.py
@@ -0,0 +1,9 @@
+from django.urls import path
+from . import views
+
+app_name = 'collector'
+
+# Routes for the collector app, relative to the prefix this URLconf is included under.
+urlpatterns = [
+    # Article list page.
+    path('', views.article_list, name='article_list'),
+    # Detail page: the ``int`` converter captures the primary key and passes it
+    # to ``views.article_detail`` as the required ``pk`` keyword argument.
+    path('<int:pk>/', views.article_detail, name='article_detail'),
+]
diff --git a/src/green_classroom/apps/collector/views.py b/src/green_classroom/apps/collector/views.py
index 91ea44a..0e28803 100644
--- a/src/green_classroom/apps/collector/views.py
+++ b/src/green_classroom/apps/collector/views.py
@@ -1,3 +1,20 @@
-from django.shortcuts import render
+from django.shortcuts import render, get_object_or_404
+from .models import Article
+from django.db.models import Q
-# Create your views here.
+def article_list(request):
+    """
+    Render the list of verified articles, optionally filtered by a search term.
+
+    The optional ``q`` GET parameter is matched case-insensitively against the
+    title and the content. Surrounding whitespace is stripped first, so a
+    blank or padded term does not turn into a literal whitespace filter.
+    At most 100 articles are passed to the template, newest publish date first.
+    """
+    query = request.GET.get('q', '').strip()
+    articles = Article.objects.filter(is_verified=True)
+
+    if query:
+        articles = articles.filter(Q(title__icontains=query) | Q(content__icontains=query))
+
+    return render(request, 'collector/article_list.html', {
+        'articles': articles.order_by('-publish_date')[:100]
+    })
+
+def article_detail(request, pk):
+    """Render the detail page for the verified article ``pk``, or respond with 404."""
+    context = {
+        'article': get_object_or_404(Article, pk=pk, is_verified=True),
+    }
+    return render(request, 'collector/article_detail.html', context)
diff --git a/src/green_classroom/settings.py b/src/green_classroom/settings.py
index b6cf62a..f15efac 100644
--- a/src/green_classroom/settings.py
+++ b/src/green_classroom/settings.py
@@ -37,6 +37,7 @@ INSTALLED_APPS = [
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
+ 'green_classroom.apps.collector.apps.CollectorConfig',
]
MIDDLEWARE = [
diff --git a/src/green_classroom/urls.py b/src/green_classroom/urls.py
index 3df1d83..3d5312c 100644
--- a/src/green_classroom/urls.py
+++ b/src/green_classroom/urls.py
@@ -1,22 +1,7 @@
-"""
-URL configuration for green_classroom project.
-
-The `urlpatterns` list routes URLs to views. For more information please see:
- https://docs.djangoproject.com/en/5.1/topics/http/urls/
-Examples:
-Function views
- 1. Add an import: from my_app import views
- 2. Add a URL to urlpatterns: path('', views.home, name='home')
-Class-based views
- 1. Add an import: from other_app.views import Home
- 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
-Including another URLconf
- 1. Import the include() function: from django.urls import include, path
- 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
-"""
from django.contrib import admin
-from django.urls import path
+from django.urls import path, include
urlpatterns = [
path('admin/', admin.site.urls),
+ # Mount the collector app's URLconf under the collector/ prefix.
+ path('collector/', include('green_classroom.apps.collector.urls')),
]
diff --git a/src/test_xinhua_article.py b/src/test_xinhua_article.py
new file mode 100644
index 0000000..7d9d3d8
--- /dev/null
+++ b/src/test_xinhua_article.py
@@ -0,0 +1,10 @@
+from green_classroom.apps.collector.crawler.xinhua import parse_xinhua_article
+
+def main():
+    """Fetch one live Xinhua article and print a preview of the parsed result."""
+    url = "https://www.news.cn/legal/20250721/f340f7be3d5b4b938cbd6b9889b6fbdc/c.html"
+    article = parse_xinhua_article(url)
+
+    if article:
+        print("✅ 成功抓取文章:", article["title"])
+        print("📄 正文预览:\n", article["content"][:500])
+    else:
+        print("❌ 抓取失败")
+
+
+# Guarded so that importing this module (for example when pytest collects
+# test_*.py files) does not issue a live HTTP request.
+if __name__ == "__main__":
+    main()