diff --git a/.gitignore b/.gitignore index 36b13f1..575c1ad 100644 --- a/.gitignore +++ b/.gitignore @@ -166,7 +166,7 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +.idea/ # Ruff stuff: .ruff_cache/ diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/admin.py b/core/admin.py new file mode 100644 index 0000000..aa37419 --- /dev/null +++ b/core/admin.py @@ -0,0 +1,11 @@ +from django.contrib import admin +from .models import Website, Article + +@admin.register(Website) +class WebsiteAdmin(admin.ModelAdmin): + list_display = ('name', 'base_url', 'enabled') + +@admin.register(Article) +class ArticleAdmin(admin.ModelAdmin): + list_display = ('title', 'website', 'pub_date') + search_fields = ('title', 'content') diff --git a/core/apps.py b/core/apps.py new file mode 100644 index 0000000..8115ae6 --- /dev/null +++ b/core/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class CoreConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'core' diff --git a/core/management/commands/crawl_xinhua.py b/core/management/commands/crawl_xinhua.py new file mode 100644 index 0000000..12b4a5f --- /dev/null +++ b/core/management/commands/crawl_xinhua.py @@ -0,0 +1,23 @@ +from django.core.management.base import BaseCommand +from core.models import Website +from core.utils import crawl_xinhua_article + +class Command(BaseCommand): + help = '爬取新华网文章示例' + + def handle(self, *args, **options): + # 假设你事先在后台建了“新华网”这个Website实例 + website_name = "新华网" + try: + website = Website.objects.get(name=website_name) + except Website.DoesNotExist: + self.stdout.write(self.style.ERROR(f"网站 '{website_name}' 不存在,请先后台创建")) + return + + # 这里写你想爬取的文章URL列表,可以循环多篇 + urls = [ + "https://www.news.cn/legal/20250721/f340f7be3d5b4b938cbd6b9889b6fbdc/c.html", + ] + + for url in urls: + crawl_xinhua_article(url, website) diff --git a/core/migrations/0001_initial.py b/core/migrations/0001_initial.py new file mode 100644 index 0000000..5982f5e --- /dev/null +++ b/core/migrations/0001_initial.py @@ -0,0 +1,41 @@ +# Generated by Django 5.1 on 2025-08-11 04:53 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Website', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100, unique=True)), + ('base_url', models.URLField()), + ('description', models.TextField(blank=True, null=True)), + ('article_list_url', models.URLField(blank=True, null=True)), + ('article_selector', models.CharField(blank=True, max_length=255, null=True)), + ('content_selector', models.CharField(blank=True, max_length=255, null=True)), + ('enabled', models.BooleanField(default=True)), + ], + ), + migrations.CreateModel( + name='Article', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('title', models.CharField(max_length=300)), + ('url', models.URLField(unique=True)), + ('pub_date', models.DateTimeField(blank=True, null=True)), + ('content', models.TextField()), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('media_files', models.JSONField(blank=True, default=list)), + ('website', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='core.website')), + ], + ), + ] diff --git a/core/migrations/__init__.py b/core/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/models.py b/core/models.py new file mode 100644 index 0000000..a1a1806 --- /dev/null +++ b/core/models.py @@ -0,0 +1,26 @@ +from django.db import models + +class Website(models.Model): + name = models.CharField(max_length=100, unique=True) + base_url = models.URLField() + description = models.TextField(blank=True, null=True) + article_list_url = models.URLField(blank=True, null=True) + article_selector = models.CharField(max_length=255, blank=True, null=True) + content_selector = models.CharField(max_length=255, blank=True, null=True) + enabled = models.BooleanField(default=True) + + def __str__(self): + return self.name + + +class Article(models.Model): + website = models.ForeignKey(Website, on_delete=models.CASCADE) + title = models.CharField(max_length=300) + url = models.URLField(unique=True) + pub_date = models.DateTimeField(null=True, blank=True) + content = models.TextField() # html内容 + created_at = models.DateTimeField(auto_now_add=True) + media_files = models.JSONField(default=list, blank=True) + + def __str__(self): + return self.title diff --git a/core/templates/core/article_detail.html b/core/templates/core/article_detail.html new file mode 100644 index 0000000..ffb2649 --- /dev/null +++ b/core/templates/core/article_detail.html @@ -0,0 +1,17 @@ + + + + + {{ article.title }} + + +

{{ article.title }}

+

发布时间: {{ article.pub_date|date:"Y-m-d H:i" }}

+
+
+ {{ article.content|safe }} +
+
+

返回列表

+ + diff --git a/core/templates/core/article_list.html b/core/templates/core/article_list.html new file mode 100644 index 0000000..f64c0f9 --- /dev/null +++ b/core/templates/core/article_list.html @@ -0,0 +1,33 @@ + + + + + 绿色课堂文章列表 + + +

绿色课堂文章列表

+ + + + + + diff --git a/core/tests.py b/core/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/core/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/core/urls.py b/core/urls.py new file mode 100644 index 0000000..e034494 --- /dev/null +++ b/core/urls.py @@ -0,0 +1,10 @@ +from django.urls import path +from . import views + +urlpatterns = [ + # 主页,文章列表 + path('', views.article_list, name='article_list'), + # 文章详情 + path('article//', views.article_detail, name='article_detail'), + # 后续可以加更多路径 +] diff --git a/core/utils.py b/core/utils.py new file mode 100644 index 0000000..2086378 --- /dev/null +++ b/core/utils.py @@ -0,0 +1,35 @@ +import requests +from bs4 import BeautifulSoup +from django.utils import timezone +from core.models import Website, Article + +def crawl_xinhua_article(url, website): + headers = { + "User-Agent": "Mozilla/5.0" + } + resp = requests.get(url, headers=headers) + resp.encoding = 'utf-8' + soup = BeautifulSoup(resp.text, "html.parser") + + # 提取标题 + title_tag = soup.find("span", class_="title") + title = title_tag.get_text(strip=True) if title_tag else "无标题" + + # 提取正文 + content_tag = soup.find("span", id="detailContent") + paragraphs = content_tag.find_all("p") if content_tag else [] + content_html = "".join(str(p) for p in paragraphs) # 保留p标签的html结构 + + # 如果文章已存在,则不重复插入 + if Article.objects.filter(url=url).exists(): + print(f"文章已存在,跳过: {url}") + return + + article = Article.objects.create( + website=website, + title=title, + url=url, + content=content_html, + pub_date=timezone.now(), + ) + print(f"已保存文章:{title}") diff --git a/core/views.py b/core/views.py new file mode 100644 index 0000000..956c854 --- /dev/null +++ b/core/views.py @@ -0,0 +1,28 @@ +from django.shortcuts import render, get_object_or_404 +from django.core.paginator import Paginator +from .models import Article + +def article_list(request): + """ + 显示文章列表的视图函数 + """ + articles = Article.objects.all().order_by('-created_at') + paginator = Paginator(articles, 10) # 每页显示10篇文章 + + page_number = request.GET.get('page') + page_obj = paginator.get_page(page_number) + + return render(request, 'core/article_list.html', { + 'page_obj': page_obj + }) + +def article_detail(request, article_id): + """ + 显示文章详情的视图函数 + """ + article = get_object_or_404(Article, id=article_id) + return render(request, 'core/article_detail.html', { + 'article': article + }) + +# Create your views here. \ No newline at end of file diff --git a/green_classroom/__init__.py b/green_classroom/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/green_classroom/asgi.py b/green_classroom/asgi.py new file mode 100644 index 0000000..9acdbf9 --- /dev/null +++ b/green_classroom/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for green_classroom project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'green_classroom.settings') + +application = get_asgi_application() diff --git a/green_classroom/settings.py b/green_classroom/settings.py new file mode 100644 index 0000000..12f27df --- /dev/null +++ b/green_classroom/settings.py @@ -0,0 +1,126 @@ +""" +Django settings for green_classroom project. + +Generated by 'django-admin startproject' using Django 5.1. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/5.1/ref/settings/ +""" + +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = 'django-insecure-_kr!&5j#i!)lo(=u-&5ni+21cwxcq)j-35k!ne20)fyx!u6dnl' + +# SECURITY WARNING: don't run with debug turned on in production! +DEBUG = True + +ALLOWED_HOSTS = [] + +# Application definition + +INSTALLED_APPS = [ + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'core', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +] + +ROOT_URLCONF = 'green_classroom.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'green_classroom.wsgi.application' + +# Database +# https://docs.djangoproject.com/en/5.1/ref/settings/#databases + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': BASE_DIR / 'db.sqlite3', + } +} + +# Password validation +# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + }, +] + +# Internationalization +# https://docs.djangoproject.com/en/5.1/topics/i18n/ + +LANGUAGE_CODE = 'zh-Hans' + +TIME_ZONE = 'UTC' + +USE_I18N = True + +USE_TZ = True + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/5.1/howto/static-files/ + +STATIC_URL = 'static/' + +# Default primary key field type +# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' + + +import os + +BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +MEDIA_ROOT = os.path.join(BASE_DIR, 'date', 'media') +MEDIA_URL = '/media/' diff --git a/green_classroom/urls.py b/green_classroom/urls.py new file mode 100644 index 0000000..504eba2 --- /dev/null +++ b/green_classroom/urls.py @@ -0,0 +1,13 @@ +from django.contrib import admin +from django.urls import path, include +from django.conf import settings +from django.conf.urls.static import static + +urlpatterns = [ + path('admin/', admin.site.urls), + # 以后前台访问放 core app 的 urls + path('', include('core.urls')), +] + +if settings.DEBUG: + urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/green_classroom/wsgi.py b/green_classroom/wsgi.py new file mode 100644 index 0000000..fe4e14c --- /dev/null +++ b/green_classroom/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for green_classroom project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'green_classroom.settings') + +application = get_wsgi_application() diff --git a/manage.py b/manage.py new file mode 100755 index 0000000..3666fc4 --- /dev/null +++ b/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'green_classroom.settings') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main()