Base setup
This commit is contained in:
4
crawler_project/__init__.py
Normal file
4
crawler_project/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
# Import the Celery application together with this package so that it is
# loaded whenever Django starts.
from .celery import app as celery_app

__all__ = ('celery_app',)
|
||||
16
crawler_project/asgi.py
Normal file
16
crawler_project/asgi.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
ASGI config for crawler_project project.
|
||||
|
||||
It exposes the ASGI callable as a module-level variable named ``application``.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/5.2/howto/deployment/asgi/
|
||||
"""
|
||||
|
||||
import os

from django.core.asgi import get_asgi_application

# setdefault() only supplies the project's settings module when
# DJANGO_SETTINGS_MODULE is not already present in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'crawler_project.settings')

# Module-level ASGI callable.
application = get_asgi_application()
|
||||
17
crawler_project/celery.py
Normal file
17
crawler_project/celery.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import os

from celery import Celery

# Set the Django settings module (only when the environment has not already
# chosen one).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'crawler_project.settings')

app = Celery('crawler_project')

# Configure Celery from the Django settings file; the CELERY namespace means
# the Celery options are the settings prefixed with CELERY_.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Auto-discover tasks.
app.autodiscover_tasks()
|
||||
|
||||
@app.task(bind=True)
def debug_task(self):
    """Print the representation of the current task request.

    The task is registered with ``bind=True``, so the task instance is
    received as ``self`` and its ``request`` attribute is what gets printed.
    """
    print(f'Request: {self.request!r}')
|
||||
181
crawler_project/settings.py
Normal file
181
crawler_project/settings.py
Normal file
@@ -0,0 +1,181 @@
|
||||
"""
|
||||
Django settings for crawler_project project.
|
||||
|
||||
Generated by 'django-admin startproject' using Django 5.2.6.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/5.2/topics/settings/
|
||||
|
||||
For the full list of settings and their values, see
|
||||
https://docs.djangoproject.com/en/5.2/ref/settings/
|
||||
"""
|
||||
|
||||
import os
from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.2/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# A non-empty DJANGO_SECRET_KEY environment variable overrides the
# development-only key below, so a deployment does not have to rely on a
# value that is committed to version control.
SECRET_KEY = (
    os.environ.get('DJANGO_SECRET_KEY')
    or 'django-insecure-w5lm159dl-)=z!dysfxf8!n^o26^6)4^!@5(yp*5-_c=!_tcq!'
)

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []
|
||||
|
||||
|
||||
# Application definition

INSTALLED_APPS = [
    # Django contrib apps.
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    # Project app (its URLconf, crawler.urls, is included by the root URLconf)
    # and the rest_framework app.
    'crawler',
    'rest_framework',
]
|
||||
|
||||
# Middleware classes, listed in the order they are configured.
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
|
||||
|
||||
# Dotted path of the root URL configuration module.
ROOT_URLCONF = 'crawler_project.urls'

# A single Django template engine: no extra template directories (DIRS is
# empty), with APP_DIRS enabled.
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

# Dotted path of the WSGI callable defined in crawler_project/wsgi.py.
WSGI_APPLICATION = 'crawler_project.wsgi.application'
|
||||
|
||||
|
||||
# Database
# https://docs.djangoproject.com/en/5.2/ref/settings/#databases

# Single SQLite database stored as db.sqlite3 directly under BASE_DIR.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    }
}
|
||||
|
||||
|
||||
# Password validation
# https://docs.djangoproject.com/en/5.2/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]
|
||||
|
||||
|
||||
# Internationalization
# https://docs.djangoproject.com/en/5.2/topics/i18n/

LANGUAGE_CODE = 'zh-hans'

# Also reused as the Celery timezone (CELERY_TIMEZONE) further down.
TIME_ZONE = 'Asia/Shanghai'

USE_I18N = True

USE_TZ = True
|
||||
|
||||
|
||||
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.2/howto/static-files/

STATIC_URL = 'static/'

# Media files (files uploaded by users)
MEDIA_URL = '/media/'
MEDIA_ROOT = BASE_DIR / 'media'

# Default primary key field type
# https://docs.djangoproject.com/en/5.2/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
|
||||
|
||||
# Celery configuration
# The broker and the result backend both point at the same local Redis
# database (redis://localhost:6379/0).
CELERY_BROKER_URL = 'redis://localhost:6379/0'
CELERY_RESULT_BACKEND = 'redis://localhost:6379/0'
# JSON is the only accepted content type and is used for both task and
# result serialization.
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
# Reuse the Django time zone for Celery.
CELERY_TIMEZONE = TIME_ZONE
|
||||
|
||||
# Crawler configuration
CRAWLER_SETTINGS = {
    'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'REQUEST_DELAY': 1,  # interval between requests (seconds)
    'TIMEOUT': 30,  # request timeout
    'MAX_RETRIES': 3,  # maximum number of retries
}
|
||||
|
||||
# List of target websites.
# Each entry carries the site's display name, its URL and its region label.
TARGET_WEBSITES = [
    {'name': '中共中央纪委', 'url': 'https://www.ccdi.gov.cn/', 'region': '中央'},
    {'name': '北京纪检监察', 'url': 'https://www.bjsupervision.gov.cn/', 'region': '北京'},
    {'name': '天津纪检监察', 'url': 'https://www.tjjw.gov.cn/', 'region': '天津'},
    {'name': '河北纪检监察', 'url': 'http://www.hebcdi.gov.cn/', 'region': '河北'},
    {'name': '山西纪检监察', 'url': 'http://www.sxdi.gov.cn/', 'region': '山西'},
    {'name': '内蒙古纪检监察', 'url': 'https://www.nmgjjjc.gov.cn/', 'region': '内蒙古'},
    {'name': '辽宁纪检监察', 'url': 'https://www.lnsjjjc.gov.cn/', 'region': '辽宁'},
    {'name': '吉林纪检监察', 'url': 'http://ccdijl.gov.cn/', 'region': '吉林'},
    {'name': '黑龙江纪检监察', 'url': 'https://www.hljjjjc.gov.cn/Hljjjjc/', 'region': '黑龙江'},
    {'name': '上海纪检监察', 'url': 'https://www.shjjjc.gov.cn/', 'region': '上海'},
    {'name': '江苏纪检监察', 'url': 'https://www.jssjw.gov.cn/', 'region': '江苏'},
    {'name': '浙江纪检监察', 'url': 'https://www.zjsjw.gov.cn/shouye/', 'region': '浙江'},
    {'name': '安徽纪检监察', 'url': 'http://www.ahjjjc.gov.cn/', 'region': '安徽'},
    {'name': '福建纪检监察', 'url': 'https://www.fjcdi.gov.cn/cms/html/fjsjwjw/index.html', 'region': '福建'},
    {'name': '江西纪检监察', 'url': 'http://www.jxdi.gov.cn/', 'region': '江西'},
    {'name': '山东纪检监察', 'url': 'https://www.sdjj.gov.cn/', 'region': '山东'},
    {'name': '河南纪检监察', 'url': 'https://www.hnsjw.gov.cn/sitesources/hnsjct/page_pc/index.html', 'region': '河南'},
    {'name': '湖北纪检监察', 'url': 'https://www.hbjwjc.gov.cn/', 'region': '湖北'},
    {'name': '湖南纪检监察', 'url': 'https://www.sxfj.gov.cn/', 'region': '湖南'},
    {'name': '广东纪检监察', 'url': 'https://www.gdjct.gd.gov.cn/', 'region': '广东'},
    {'name': '广西纪检监察', 'url': 'https://www.gxjjw.gov.cn/index.shtml', 'region': '广西'},
    {'name': '海南纪检监察', 'url': 'https://www.hncdi.gov.cn/web/hnlzw/v2/html/index.jsp', 'region': '海南'},
    {'name': '重庆纪检监察', 'url': 'https://jjc.cq.gov.cn/', 'region': '重庆'},
    {'name': '四川纪检监察', 'url': 'https://www.scjc.gov.cn/', 'region': '四川'},
    {'name': '贵州纪检监察', 'url': 'http://www.gzdis.gov.cn/', 'region': '贵州'},
    {'name': '云南纪检监察', 'url': 'http://www.ynjjjc.gov.cn/', 'region': '云南'},
    {'name': '西藏纪检监察', 'url': 'http://www.xzjjw.gov.cn/', 'region': '西藏'},
    {'name': '陕西纪检监察', 'url': 'https://www.qinfeng.gov.cn/', 'region': '陕西'},
    {'name': '甘肃纪检监察', 'url': 'http://www.gsjw.gov.cn/', 'region': '甘肃'},
    # Qinghai uses the qhjc host and Ningxia the nxjjjc host.
    {'name': '青海纪检监察', 'url': 'http://www.qhjc.gov.cn/', 'region': '青海'},
    {'name': '宁夏纪检监察', 'url': 'http://www.nxjjjc.gov.cn/', 'region': '宁夏'},
    {'name': '新疆纪检监察', 'url': 'https://www.xjjw.gov.cn/', 'region': '新疆'},
    {'name': '新疆兵团纪检监察', 'url': 'http://btjw.xjbt.gov.cn/', 'region': '新疆兵团'},
]
|
||||
29
crawler_project/urls.py
Normal file
29
crawler_project/urls.py
Normal file
@@ -0,0 +1,29 @@
|
||||
"""
|
||||
URL configuration for crawler_project project.
|
||||
|
||||
The `urlpatterns` list routes URLs to views. For more information please see:
|
||||
https://docs.djangoproject.com/en/5.2/topics/http/urls/
|
||||
Examples:
|
||||
Function views
|
||||
1. Add an import: from my_app import views
|
||||
2. Add a URL to urlpatterns: path('', views.home, name='home')
|
||||
Class-based views
|
||||
1. Add an import: from other_app.views import Home
|
||||
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
|
||||
Including another URLconf
|
||||
1. Import the include() function: from django.urls import include, path
|
||||
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
|
||||
"""
|
||||
from django.conf import settings
from django.conf.urls.static import static
from django.contrib import admin
from django.urls import include, path

urlpatterns = [
    path('admin/', admin.site.urls),
    # Everything outside admin/ is delegated to the crawler app's URLconf.
    path('', include('crawler.urls')),
]

if settings.DEBUG:
    # The project settings do not define STATIC_ROOT, and Django's default
    # for it is None; only register the static-file route when a directory
    # is actually configured, instead of a route whose document_root is None.
    if settings.STATIC_ROOT:
        urlpatterns += static(settings.STATIC_URL, document_root=settings.STATIC_ROOT)
    urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT)
|
||||
16
crawler_project/wsgi.py
Normal file
16
crawler_project/wsgi.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
WSGI config for crawler_project project.
|
||||
|
||||
It exposes the WSGI callable as a module-level variable named ``application``.
|
||||
|
||||
For more information on this file, see
|
||||
https://docs.djangoproject.com/en/5.2/howto/deployment/wsgi/
|
||||
"""
|
||||
|
||||
import os

from django.core.wsgi import get_wsgi_application

# setdefault() only supplies the project's settings module when
# DJANGO_SETTINGS_MODULE is not already present in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'crawler_project.settings')

# Module-level WSGI callable; settings.WSGI_APPLICATION refers to it as
# 'crawler_project.wsgi.application'.
application = get_wsgi_application()
|
||||
Reference in New Issue
Block a user