123 lines
3.1 KiB
Python
123 lines
3.1 KiB
Python
#!/usr/bin/env python
|
|
"""
|
|
测试爬虫命令的脚本
|
|
用于验证所有爬虫命令是否正常工作
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import django
|
|
from django.core.management import call_command
|
|
from django.test.utils import get_runner
|
|
from django.conf import settings
|
|
|
|
# Set up the Django environment: name the settings module (an existing
# DJANGO_SETTINGS_MODULE value is left untouched) and initialise Django.
os.environ.setdefault(
    'DJANGO_SETTINGS_MODULE',
    'green_classroom.settings',
)
django.setup()
|
|
|
|
def test_crawler_commands():
    """Check that every crawler management command is registered with Django.

    Each name in the list below is looked up in Django's command registry.
    The commands are never executed, so no crawling takes place. One result
    line ("✓" or "✗") is printed per command; nothing is returned.
    """
    # Imported locally so this function carries its own dependency.
    from django.core.management import get_commands

    # Every crawler command this script expects to exist.
    crawler_commands = [
        'crawl_rmrb',
        'crawl_xinhua',
        'crawl_cctv',
        'crawl_qiushi',
        'crawl_pla',
        'crawl_gmrb',
        'crawl_jjrb',
        'crawl_chinadaily',
        'crawl_grrb',
        'crawl_kjrb',
        'crawl_rmzxb',
        'crawl_zgjwjc',
        'crawl_chinanews',
        'crawl_xxsb',
        'crawl_zgqnb',
        'crawl_zgfnb',
        'crawl_fzrb',
        'crawl_nmrb',
        'crawl_xuexi',
        'crawl_qizhi',
        'crawl_china',
        'crawl_all_media',
    ]

    print("开始测试爬虫命令...")
    print("=" * 50)

    # Mapping of command name -> providing application; fetched once because
    # it does not change while the loop runs.
    registered = get_commands()

    for command in crawler_commands:
        print(f"测试命令: {command}")
        # Only existence is checked; running the command would start a crawl.
        if command in registered:
            print(f"✓ {command} 命令可用")
        else:
            print(f"✗ {command} 命令测试失败: 命令未注册")

    print("=" * 50)
    print("爬虫命令测试完成")
|
|
|
|
def test_export_command():
    """Check that the export management command is registered with Django.

    The command is looked up in Django's command registry and is not run.
    One result line ("✓" or "✗") is printed; nothing is returned.

    NOTE(review): the command name `export_articles` is taken from the usage
    text printed by this script's `main()` -- confirm it matches the actual
    command module.
    """
    # Imported locally so this function carries its own dependency.
    from django.core.management import get_commands

    export_command = 'export_articles'

    print("测试导出命令...")
    if export_command in get_commands():
        print("✓ 导出命令可用")
    else:
        print(f"✗ 导出命令测试失败: {export_command} 命令未注册")
|
|
|
|
def test_models():
    """Smoke-test the data models by creating (or fetching) a test website.

    Fetches or creates a `Website` row named "测试网站", prints a success
    line, and deletes the row again if this call created it. Any exception
    is caught and reported as a failure line; nothing is returned.
    """
    try:
        # Article is not used below; keeping the import means a failure to
        # import it is still reported (presumably intentional -- confirm).
        from core.models import Website, Article

        print("测试数据模型...")

        # Reuse an existing row with this name, otherwise create one with
        # placeholder values.
        website, created = Website.objects.get_or_create(
            name="测试网站",
            defaults={
                'base_url': 'https://test.com',
                'article_list_url': 'https://test.com',
                'article_selector': 'a',
            },
        )
        try:
            print(f"✓ 网站模型测试通过: {website.name}")
        finally:
            # Remove the row only if this run created it, and do so even if
            # the success message could not be printed, so no test data is
            # left behind.
            if created:
                website.delete()

    except Exception as e:
        print(f"✗ 数据模型测试失败: {e}")
|
|
|
|
def main():
    """Run all checks in order, then print a short usage guide."""
    divider = "=" * 50

    print("中央主流媒体爬虫系统测试")
    print(divider)

    # Stages run in this order: data models, crawler commands, export
    # command. A blank line is printed after each stage.
    for run_stage in (test_models, test_crawler_commands, test_export_command):
        run_stage()
        print()

    print("所有测试完成!")
    print(divider)

    usage_lines = (
        "使用方法:",
        "1. 单个媒体爬取: python manage.py crawl_rmrb",
        "2. 批量爬取: python manage.py crawl_all_media",
        "3. 导出数据: python manage.py export_articles --format json",
        "4. 查看帮助: python manage.py help",
    )
    for usage_line in usage_lines:
        print(usage_line)
|
|
|
|
# Run the checks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
|