Fix bugs and support all platforms

This commit is contained in:
2025-08-15 08:33:47 +08:00
parent e82b85f4dd
commit 4945b4c6b0
36 changed files with 2296 additions and 992 deletions

View File

@@ -1,3 +1,312 @@
import os
import shutil
import tempfile
from unittest.mock import MagicMock, patch

from django.core.files.uploadedfile import SimpleUploadedFile
from django.core.management import call_command
from django.core.management.base import CommandError
from django.db import IntegrityError, transaction
from django.test import TestCase, override_settings
from django.utils import timezone

from .models import Article, Website
from .tasks import crawl_all_websites, crawl_website, health_check
from .utils import download_media, full_site_crawler, is_valid_url, process_article
# Create your tests here.
class WebsiteModelTest(TestCase):
    """Tests for the Website model."""

    def setUp(self):
        """Insert the Website row that every test in this class inspects."""
        site_fields = {
            'name': '测试网站',
            'base_url': 'https://test.com',
            'description': '测试描述',
        }
        self.website = Website.objects.create(**site_fields)

    def test_website_creation(self):
        """Check the field values of the freshly created Website."""
        site = self.website
        self.assertEqual(site.name, '测试网站')
        self.assertEqual(site.base_url, 'https://test.com')
        # setUp never passes `enabled`, so this checks the value the model
        # supplies when the field is omitted.
        self.assertTrue(site.enabled)

    def test_website_str(self):
        """Check that str() of a Website equals its name."""
        rendered = str(self.website)
        self.assertEqual(rendered, '测试网站')
class ArticleModelTest(TestCase):
    """Tests for the Article model."""

    def setUp(self):
        """Create one Website and one Article attached to it."""
        site = Website.objects.create(name='测试网站', base_url='https://test.com')
        self.website = site
        self.article = Article.objects.create(
            website=site,
            title='测试文章',
            url='https://test.com/article/1',
            content='<p>测试内容</p>',
            media_files=['image1.jpg', 'image2.jpg'],
        )

    def test_article_creation(self):
        """Check the title, URL and media list of the created Article."""
        post = self.article
        self.assertEqual(post.title, '测试文章')
        self.assertEqual(post.url, 'https://test.com/article/1')
        media_count = len(post.media_files)
        self.assertEqual(media_count, 2)

    def test_article_str(self):
        """Check that str() of an Article equals its title."""
        rendered = str(self.article)
        self.assertEqual(rendered, '测试文章')
class UtilsTest(TestCase):
    """Tests for the helper functions imported from ``.utils``."""

    def setUp(self):
        """Create a Website row and a scratch directory for downloaded media."""
        self.website = Website.objects.create(
            name='测试网站',
            base_url='https://test.com'
        )
        self.temp_dir = tempfile.mkdtemp()
        # Registered as a cleanup instead of tearDown: cleanups also run when a
        # later setUp step raises, and ignore_errors tolerates a directory
        # that the code under test has already removed.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)

    def test_is_valid_url(self):
        """Check which URLs ``is_valid_url`` accepts for the domain ``test.com``."""
        # Accepted: http/https URLs on the expected domain.
        self.assertTrue(is_valid_url('https://test.com/article', 'test.com'))
        self.assertTrue(is_valid_url('http://test.com/article', 'test.com'))
        # Rejected: foreign domain, non-HTTP scheme, and a string with no scheme.
        self.assertFalse(is_valid_url('https://other.com/article', 'test.com'))
        self.assertFalse(is_valid_url('ftp://test.com/article', 'test.com'))
        self.assertFalse(is_valid_url('invalid-url', 'test.com'))

    @patch('core.utils.requests.get')
    def test_download_media(self, mock_get):
        """Check that ``download_media`` returns a path that exists on disk."""
        # Fake HTTP response so no network request is made.
        mock_response = MagicMock()
        mock_response.content = b'fake image content'
        mock_response.headers = {'content-type': 'image/jpeg'}
        mock_get.return_value = mock_response

        result = download_media('https://test.com/image.jpg', self.temp_dir)

        self.assertIsNotNone(result)
        self.assertTrue(os.path.exists(result))

    # patch decorators are applied bottom-up, so the mock for download_media
    # is the first injected argument and the mock for requests.get the second.
    @patch('core.utils.requests.get')
    @patch('core.utils.download_media')
    def test_process_article_success(self, mock_download_media, mock_get):
        """Check that ``process_article`` stores an Article titled after the page's <h1>."""
        # Fake HTML page returned by the patched requests.get.
        html_content = '''
<html>
<head><title>测试文章</title></head>
<body>
<h1>测试文章标题</h1>
<div class="content">
<p>测试文章内容</p>
<img src="https://test.com/image.jpg">
</div>
</body>
</html>
'''
        mock_response = MagicMock()
        mock_response.text = html_content
        mock_response.encoding = 'utf-8'
        mock_response.raise_for_status.return_value = None
        mock_get.return_value = mock_response
        # Media download is stubbed out; only the returned path matters here.
        mock_download_media.return_value = '/tmp/test_image.jpg'

        process_article('https://test.com/article/1', self.website)

        # The article must now exist in the database under the requested URL.
        article = Article.objects.filter(url='https://test.com/article/1').first()
        self.assertIsNotNone(article)
        self.assertEqual(article.title, '测试文章标题')
class ManagementCommandsTest(TestCase):
    """Tests for the project's management commands."""

    def setUp(self):
        """Create the Website row present while the command runs."""
        site_fields = {'name': '测试网站', 'base_url': 'https://test.com'}
        self.website = Website.objects.create(**site_fields)

    @patch('core.management.commands.crawl_all_media.call_command')
    def test_crawl_all_media_command(self, inner_call_command):
        """Run ``crawl_all_media`` and check it invokes the patched ``call_command``."""
        # The command module's own call_command is replaced by a no-op mock.
        inner_call_command.return_value = None

        command_options = {'media': 'rmrb,xinhua'}
        call_command('crawl_all_media', **command_options)

        inner_call_command.assert_called()
class CeleryTasksTest(TestCase):
    """Tests for the task functions imported from ``.tasks``."""

    def setUp(self):
        """Create the single Website row the tasks operate on."""
        site_fields = {'name': '测试网站', 'base_url': 'https://test.com'}
        self.website = Website.objects.create(**site_fields)

    @patch('core.tasks.full_site_crawler')
    def test_crawl_website_task(self, crawler_mock):
        """Check the result dict of ``crawl_website`` for an existing Website."""
        # The crawler itself is stubbed out so nothing is fetched.
        crawler_mock.return_value = None
        site_id = self.website.id

        outcome = crawl_website(site_id)

        self.assertEqual(outcome['website_id'], site_id)
        self.assertEqual(outcome['website_name'], '测试网站')
        self.assertEqual(outcome['status'], 'success')

    def test_crawl_website_task_invalid_id(self):
        """Check that ``crawl_website`` raises for an ID with no Website row."""
        self.assertRaises(Exception, crawl_website, 99999)

    @patch('core.tasks.crawl_website.delay')
    def test_crawl_all_websites_task(self, delay_mock):
        """Check the summary dict returned by ``crawl_all_websites``."""
        # Each dispatched sub-task is represented by a mock exposing an id.
        delay_mock.return_value = MagicMock(id='task-123')

        summary = crawl_all_websites()

        self.assertEqual(summary['total_websites'], 1)
        self.assertEqual(summary['status'], 'started')

    def test_health_check_task(self):
        """Check the report of ``health_check`` with one Website and no Articles."""
        report = health_check()

        self.assertEqual(report['database'], 'ok')
        self.assertEqual(report['website_count'], 1)
        self.assertEqual(report['article_count'], 0)
class IntegrationTest(TestCase):
    """End-to-end test of creating a Website and an Article that belongs to it."""

    def setUp(self):
        """Create the Website used by the workflow test."""
        site_fields = {
            'name': '集成测试网站',
            'base_url': 'https://integration-test.com',
        }
        self.website = Website.objects.create(**site_fields)

    def test_full_workflow(self):
        """Create an Article for the Website and verify relation and field values."""
        site = self.website

        # Step 1: exactly the Website from setUp exists.
        self.assertEqual(Website.objects.count(), 1)

        # Step 2: attach a new Article to it.
        article_fields = {
            'website': site,
            'title': '集成测试文章',
            'url': 'https://integration-test.com/article/1',
            'content': '<p>集成测试内容</p>',
        }
        article = Article.objects.create(**article_fields)

        # Step 3: the relation is navigable from both sides.
        self.assertEqual(article.website, site)
        self.assertEqual(site.article_set.count(), 1)

        # Step 4: fields that were not passed explicitly are populated.
        self.assertIsNotNone(article.created_at)
        self.assertIsInstance(article.media_files, list)
class MediaHandlingTest(TestCase):
    """Tests for the ``media_files`` field of Article, run against a temporary MEDIA_ROOT."""

    def setUp(self):
        """Point MEDIA_ROOT at a fresh temporary directory and create a Website.

        The directory is created per test and removed afterwards. Creating it
        inside a class decorator would run ``mkdtemp`` at import time and leave
        the directory behind on disk after every test run.
        """
        media_root = tempfile.mkdtemp()
        # Cleanups run in reverse order: the settings override is undone
        # first, then the directory is deleted.
        self.addCleanup(shutil.rmtree, media_root, ignore_errors=True)
        media_override = override_settings(MEDIA_ROOT=media_root)
        media_override.enable()
        self.addCleanup(media_override.disable)

        self.website = Website.objects.create(
            name='媒体测试网站',
            base_url='https://media-test.com'
        )

    def test_media_files_field(self):
        """Check that the list passed as ``media_files`` is kept on the Article."""
        article = Article.objects.create(
            website=self.website,
            title='媒体测试文章',
            url='https://media-test.com/article/1',
            content='<p>测试内容</p>',
            media_files=['image1.jpg', 'video1.mp4']
        )
        # Both entries must be present and nothing else.
        self.assertEqual(len(article.media_files), 2)
        self.assertIn('image1.jpg', article.media_files)
        self.assertIn('video1.mp4', article.media_files)
class ErrorHandlingTest(TestCase):
    """Tests that duplicate Article URLs and duplicate Website names are rejected.

    NOTE(review): both tests assume the uniqueness rules are enforced by the
    database, so that ``objects.create`` raises ``IntegrityError`` — confirm
    against the model definitions.
    """

    def test_duplicate_url_handling(self):
        """Creating a second Article with an already-used URL must fail."""
        website = Website.objects.create(
            name='错误测试网站',
            base_url='https://error-test.com'
        )
        # First article occupies the URL.
        Article.objects.create(
            website=website,
            title='第一篇文章',
            url='https://error-test.com/article/1',
            content='<p>内容1</p>'
        )
        # atomic() confines the failed INSERT to a savepoint, so the
        # transaction wrapping the test stays usable after the error.
        with self.assertRaises(IntegrityError), transaction.atomic():
            Article.objects.create(
                website=website,
                title='第二篇文章',
                url='https://error-test.com/article/1',  # same URL as above
                content='<p>内容2</p>'
            )

    def test_invalid_website_data(self):
        """Creating a second Website with an already-used name must fail."""
        Website.objects.create(
            name='测试网站1',
            base_url='https://test1.com'
        )
        # Same savepoint reasoning as in test_duplicate_url_handling.
        with self.assertRaises(IntegrityError), transaction.atomic():
            Website.objects.create(
                name='测试网站1',  # duplicate name
                base_url='https://test2.com'
            )