Add support for the * wildcard in search
This commit is contained in:
@@ -42,6 +42,10 @@ def dashboard(request):
|
||||
page_number = request.GET.get('page', 1)
|
||||
page_size = request.GET.get('page_size', 20) # 默认每页20篇文章
|
||||
|
||||
# 获取时间范围参数
|
||||
start_date = request.GET.get('start_date', '')
|
||||
end_date = request.GET.get('end_date', '')
|
||||
|
||||
# 尝试转换page_size为整数
|
||||
try:
|
||||
page_size = int(page_size)
|
||||
@@ -61,6 +65,20 @@ def dashboard(request):
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
# 添加时间范围筛选
|
||||
if start_date:
|
||||
all_contents = all_contents.filter(created_at__gte=start_date)
|
||||
if end_date:
|
||||
# 将结束日期设置为当天的结束时间
|
||||
from django.utils import timezone
|
||||
from datetime import datetime
|
||||
try:
|
||||
end_datetime = datetime.strptime(end_date, '%Y-%m-%d')
|
||||
end_datetime = timezone.make_aware(end_datetime.replace(hour=23, minute=59, second=59))
|
||||
all_contents = all_contents.filter(created_at__lte=end_datetime)
|
||||
except ValueError:
|
||||
pass # 如果日期格式不正确,忽略时间筛选
|
||||
|
||||
# 分页处理
|
||||
paginator = Paginator(all_contents, page_size)
|
||||
page_obj = paginator.get_page(page_number)
|
||||
@@ -93,6 +111,8 @@ def dashboard(request):
|
||||
'page_size': page_size,
|
||||
'recent_tasks': recent_tasks,
|
||||
'total_media_files': total_media_files,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date,
|
||||
}
|
||||
|
||||
return render(request, 'crawler/dashboard.html', {'stats': stats})
|
||||
@@ -101,6 +121,8 @@ def dashboard(request):
|
||||
def search_page(request):
|
||||
"""搜索页面视图"""
|
||||
keyword = request.GET.get('q', '').strip()
|
||||
start_date = request.GET.get('start_date', '')
|
||||
end_date = request.GET.get('end_date', '')
|
||||
contents = []
|
||||
|
||||
if keyword:
|
||||
@@ -110,15 +132,38 @@ def search_page(request):
|
||||
defaults={'last_used': timezone.now()}
|
||||
)
|
||||
|
||||
# 搜索内容
|
||||
contents = CrawledContent.objects.filter(
|
||||
Q(title__icontains=keyword) |
|
||||
Q(content__icontains=keyword) |
|
||||
Q(keywords_matched__icontains=keyword)
|
||||
).order_by('-created_at')[:50]
|
||||
# 构建模糊搜索查询
|
||||
import re
|
||||
|
||||
# 处理通配符:将用户输入的 * 转换为数据库正则表达式的 .*
|
||||
# 同时转义特殊字符以防止正则表达式错误
|
||||
escaped_keyword = re.escape(keyword)
|
||||
regex_pattern = escaped_keyword.replace(r'\*', '.*')
|
||||
|
||||
# 搜索内容 - 使用正则表达式进行模糊匹配
|
||||
contents_query = CrawledContent.objects.filter(
|
||||
Q(title__iregex=regex_pattern) |
|
||||
Q(content__iregex=regex_pattern) |
|
||||
Q(keywords_matched__iregex=regex_pattern)
|
||||
)
|
||||
|
||||
# 添加时间范围筛选
|
||||
if start_date:
|
||||
contents_query = contents_query.filter(created_at__gte=start_date)
|
||||
if end_date:
|
||||
# 将结束日期设置为当天的结束时间
|
||||
from django.utils import timezone
|
||||
from datetime import datetime
|
||||
end_datetime = datetime.strptime(end_date, '%Y-%m-%d')
|
||||
end_datetime = timezone.make_aware(end_datetime.replace(hour=23, minute=59, second=59))
|
||||
contents_query = contents_query.filter(created_at__lte=end_datetime)
|
||||
|
||||
contents = contents_query.order_by('-created_at')[:50]
|
||||
|
||||
return render(request, 'crawler/search.html', {
|
||||
'keyword': keyword,
|
||||
'start_date': start_date,
|
||||
'end_date': end_date,
|
||||
'contents': contents
|
||||
})
|
||||
|
||||
@@ -281,10 +326,10 @@ def preview_crawled_content(request, content_id):
|
||||
<div class="meta">
|
||||
<p><strong>来源网站:</strong> {content.website.name} ({content.website.region})</p>
|
||||
<p><strong>原始链接:</strong> <a href="{content.url}" target="_blank">{content.url}</a></p>
|
||||
<p><strong>发布时间:</strong> {content.publish_date or '未知'}</p>
|
||||
<p><strong>发布时间:</strong> {content.publish_date.strftime('%Y-%m-%d %H:%M:%S') if content.publish_date else '未知'}</p>
|
||||
<p><strong>作者:</strong> {content.author or '未知'}</p>
|
||||
<p><strong>匹配关键字:</strong> {content.keywords_matched}</p>
|
||||
<p><strong>爬取时间:</strong> {content.created_at}</p>
|
||||
<p><strong>爬取时间:</strong> {content.created_at.strftime('%Y-%m-%d %H:%M:%S')}</p>
|
||||
<p><strong>媒体文件数量:</strong> {len(media_files)}</p>
|
||||
</div>
|
||||
|
||||
@@ -315,11 +360,11 @@ def download_crawled_content(request, content_id):
|
||||
# 添加元数据
|
||||
doc.add_paragraph(f'来源网站: {content.website.name} ({content.website.region})')
|
||||
doc.add_paragraph(f'原始链接: {content.url}')
|
||||
doc.add_paragraph(f'发布时间: {content.publish_date or "未知"}')
|
||||
doc.add_paragraph(f'发布时间: {content.publish_date.strftime("%Y-%m-%d %H:%M:%S") if content.publish_date else "未知"}')
|
||||
doc.add_paragraph(f'作者: {content.author or "未知"}')
|
||||
doc.add_paragraph(f'匹配关键字: {content.keywords_matched}')
|
||||
doc.add_paragraph(f'爬取时间: {content.created_at}')
|
||||
|
||||
doc.add_paragraph(f'爬取时间: {content.created_at.strftime("%Y-%m-%d %H:%M:%S")}')
|
||||
|
||||
# 添加内容
|
||||
doc.add_heading('正文', level=1)
|
||||
for paragraph in content.content.split('\n\n'):
|
||||
@@ -391,11 +436,11 @@ def download_selected_contents(request):
|
||||
# 添加元数据
|
||||
doc.add_paragraph(f'来源网站: {content.website.name} ({content.website.region})')
|
||||
doc.add_paragraph(f'原始链接: {content.url}')
|
||||
doc.add_paragraph(f'发布时间: {content.publish_date or "未知"}')
|
||||
doc.add_paragraph(f'发布时间: {content.publish_date.strftime("%Y-%m-%d %H:%M:%S") if content.publish_date else "未知"}')
|
||||
doc.add_paragraph(f'作者: {content.author or "未知"}')
|
||||
doc.add_paragraph(f'匹配关键字: {content.keywords_matched}')
|
||||
doc.add_paragraph(f'爬取时间: {content.created_at}')
|
||||
|
||||
doc.add_paragraph(f'爬取时间: {content.created_at.strftime("%Y-%m-%d %H:%M:%S")}')
|
||||
|
||||
# 添加内容
|
||||
doc.add_heading('正文', level=1)
|
||||
for paragraph in content.content.split('\n\n'):
|
||||
|
||||
Reference in New Issue
Block a user