Try Web.py

This commit is contained in:
2025-09-25 14:26:32 +08:00
parent 4010d9dd8c
commit 3cb7b11756

516
Convention/Runtime/Web.py Normal file
View File

@@ -0,0 +1,516 @@
from .Config import *
from .File import ToolFile
import json
import urllib.parse
import urllib.request
import urllib.error
import asyncio
import aiohttp
import os
import re
from typing import *
from pydantic import BaseModel
try:
import aiohttp
import aiofiles
except ImportError as e:
ImportingThrow(e, "Web", ["aiohttp", "aiofiles"])
class WebError(Exception):
    """Base class for exceptions raised by network operations."""
class URLValidationError(WebError):
    """Raised when a URL fails validation."""
class HTTPRequestError(WebError):
    """Raised when an HTTP request fails."""
class DownloadError(WebError):
    """Raised when a download fails."""
class ToolURL(BaseModel):
    """URL wrapper providing URL inspection, HTTP requests, and downloads.

    Synchronous methods go through ``urllib.request``. The ``*Async``
    methods go through ``aiohttp`` and open a new ``ClientSession`` for
    every call.
    """

    url: str

    def __init__(self, url: Union[str, 'ToolURL']):
        """Create the object from a URL string or another ``ToolURL``.

        Args:
            url: URL string, or a ToolURL whose URL is copied.
        """
        if isinstance(url, ToolURL):
            url = url.url
        super().__init__(url=str(url))

    def __str__(self) -> str:
        """Return the URL string."""
        return self.url

    def __bool__(self) -> bool:
        """Truthiness is equivalent to ``IsValid``."""
        return self.IsValid

    def ToString(self) -> str:
        """Return the full URL."""
        return self.url

    def GetFullURL(self) -> str:
        """Return the full URL."""
        return self.url

    @property
    def FullURL(self) -> str:
        """The full URL."""
        return self.url

    @property
    def IsValid(self) -> bool:
        """Whether the URL passes ``ValidateURL``."""
        return self.ValidateURL()

    def ValidateURL(self) -> bool:
        """Check the URL format.

        Returns:
            True when the scheme is ``http`` or ``https`` and the network
            location is non-empty; False otherwise, including when the URL
            cannot be parsed.
        """
        try:
            parsed = urllib.parse.urlparse(self.url)
        except (ValueError, TypeError, AttributeError):
            # urlparse rejects malformed input (e.g. a broken IPv6 literal);
            # a validity predicate must answer False rather than raise.
            return False
        return parsed.scheme in ('http', 'https') and parsed.netloc != ''

    def GetFilename(self) -> str:
        """Return the last segment of the URL path.

        Returns:
            The file name taken from the URL path, or an empty string when
            the path is empty or the URL cannot be parsed.
        """
        try:
            path = urllib.parse.urlparse(self.url).path
        except (ValueError, TypeError, AttributeError):
            return ""
        if not path:
            return ""
        # os.path.basename is kept on purpose: the result is used as a local
        # file name by the Save* methods, and on Windows it also strips
        # backslash-separated segments.
        return os.path.basename(path)

    def GetExtension(self) -> str:
        """Return the lower-cased extension of the file name.

        Returns:
            The text after the last dot of the file name (without the dot),
            or an empty string when the file name contains no dot.
        """
        _, dot, extension = self.GetFilename().rpartition('.')
        return extension.lower() if dot else ""

    def ExtensionIs(self, *extensions: str) -> bool:
        """Check whether the URL's extension matches any candidate.

        Args:
            *extensions: Candidate extensions; matching is case-insensitive
                and leading dots are ignored.

        Returns:
            True if the URL's extension equals one of the candidates.
        """
        candidates = {ext.lower().lstrip('.') for ext in extensions}
        return self.GetExtension() in candidates

    def Open(self, url: str) -> 'ToolURL':
        """Point this object at a new URL.

        Args:
            url: The new URL string.

        Returns:
            This same object, now holding the new URL.
        """
        self.url = str(url)
        return self

    # File-type predicates (based on the URL's extension only)

    @property
    def IsText(self) -> bool:
        """Text file: txt, html, htm, css, js, xml, csv, md, py, java, cpp, c, h."""
        return self.ExtensionIs('txt', 'html', 'htm', 'css', 'js', 'xml', 'csv', 'md', 'py', 'java', 'cpp', 'c', 'h')

    @property
    def IsJson(self) -> bool:
        """JSON file."""
        return self.ExtensionIs('json')

    @property
    def IsImage(self) -> bool:
        """Image file: jpg, jpeg, png, gif, bmp, svg, webp."""
        return self.ExtensionIs('jpg', 'jpeg', 'png', 'gif', 'bmp', 'svg', 'webp')

    @property
    def IsDocument(self) -> bool:
        """Document file: pdf, doc, docx, xls, xlsx, ppt, pptx."""
        return self.ExtensionIs('pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx')

    # Synchronous HTTP requests

    def Get(self, callback: Callable[[Optional[Any]], None]) -> bool:
        """Perform a synchronous GET request.

        The callback is invoked exactly once: with the open response on
        success, or with None when the URL is invalid or the request fails.
        The response is closed when the callback returns.

        Args:
            callback: Receives the response object, or None on failure.

        Returns:
            True if the request succeeded and the callback completed;
            False otherwise.
        """
        if not self.IsValid:
            callback(None)
            return False
        delivered = False
        try:
            with urllib.request.urlopen(self.url) as response:
                delivered = True
                callback(response)
            return True
        except Exception:
            # Best-effort API: failures are reported through the return value.
            # If the callback already received the response, do not call it
            # a second time with None.
            if not delivered:
                callback(None)
            return False

    def Post(self, callback: Callable[[Optional[Any]], None], form_data: Optional[Dict[str, str]] = None) -> bool:
        """Perform a synchronous POST request.

        The callback is invoked exactly once: with the open response on
        success, or with None when the URL is invalid or the request fails.

        Args:
            callback: Receives the response object, or None on failure.
            form_data: Optional form fields, sent URL-encoded. When empty or
                None, the request is sent without a body.

        Returns:
            True if the request succeeded and the callback completed;
            False otherwise.
        """
        if not self.IsValid:
            callback(None)
            return False
        delivered = False
        try:
            data = None
            if form_data:
                data = urllib.parse.urlencode(form_data).encode('utf-8')
            req = urllib.request.Request(self.url, data=data, method='POST')
            if form_data:
                req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            with urllib.request.urlopen(req) as response:
                delivered = True
                callback(response)
            return True
        except Exception:
            # See Get(): never deliver twice.
            if not delivered:
                callback(None)
            return False

    # Asynchronous HTTP requests

    async def GetAsync(self, callback: Callable[[Optional[Any]], None]) -> bool:
        """Perform an asynchronous GET request.

        The callback is called synchronously and exactly once: with the
        aiohttp response on success, or with None when the URL is invalid or
        the request fails. Unlike the urllib-based ``Get``, aiohttp does not
        raise for 4xx/5xx statuses, so such responses are still delivered.

        Args:
            callback: Receives the response object, or None on failure.

        Returns:
            True if the request succeeded and the callback completed;
            False otherwise.
        """
        if not self.IsValid:
            callback(None)
            return False
        delivered = False
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(self.url) as response:
                    delivered = True
                    callback(response)
            return True
        except Exception:
            # See Get(): never deliver twice.
            if not delivered:
                callback(None)
            return False

    async def PostAsync(self, callback: Callable[[Optional[Any]], None], form_data: Optional[Dict[str, str]] = None) -> bool:
        """Perform an asynchronous POST request.

        The callback is called synchronously and exactly once: with the
        aiohttp response on success, or with None when the URL is invalid or
        the request fails.

        Args:
            callback: Receives the response object, or None on failure.
            form_data: Optional form fields passed to aiohttp as ``data``.

        Returns:
            True if the request succeeded and the callback completed;
            False otherwise.
        """
        if not self.IsValid:
            callback(None)
            return False
        delivered = False
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.url, data=form_data) as response:
                    delivered = True
                    callback(response)
            return True
        except Exception:
            # See Get(): never deliver twice.
            if not delivered:
                callback(None)
            return False

    # Content loading

    def LoadAsText(self) -> str:
        """Fetch the URL synchronously and decode the body as text.

        The charset declared in the response headers is used, falling back
        to UTF-8.

        Returns:
            The decoded body.

        Raises:
            URLValidationError: If the URL is not a valid HTTP(S) URL.
            HTTPRequestError: If the request or the decoding fails.
        """
        if not self.IsValid:
            raise URLValidationError(f"Invalid URL: {self.url}")
        try:
            with urllib.request.urlopen(self.url) as response:
                content = response.read()
                encoding = response.headers.get_content_charset() or 'utf-8'
                return content.decode(encoding)
        except Exception as e:
            raise HTTPRequestError(f"Failed to load text from {self.url}: {str(e)}") from e

    async def LoadAsTextAsync(self) -> str:
        """Fetch the URL asynchronously and return the body as text.

        Returns:
            The decoded body.

        Raises:
            URLValidationError: If the URL is not a valid HTTP(S) URL.
            HTTPRequestError: If the request fails or the server answers
                with an error status.
        """
        if not self.IsValid:
            raise URLValidationError(f"Invalid URL: {self.url}")
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(self.url) as response:
                    # urllib raises on 4xx/5xx; do the same here so an error
                    # page is never returned as if it were the content.
                    response.raise_for_status()
                    return await response.text()
        except Exception as e:
            raise HTTPRequestError(f"Failed to load text from {self.url}: {str(e)}") from e

    def LoadAsBinary(self) -> bytes:
        """Fetch the URL synchronously and return the raw body.

        Returns:
            The response body as bytes.

        Raises:
            URLValidationError: If the URL is not a valid HTTP(S) URL.
            HTTPRequestError: If the request fails.
        """
        if not self.IsValid:
            raise URLValidationError(f"Invalid URL: {self.url}")
        try:
            with urllib.request.urlopen(self.url) as response:
                return response.read()
        except Exception as e:
            raise HTTPRequestError(f"Failed to load binary from {self.url}: {str(e)}") from e

    async def LoadAsBinaryAsync(self) -> bytes:
        """Fetch the URL asynchronously and return the raw body.

        Returns:
            The response body as bytes.

        Raises:
            URLValidationError: If the URL is not a valid HTTP(S) URL.
            HTTPRequestError: If the request fails or the server answers
                with an error status.
        """
        if not self.IsValid:
            raise URLValidationError(f"Invalid URL: {self.url}")
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(self.url) as response:
                    # Keep sync and async loaders consistent on 4xx/5xx.
                    response.raise_for_status()
                    return await response.read()
        except Exception as e:
            raise HTTPRequestError(f"Failed to load binary from {self.url}: {str(e)}") from e

    def LoadAsJson(self, model_type: Optional[type] = None) -> Any:
        """Fetch the URL synchronously and deserialize the body as JSON.

        Args:
            model_type: Optional pydantic model class; when it is a
                ``BaseModel`` subclass the parsed data is validated into it.

        Returns:
            The parsed JSON data, or a ``model_type`` instance.

        Raises:
            URLValidationError: If the URL is not a valid HTTP(S) URL.
            HTTPRequestError: If the request fails or the body is not valid
                JSON.
        """
        text_content = self.LoadAsText()
        try:
            json_data = json.loads(text_content)
        except json.JSONDecodeError as e:
            raise HTTPRequestError(f"Failed to parse JSON from {self.url}: {str(e)}") from e
        # isinstance guard: issubclass() raises TypeError for non-class
        # arguments such as typing generics.
        if isinstance(model_type, type) and issubclass(model_type, BaseModel):
            return model_type.model_validate(json_data)
        return json_data

    async def LoadAsJsonAsync(self, model_type: Optional[type] = None) -> Any:
        """Fetch the URL asynchronously and deserialize the body as JSON.

        Args:
            model_type: Optional pydantic model class; when it is a
                ``BaseModel`` subclass the parsed data is validated into it.

        Returns:
            The parsed JSON data, or a ``model_type`` instance.

        Raises:
            URLValidationError: If the URL is not a valid HTTP(S) URL.
            HTTPRequestError: If the request fails or the body is not valid
                JSON.
        """
        text_content = await self.LoadAsTextAsync()
        try:
            json_data = json.loads(text_content)
        except json.JSONDecodeError as e:
            raise HTTPRequestError(f"Failed to parse JSON from {self.url}: {str(e)}") from e
        if isinstance(model_type, type) and issubclass(model_type, BaseModel):
            return model_type.model_validate(json_data)
        return json_data

    # Saving and downloading

    def Save(self, local_path: Optional[str] = None) -> ToolFile:
        """Download the URL and save it, choosing the format by extension.

        Text extensions are saved via ``SaveAsText``, ``json`` via
        ``SaveAsJson``, and everything else via ``SaveAsBinary``.

        Args:
            local_path: Destination path. When None, the URL's file name is
                used, or ``"downloaded_file"`` if the URL has none.

        Returns:
            The saved file object.
        """
        if local_path is None:
            local_path = self.GetFilename() or "downloaded_file"
        # The SaveAs* methods create the ToolFile and its parent directory
        # themselves, so nothing needs to be prepared here.
        if self.IsText:
            return self.SaveAsText(local_path)
        if self.IsJson:
            return self.SaveAsJson(local_path)
        return self.SaveAsBinary(local_path)

    def SaveAsText(self, local_path: Optional[str] = None) -> ToolFile:
        """Download the URL as text and save it.

        Args:
            local_path: Destination path. When None, the URL's file name is
                used, or ``"downloaded.txt"`` if the URL has none.

        Returns:
            The saved file object.
        """
        if local_path is None:
            local_path = self.GetFilename() or "downloaded.txt"
        text_content = self.LoadAsText()
        file_obj = ToolFile(local_path)
        file_obj.TryCreateParentPath()
        file_obj.SaveAsText(text_content)
        return file_obj

    def SaveAsJson(self, local_path: Optional[str] = None) -> ToolFile:
        """Download the URL as JSON and save the parsed data.

        Args:
            local_path: Destination path. When None, the URL's file name is
                used, or ``"downloaded.json"`` if the URL has none.

        Returns:
            The saved file object.
        """
        if local_path is None:
            local_path = self.GetFilename() or "downloaded.json"
        json_data = self.LoadAsJson()
        file_obj = ToolFile(local_path)
        file_obj.TryCreateParentPath()
        file_obj.SaveAsJson(json_data)
        return file_obj

    def SaveAsBinary(self, local_path: Optional[str] = None) -> ToolFile:
        """Download the URL as raw bytes and save them.

        Args:
            local_path: Destination path. When None, the URL's file name is
                used, or ``"downloaded.bin"`` if the URL has none.

        Returns:
            The saved file object.
        """
        if local_path is None:
            local_path = self.GetFilename() or "downloaded.bin"
        binary_content = self.LoadAsBinary()
        file_obj = ToolFile(local_path)
        file_obj.TryCreateParentPath()
        file_obj.SaveAsBinary(binary_content)
        return file_obj

    def Download(self, local_path: Optional[str] = None) -> ToolFile:
        """Download the file synchronously; an alias for ``Save``.

        Args:
            local_path: Destination path; see ``Save`` for the default.

        Returns:
            The downloaded file object.
        """
        return self.Save(local_path)

    async def DownloadAsync(self, local_path: Optional[str] = None) -> ToolFile:
        """Download the file asynchronously, choosing the format by extension.

        The content is fetched with the ``Load*Async`` methods; the file is
        then written through the synchronous ``ToolFile`` save methods.

        Args:
            local_path: Destination path. When None, the URL's file name is
                used, or ``"downloaded_file"`` if the URL has none.

        Returns:
            The downloaded file object.

        Raises:
            DownloadError: If fetching or saving fails; the original
                exception is attached as the cause.
        """
        if local_path is None:
            local_path = self.GetFilename() or "downloaded_file"
        file_obj = ToolFile(local_path)
        file_obj.TryCreateParentPath()
        try:
            if self.IsText:
                content = await self.LoadAsTextAsync()
                file_obj.SaveAsText(content)
            elif self.IsJson:
                content = await self.LoadAsJsonAsync()
                file_obj.SaveAsJson(content)
            else:
                content = await self.LoadAsBinaryAsync()
                file_obj.SaveAsBinary(content)
            return file_obj
        except Exception as e:
            raise DownloadError(f"Failed to download {self.url}: {str(e)}") from e
# Shared module-level HTTP client session, reused to avoid exhausting the connection pool
_http_session: Optional[aiohttp.ClientSession] = None
async def get_http_session() -> aiohttp.ClientSession:
    """Return the shared module-level HTTP session, creating it on demand.

    A new ``aiohttp.ClientSession`` is created when none exists yet or when
    the cached one has already been closed.
    """
    global _http_session
    needs_new_session = _http_session is None or _http_session.closed
    if needs_new_session:
        _http_session = aiohttp.ClientSession()
    return _http_session
async def close_http_session():
    """Close the shared module-level HTTP session and forget it.

    The global reference is cleared before awaiting ``close()``, so a
    ``get_http_session()`` call that runs during the await creates a fresh
    session instead of receiving one that is being closed. A session that
    was already closed is simply dropped.
    """
    global _http_session
    session, _http_session = _http_session, None
    if session is not None and not session.closed:
        await session.close()