Joe1sn's Cabinet

【Muud开发】2.HTTP服务改善

从0到1开发一个勉强能用的Python Web“框架”

项目地址:https://github.com/Joe1sn/muud

主要改进

6-1 返回报文设置

首先将原来的拼接字符串改为了Response类,实现HTTP报文字段的自定义

这样可以大大简化view.py中的代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
class Response():
    """Minimal HTTP/1.1 response builder.

    Holds the status code, the Content-Type (looked up in ``content_types``)
    and the body, and renders them into response text with ``consum``.
    """

    def __init__(self, type="json", status_code=200,
                 reply="") -> None:
        """Store the response fields.

        Args:
            type: Key into ``content_types`` selecting the Content-Type.
                (The name shadows the builtin but is kept because callers
                pass it by keyword.)
            status_code: HTTP status code; must be a key of
                ``status_code_dict`` by the time ``consum`` is called.
            reply: Response body. For ``type="json"`` any JSON-serializable
                object; otherwise the body text itself.

        Raises:
            KeyError: if ``type`` is not a key of ``content_types``.
        """
        self.content_type = content_types[type]
        self.status_code = status_code
        if type == "json":
            # json.dumps' default ensure_ascii=True already yields pure-ASCII
            # JSON with \uXXXX escapes. The previous unicode_escape round-trip
            # doubled every backslash (breaking \" and \n escapes) and could
            # emit \xNN / \UXXXXXXXX sequences, which are not valid JSON.
            self.reply = json.dumps(reply)
        else:
            self.reply = reply
        # Content-Length is a byte count, so measure the encoded body;
        # len() of a str counts characters and is too small for non-ASCII text.
        if isinstance(self.reply, str):
            self.length = len(self.reply.encode())
        else:
            self.length = len(self.reply)

    def consum(self) -> str:
        """Render the status line, headers and body as a single string.

        Returns:
            The complete response text; callers encode it before sending.

        Raises:
            KeyError: if ``status_code`` is not a key of ``status_code_dict``.
        """
        head_lines = [
            "HTTP/1.1 {status_code} {msg}".format(
                status_code=self.status_code,
                msg=status_code_dict[self.status_code]),
            "Content-Type: {type}".format(type=self.content_type),
            "Content-Length: {length}".format(length=self.length),
        ]
        # A blank line (CRLF CRLF) separates the head from the body.
        return "\r\n".join(head_lines) + "\r\n\r\n" + self.reply

这样就可以更简单地编写视图函数

1
2
3
4
5
@http_api
def html_test(http_request):
    """Demo view: reply with a fixed "Hello, world!" HTML page.

    Builds an HTML Response, serializes it with consum() and returns the
    encoded result.
    """
    page = "<html><body><h1>Hello, world!</h1></body></html>"
    response = Response(type="html", reply=page)
    serialized = response.consum()
    return serialized.encode()

不过仍然欠缺文件服务,这只需在epoll服务器的send部分做修改即可;更复杂的还涉及到文件上传等。后续会实现类似http.server的服务效果,不然静态网页加载很麻烦。

6-2 设置更多的content_type

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Short type name -> MIME type; Response looks up its `type` argument here.
content_types = {
    "text": "text/plain",
    "html": "text/html",                # HTML document
    "css": "text/css",                  # CSS stylesheet
    "js": "text/javascript",            # JavaScript

    "json": "application/json",
    "pdf": "application/pdf",
    "xml": "application/xml",
    "bin": "application/octet-stream",  # arbitrary binary data

    "jpeg": "image/jpeg",               # JPEG image
    "png": "image/png",                 # PNG image
    "gif": "image/gif",                 # GIF image

    # The audio entry used to be keyed "mpeg" as well, so the later
    # "mpeg": "video/mpeg" entry silently replaced it. "mpeg" keeps meaning
    # video (the value lookups already returned); audio gets its own key.
    "mp3": "audio/mpeg",                # MPEG audio
    "wav": "audio/wav",                 # WAV audio

    "mp4": "video/mp4",                 # MP4 video
    "mpeg": "video/mpeg",               # MPEG video
}

Response的返回改为bytes类型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def consum(self) -> bytes:
    """Serialize the response (status line, headers, body) to bytes.

    Returns:
        The complete HTTP/1.1 response, ready to be written to the socket.

    Raises:
        KeyError: if ``self.status_code`` is not a key of ``status_code_dict``.
    """
    # Encode the body first so Content-Length can be the real byte count;
    # self.length is len() of the un-encoded reply, which undercounts
    # non-ASCII text bodies.
    if isinstance(self.reply, (bytes, bytearray)):
        body = bytes(self.reply)
    else:
        body = self.reply.encode()

    head_lines = [
        "HTTP/1.1 {status_code} {msg}".format(
            status_code=self.status_code,
            msg=status_code_dict[self.status_code]),
        "Content-Type: {type}".format(type=self.content_type),
        "Content-Length: {length}".format(length=len(body)),
    ]
    # A blank line (CRLF CRLF) separates the head from the body.
    head = "\r\n".join(head_lines) + "\r\n\r\n"
    return head.encode() + body

这样就能直接访问各种文件了,方便下一步渲染

1
2
3
4
5
6
7
@http_api
def file_test(http_request):
    """Demo view: serve the test PDF from disk.

    Reads the file in binary mode and returns it wrapped in a serialized
    "pdf" Response.
    """
    with open(r"/mnt/d/Github/muud/test/test.pdf", "rb") as pdf_file:
        pdf_bytes = pdf_file.read()
    response = Response(type="pdf", reply=pdf_bytes)
    return response.consum()

image-20230603110534674

6-3~4 文件上传

这部分写了挺久,因为涉及到epoll模型的改善,后面会继续改进这个模块

首先有这两个视图函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
@http_api
def file_upload(http_request):
    """Serve the upload form page.

    Reads file_upload.html in binary mode and returns it as a serialized
    "html" Response.
    """
    with open(r"/mnt/d/Github/muud/test/file_upload.html", "rb") as form_file:
        form_page = form_file.read()
    response = Response(type="html", reply=form_page)
    return response.consum()

@http_api
def upload(http_request):
    """Store an uploaded file in the test directory and reply with an OK page.

    Args:
        http_request: Parsed request whose ``data`` dict provides
            ``"filename"`` (str) and ``"file"`` (bytes).

    Returns:
        The serialized HTML response produced by ``Response.consum()``.

    Raises:
        OSError: if the target file cannot be opened or written (this
            includes an empty or directory-like filename).
    """
    import os  # local import keeps this block self-contained

    info("FILE Content>>>>>>>>>")
    # The filename comes from the client, so keep only its last path
    # component; otherwise a name such as "../../x" would write outside the
    # upload directory. Backslashes are normalised first so Windows-style
    # paths are reduced to their last component as well.
    name = os.path.basename(http_request.data["filename"].replace("\\", "/"))
    with open(os.path.join(r"/mnt/d/Github/muud/test/", name), "wb") as f:
        f.write(http_request.data["file"])
    data = "<html><body><h1>okok</h1></body></html>"
    result = Response(reply=data, type="html", status_code=200)
    return result.consum()

目前参考了Django的上传,把数据结果收集在request中然后处理

那么就涉及HttpRequest的处理

首先新增了两个字段

1
2
# Counters compared against each other to decide whether an upload is complete.
self.length = 0     # total length (presumably the expected body size — TODO confirm where it is set)
self.cur_len = 0 # current length (body bytes seen so far)

从这个判断文件上传是否完成,然后就是消息的提取

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File upload: the extracted data is kept as bytes
elif "multipart" in content_type:
# Size of the request head: everything before the first blank line, plus the
# 4 bytes of the b"\r\n\r\n" separator itself.
pre_len = len(self.raw_data.split(b"\r\n\r\n")[0])+4
# Everything after the head, i.e. the body bytes currently in the buffer.
raw_data = self.raw_data[pre_len:]
self.cur_len = len(raw_data)
self.data={"name":"", "filename":"", "file":b"", "len":0}
# r_boundary is a regex defined elsewhere; presumably group(1) is the boundary
# token from the Content-Type header — TODO confirm. Body delimiters are that
# token prefixed with b"--".
boundary = b"--" + r_boundary.search(self.raw_data).group(1)
# Second CRLF-separated line of the first part (presumably the
# Content-Disposition header — verify against real requests).
file_info = raw_data.split(boundary)[1].split(b"\r\n")[1]

for attribute in file_info.split(b"; "):
# Extract the field name
if b"name" in attribute and b"=" in attribute and not attribute.lower().startswith(b"content"):
if attribute.split(b"=")[0] == b"name":
# [1:-1] drops the first and last character of the value (presumably the
# surrounding quotes). NOTE(review): re-joining with b"" removes any "="
# that occurs inside the value.
self.data["name"] = b"".join(attribute.split(b"=")[1:])[1:-1].decode()
elif attribute.split(b"=")[0] == b"filename":
# Extract the original filename
self.data["filename"] = b"".join(attribute.split(b"=")[1:])[1:-1].decode()

# Third CRLF-separated line of the first part (presumably the part's
# Content-Type header — TODO confirm).
file_type = raw_data.split(boundary)[1].split(b"\r\n")[2]
pre_len = len(file_info) + len(file_type) + 2*4 # prefix length: the two header lines plus 8 bytes (looks like four 2-byte CRLFs — confirm)

# NOTE(review): this splits self.raw_data (the whole request), not the local
# raw_data used above. [pre_len:-2] skips the part headers and drops the last
# 2 bytes (presumably the CRLF before the next boundary).
self.data["file"] = self.raw_data.split(boundary)[1][pre_len:-2]
self.data["len"] = len(self.data["file"])

由于使用了epoll模型,当已接收的数据长度小于总长度时继续接收,全部接收完成后再进行处理,因此server需要做如下修改

数据可读时继续读取客户端发送过来的数据

1
2
3
4
5
6
7
8
elif event & select.EPOLLIN:
# Data is available to read
try:
data = b""
# Read up to 10 MiB (1024*1024*10 bytes) from this connection in one call.
data = connections[fileno].recv(1024*1024*10)
# print("data from server\n",data)
if data:
# Append to this descriptor's pending request bytes instead of replacing
# them, so a request that arrives in several reads accumulates.
requests[fileno] += data

由于我的epoll对每个发送过来的package都有一个response,所以需要修改

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Process the buffered requests and build responses
for fileno, data in requests.items():
# Only buffers containing the substring "HTTP"/"http" are treated as requests.
if b"HTTP" in data or b"http" in data:
# Parse the request headers
# NOTE(review): the whole accumulated buffer is parsed again on every pass.
http = HTTPRequest(data=data, fileno=fileno, connections=connections)

# Fewer body bytes than expected: do nothing yet and wait for more data.
if http.cur_len < http.length:
pass
else:
http.show()
http_route = HTTPRouter(http)
response = http_route.route()
responses[fileno] = response

# NOTE(review): socket.send() may transmit only part of the buffer; its
# return value is ignored here — confirm this is acceptable.
connections[fileno].send(response)

# Build the response headers and content
# Clear the request buffer
response = b''
data = b''
requests[fileno] = b''

image-20230604123749998

然后测试了一下效率:面对小文件(<10MB)时效率还说得过去;大文件的话,由于是把对应fd收到的data不断拼接,内存开销和CPU开销都会增大(不知道使用C的指针会不会快一些),这也是后续需要优化的地方

小文件

大文件大文件2

更大的文件到后面效率会出现指数级下降。不过这里也可以看出基于TCP的拥塞控制:单次发包最大为128kb,采用分块传输,后续可以针对这些特性进行改进。

6-6 重定向

1
2
3
4
5
@http_api
def redirect(http_request):
    """Demo view: build a 302 response whose reply is the target URL.

    Returns the serialized response from consum().
    """
    target_url = "http://www.qq.com"
    response = Response(status_code=302, type="text", reply=target_url)
    return response.consum()

主要就是通过302跳转进行重定向,然后在返回报文那里设置了几个新的参数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
def consum(self) -> bytes:
    """Serialize the response to raw HTTP/1.1 bytes.

    For status 302 the reply is used as the ``Location`` target and no body
    is sent; for every other status the reply is the body.

    Returns:
        The complete response, ready to be written to the socket.

    Raises:
        KeyError: if ``self.status_code`` is not a key of ``status_code_dict``.
    """
    status_line = "HTTP/1.1 {status_code} {msg}".format(
        status_code=self.status_code,
        msg=status_code_dict[self.status_code])

    if self.status_code == 302:
        location = self.reply
        if isinstance(location, (bytes, bytearray)):
            # Formatting bytes directly would render as "b'...'".
            location = bytes(location).decode()
        # Drop CR/LF so the target cannot inject extra header lines.
        location = str(location).replace("\r", "").replace("\n", "")
        body = b""
        head_lines = [
            status_line,
            "Location: {location}".format(location=location),
            # Explicit empty body: without a Content-Length a client on a
            # kept-alive connection may keep waiting for body bytes.
            "Content-Length: 0",
        ]
    else:
        # Encode the body first so Content-Length is the real byte count;
        # self.length is len() of the un-encoded reply, which undercounts
        # non-ASCII text bodies.
        if isinstance(self.reply, (bytes, bytearray)):
            body = bytes(self.reply)
        else:
            body = self.reply.encode()
        head_lines = [
            status_line,
            "Content-Type: {type}".format(type=self.content_type),
            "Content-Length: {length}".format(length=len(body)),
        ]

    # A blank line (CRLF CRLF) separates the head from the body.
    head = "\r\n".join(head_lines) + "\r\n\r\n"
    return head.encode() + body

之后可以在utils里面打包这些方法,我这里放在http_response下面

1
2
3
4
5
# Redirect helper
def redirect(http_url):
    """Return the serialized 302 response for *http_url*.

    The URL is converted with str() and passed as the reply of a "text"
    Response with status code 302.
    """
    target = str(http_url)
    response = Response(status_code=302, type="text", reply=target)
    return response.consum()