HTTP Server
I wanted to see how difficult it would be to implement a simple HTTP/1.0 server. I've come across RFC 1945 in the past, but never actually read it properly.
I am a self-taught programmer and lower level network programming is something I've not had much experience with. I did get my feet wet last year with the protohackers network challenges, but there's a lot I don't know about.
My goal was to avoid reading any tutorials, and just work off of the RFC and the socket library. There's nothing wrong with following tutorials, I just wanted to challenge myself.
Here's what I ended up with after a couple of hours.
http.py
#!/usr/bin/env python
# HTTP/1.0 implementation from RFC1945
# https://datatracker.ietf.org/doc/html/rfc1945
import dataclasses
import socket
from datetime import datetime
@dataclasses.dataclass
class Request:
    """A parsed HTTP request.

    Header names are stored upper-cased so lookups are case-insensitive;
    header values are stored with surrounding whitespace removed.
    """

    method: str
    path: str
    protocol: str
    headers: dict[str, str]
    body: str

    @property
    def uri(self) -> str:
        """Return ``http://<Host header><path>``.

        Raises:
            KeyError: If the request carried no ``Host`` header.
        """
        return "http://" + self.headers["HOST"] + self.path

    @classmethod
    def make(cls, raw: bytes) -> "Request":
        """Parse the raw bytes of a request into a ``Request``.

        Both CRLF and bare LF line endings are accepted. Everything after
        the first blank line is kept verbatim as the body.

        Args:
            raw: The bytes received from the client.

        Returns:
            The parsed request; ``body`` is ``""`` when there is no body.

        Raises:
            ValueError: If the request line is not exactly
                ``METHOD PATH PROTOCOL``.
            UnicodeDecodeError: If ``raw`` is not valid UTF-8.
        """
        decoded = raw.decode()

        # The head ends at the first blank line. Pick whichever terminator
        # occurs first so a body containing blank lines is left untouched.
        cut = None
        for sep in ("\r\n\r\n", "\n\n"):
            index = decoded.find(sep)
            if index != -1 and (cut is None or index < cut[0]):
                cut = (index, len(sep))
        if cut is None:
            head, body = decoded, ""
        else:
            head = decoded[: cut[0]]
            body = decoded[cut[0] + cut[1]:]

        request_line, *header_lines = head.split("\n")
        # str.split() with no argument also discards the trailing "\r".
        parts = request_line.split()
        if len(parts) != 3:
            raise ValueError(f"Malformed request line: {request_line.strip()!r}")
        method, path, protocol = parts

        headers = {}
        for line in header_lines:
            # partition keeps any further ":" (e.g. "host:port") in the value.
            name, colon, value = line.partition(":")
            if not colon:
                continue  # not a "Name: value" line; ignore it
            headers[name.strip().upper()] = value.strip()

        return cls(
            method=method,
            path=path,
            protocol=protocol,
            headers=headers,
            body=body,
        )
@dataclasses.dataclass
class Response:
request: Request
status: int
headers: dict[str, str]
body: bytes
@classmethod
def make(cls, request: Request, status: int, body: bytes | None, **headers) -> "Response":
# Date: Tue, 15 Nov 1994 08:12:31 GMT
datefmt = "%a, %d %b %Y %H:%M:%S GMT"
datestr = datetime.now().strftime(datefmt)
headers.update({
"Location": request.uri,
"Server": "keshab/0.1",
"Allow": "GET",
"Content-Type": "text/html",
"Content-Length": len(body),
"Date": datestr,
"Last-Modified": datestr
})
return Response(request=request, status=status, headers=headers, body=body)
def encode(self):
headerstr = "\n".join([
f"{key}: {value}" for key, value in self.headers.items()
])
resp = f"{self.request.protocol} {self.status}\n{headerstr}\n"
return resp.encode() + self.body
def handle_request(data: bytes) -> Response:
    """Turn the raw bytes of one request into a 200 "Hello world" response.

    The raw request is echoed to stdout between START/END markers, then
    parsed, and an HTML page carrying the current local time is returned.
    """
    # Echo exactly what the client sent; handy while poking at the server.
    print("\nSTART\n", data.decode(), "\nEND")

    parsed = Request.make(data)
    now = datetime.now().isoformat()

    page = (
        "\n"
        "<html><body>\n"
        "<h1>Hello world</h1>\n"
        f"<h2>Now: {now}</h2>\n"
        "</body>\n"
        "</html>\n"
    )
    return Response.make(request=parsed, status=200, body=page.encode())
def runserver(host: str, port: int):
    """Serve HTTP requests on ``host:port``, one connection at a time.

    Each accepted connection is read once (up to 1024 bytes), answered via
    ``handle_request`` and then closed. Errors raised while serving a
    connection are printed and the server moves on to the next client.

    Args:
        host: Interface address to bind to.
        port: TCP port to listen on.
    """
    # The context manager closes the listening socket however the loop ends
    # (for example on Ctrl-C).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Allow an immediate restart while old connections sit in TIME_WAIT.
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((host, port))
        print(f"Listening at http://{host}:{port}")
        s.listen(1)
        while True:
            conn, _ = s.accept()
            # Close the connection once the response is sent; otherwise this
            # single-threaded loop would sit on an idle client and never
            # accept the next one.
            with conn:
                try:
                    data = conn.recv(1024)
                    if data:
                        conn.sendall(handle_request(data).encode())
                except Exception as err:
                    # Top-level boundary: one bad request must not stop the server.
                    print(f"Error while handling request: {err}")
# When executed as a script, start the server on 127.0.0.1:6111.
if __name__ == "__main__":
    runserver(host="127.0.0.1", port=6111)
and running it is just
./http.py
The RFC was actually really easy to follow. I didn't actually
read everything though, and I was aware of the major concepts
already. But I did learn about the From
header for the
first time.
The code snippet above is too long for what it actually does. This same functionality could be compressed into far fewer lines, but I'm too lazy to refactor that now.
I want to move onto handling multiple requests and use asyncio
for an extra bit of challenge.
with asyncio
The asyncio
library seems slightly more ergonomic
compared to socket
.
http2.py
#!/usr/bin/env python
# https://datatracker.ietf.org/doc/html/rfc1945
import socket
import asyncio
import socket
from datetime import datetime
async def handle_request(reader, writer):
    """Answer one connection with a fixed HTML page and close it.

    Up to 1024 bytes of the request are read and discarded; the reply does
    not depend on what the client asked for.

    Args:
        reader: Stream the request is read from.
        writer: Stream the response is written to; always closed on exit.
    """
    # Only ``datetime`` is imported at file level.
    from datetime import timezone

    try:
        # Consume the request before replying. Its content is not inspected,
        # so it is not decoded either (non-UTF-8 input cannot break us).
        await reader.read(1024)

        body = b"<html><body><h1>Hello</h1></body></html>"
        # The format string hard-codes "GMT", so the timestamp must be UTC.
        datestr = datetime.now(timezone.utc).strftime("%a, %d %b %Y %H:%M:%S GMT")
        head = "\r\n".join([
            "HTTP/1.1 200 OK",
            "Location: http://localhost:6111/",
            "Server: keshab/0.2",
            "Allow: GET",
            "Content-Type: text/html",
            f"Content-Length: {len(body)}",
            f"Date: {datestr}",
        ])
        # Header lines end in CRLF; an empty line separates head and body.
        writer.write(head.encode() + b"\r\n\r\n" + body)
        await writer.drain()
    finally:
        # Release the connection even if reading or writing failed.
        writer.close()
        await writer.wait_closed()
async def runserver():
    """Start the asyncio server on 127.0.0.1:6111 and keep serving connections."""
    bind_options = dict(reuse_address=True, reuse_port=True)
    srv = await asyncio.start_server(handle_request, "127.0.0.1", 6111, **bind_options)
    print("Listening on 6111")
    # Leaving the context closes the server.
    async with srv:
        await srv.serve_forever()
# When executed as a script, run the asyncio server until the process stops.
if __name__ == "__main__":
    asyncio.run(runserver())
slightly better server
The server above, while useful as a toy example, is not really doing much. It also contains a major bug in the request parser, but that's not a problem for now. I think asyncio might be a good way to implement a simple key-value database.
httpdb.py
#!/usr/bin/env python
# https://datatracker.ietf.org/doc/html/rfc1945
import json
import socket
import asyncio
import socket
from datetime import datetime
def get_request(raw: bytes):
    """Parse raw request bytes into a plain dict.

    Both CRLF and bare LF line endings are accepted. Only the head (request
    line and headers) is decoded as text; everything after the first blank
    line is returned untouched as the body.

    Args:
        raw: The bytes received from the client.

    Returns:
        A dict with the keys ``headers`` (upper-cased names mapped to
        stripped values), ``body`` (bytes), ``method`` (upper-cased),
        ``path`` and ``protocol``.

    Raises:
        ValueError: If the request line is not exactly
            ``METHOD PATH PROTOCOL``.
        UnicodeDecodeError: If the head is not valid UTF-8.
    """
    req = {"headers": {}, "body": b""}

    # The head ends at the first blank line. Pick whichever terminator occurs
    # first so a body that itself contains blank lines stays intact.
    cut = None
    for sep in (b"\r\n\r\n", b"\n\n"):
        index = raw.find(sep)
        if index != -1 and (cut is None or index < cut[0]):
            cut = (index, len(sep))
    if cut is None:
        head = raw
    else:
        head = raw[: cut[0]]
        req["body"] = raw[cut[0] + cut[1]:]

    request_line, *header_lines = head.decode().split("\n")
    # str.split() with no argument also discards the trailing "\r".
    parts = request_line.split()
    if len(parts) != 3:
        raise ValueError(f"Malformed request line: {request_line.strip()!r}")
    method, path, protocol = parts
    req.update(method=method.upper(), path=path, protocol=protocol)

    for line in header_lines:
        # partition keeps any further ":" (e.g. "host:port") in the value.
        name, colon, value = line.partition(":")
        if colon:
            req["headers"][name.strip().upper()] = value.strip()
    return req
# In-memory store shared by the handlers: request path -> (content type, value).
# Seeded with a banner entry at "/".
# NOTE(review): "text/text" does not look like a registered media type
# ("text/plain" is the usual one) -- confirm before changing, the handlers use
# the same string as their default.
DB = {
    "/": ("text/text", b"httpdb, keshab/0.3"),
}
async def handle_post(req, reader, writer):
    """Store the request body in ``DB`` under the request path.

    JSON payloads are parsed before storing so that only valid JSON is
    kept; any other content type is stored as the raw body bytes.

    Args:
        req: Parsed request dict (``path``, ``headers`` and ``body`` are read).
        reader: Unused; accepted for symmetry with the other handlers.
        writer: Unused; accepted for symmetry with the other handlers.

    Returns:
        The encoded response: 201 when the key is new, 200 when an existing
        key is overwritten, 400 when a JSON payload fails to parse.
    """
    key = req["path"]
    status = 200 if key in DB else 201

    ctype = req["headers"].get("CONTENT-TYPE", "text/text")
    data = req["body"]

    # Compare only the media type: drop parameters such as "; charset=utf-8"
    # and ignore case.
    media_type = ctype.split(";", 1)[0].strip().lower()
    if media_type == "application/json":
        try:
            data = json.loads(data)
        except ValueError as err:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            return get_response(req, 400, str(err).encode())
        # Store the bare media type so the exact-match check done when the
        # value is read back recognises it as JSON and re-serialises it.
        ctype = "application/json"

    DB[key] = (ctype, data)
    return get_response(req, status, b"")
async def handle_get(req, reader, writer):
    """Look up the request path in ``DB`` and build the response for it.

    ``/_all`` returns the whole store as JSON, an unknown path yields a 404,
    and any other path returns the stored value with its stored content type.
    """
    path = req["path"]

    if path == "/_all":
        # Stored bytes are not JSON-serialisable: decode them, stringify the rest.
        dump = json.dumps(
            DB,
            default=lambda obj: obj.decode() if isinstance(obj, bytes) else str(obj),
        )
        return get_response(req, 200, dump.encode(), "application/json")

    entry = DB.get(path)
    if entry is None:
        return get_response(req, 404, f"Key '{path}' not found.".encode())

    stored_type, payload = entry
    if stored_type == "application/json":
        # JSON entries are kept as parsed objects; serialise them again.
        payload = json.dumps(payload).encode()
    return get_response(req, 200, payload, stored_type)
def get_response(req: dict, status: int, body: bytes | None, content_type: str="text/text") -> bytes:
datefmt = "%a, %d %b %Y %H:%M:%S GMT"
datestr = datetime.now().strftime(datefmt)
uri = "http" + req["headers"]["HOST"] + req["path"]
resp = (
f"HTTP/1.1 {status}\n"
f"Location: {uri}\n"
"Server: keshab/0.3\n"
"Allow: GET,POST\n"
f"Content-Type: {content_type}\n"
f"Content-Length: {len(body)}\n"
f"Date: {datestr}\n"
).encode()
return resp + b"\n" + body
async def handle_request(reader, writer):
    """Serve one connection: parse the request, dispatch on method, reply.

    GET goes to ``handle_get`` and POST to ``handle_post`` (except for
    protected paths); everything else is answered with 501. A request whose
    request line cannot be parsed is answered with 400. The connection is
    always closed afterwards.

    Only the first 1024 bytes of the request are read, so larger request
    bodies arrive truncated.

    Args:
        reader: Stream the request is read from.
        writer: Stream the response is written to.
    """
    protected = ["/_all"]  # paths that may be read but not overwritten

    try:
        data = await reader.read(1024)
        try:
            req = get_request(data)
        except ValueError:
            # Unparseable request (including an empty read). There is no
            # request dict to pass to get_response, so reply by hand.
            writer.write(b"HTTP/1.1 400 Bad Request\r\nContent-Length: 0\r\n\r\n")
            await writer.drain()
            return

        not_implemented = f"Method {req['method']} Not Implemented\n".encode()
        if req["method"] == "GET":
            resp = await handle_get(req, reader, writer)
        elif req["method"] == "POST":
            if req["path"] in protected:
                resp = get_response(req, 501, not_implemented)
            else:
                resp = await handle_post(req, reader, writer)
        else:
            resp = get_response(req, 501, not_implemented)

        writer.write(resp)
        await writer.drain()
    finally:
        # Release the connection even when a handler raised.
        writer.close()
        await writer.wait_closed()
async def runserver():
    """Start the key/value HTTP server on 127.0.0.1:6111 and keep serving."""
    listen_on = ("127.0.0.1", 6111)
    srv = await asyncio.start_server(
        handle_request, *listen_on, reuse_address=True, reuse_port=True
    )
    print("Listening on 6111")
    # Leaving the context closes the server.
    async with srv:
        await srv.serve_forever()
# When executed as a script, run the asyncio server until the process stops.
if __name__ == "__main__":
    asyncio.run(runserver())
As you can see, I went a bit too far with this one. I also added a little bit of extra spice with the JSON parser. This HTTP-based database server recognises the need to have valid JSON, so it parses the payload before storing it.
Save a simple key
POST http://localhost:6111/hello
Content-Type: text/text
K: A quick brown fox jumps over the lazy dog.
GET the value
GET http://localhost:6111/hello
Save a json string
POST http://localhost:6111/json
Content-Type: application/json
{"hello": "world"}
and get it back
GET http://localhost:6111/json