gem

a fork of khuxkm's gemini to web proxy
Log | Files | Refs

commit 083d4cb8d0fac2b4f267360c3e85706c18c6bd98
Author: xfnw <xfnw@ttm.sh>
Date:   Thu, 10 Dec 2020 16:42:11 -0500

add css and stuff

Diffstat:
A.gitignore | 2++
Agem.css | 87+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agem2html.py | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aindex.cgi | 246+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amimeparse.py | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 485 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+__pycache__/*
diff --git a/gem.css b/gem.css
@@ -0,0 +1,87 @@
+/* ~xfnw's css! */
+/* gpl affero v3 */
+body {
+background: #141415;
+font-family: mono;
+color: #cdcdcd;
+max-width: 60em;
+margin: auto;
+padding: 10px;
+}
+
+h1, h2, h3 {
+  font-weight: 100;
+}
+
+a {
+text-decoration: none;
+color: #acc;
+}
+
+a:hover {
+text-decoration: underline;
+}
+
+
+code, kbd, pre, .flex div {
+background: #222;
+color: #eee;
+}
+
+pre, .flex div {
+padding: 5px;
+overflow:auto;
+}
+
+mark {
+  background-color: #dc5;
+}
+
+@font-face {
+  font-family: 'VT323';
+  src: url(https://xfnw.ttm.sh/assets/PressStart2P-Regular.ttf);
+}
+
+
+summary {
+  cursor: pointer;
+  padding: 10px;
+}
+
+th, summary {
+  background-color: #222;
+}
+
+th, td, details {
+  border: 1px solid #222;
+}
+th, td {
+  padding: 5px;
+}
+
+table, details *:not(summary) {
+  border-collapse: collapse;
+  margin: 10px;
+}
+
+.flex {
+  display: flex;
+  flex-wrap: wrap;
+}
+
+.flex div {
+  display:flex;
+  min-width: 10em;
+  width: 100%;
+  margin: 10px;
+  flex: 1 1 0;
+  flex-direction: column;
+}
+
+.flex div h3 {
+  margin: 0;
+}
+.flex div p {
+  flex: 1 1 0;
+}
+
diff --git a/gem2html.py b/gem2html.py
@@ -0,0 +1,88 @@
+import random, functools, os
+from html import escape
+
+_rand_n = lambda: functools.reduce(lambda x, y: (x<<8)+y,os.urandom(4))
+
+ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
+USED_IDS = set()
+def rand_id():
+    n = _rand_n()
+    id = ""
+    while n>0:
+        n, index = divmod(n,len(ALPHABET))
+        id = ALPHABET[index]+id
+    if id in USED_IDS: return rand_id()
+    return id
+
+def gem2html(content,link_callback=lambda url, text: (url, text)):
+    lines = content.splitlines()
+    out = """<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link rel="stylesheet" href="gem.css"></head>
+<body>
+
+    """
+    pre = False
+    pre_alt = False
+    set_title = False
+    for line in lines:
+        if pre:
+            if line[:3]=="```":
+                pre=False
+                out+="</pre>\n"
+                if pre_alt:
+                    out+="</figure>\n"
+                    pre_alt=False
+            else:
+                out+=escape(line)+"\n"
+        else:
+            if line[:3]=="```":
+                if len(line)>3:
+                    cap_id = rand_id()
+                    out+="<figure role='img' aria-captionedby='{0}'><figcaption id='{0}' style='clip: rect(0 0 0 0); clip-path: inset(50%); height: 1px; overflow: hidden; position: absolute; white-space: nowrap; width: 1px;'>{1}</figcaption>\n".format(cap_id,escape(line[3:]))
+                    pre_alt = True
+                pre = True
+                out+="<pre>\n"
+            elif line.startswith("#"):
+                if line[:3]=="###":
+                    out+="<h3>{}</h3>".format(escape(line[3:].strip()))
+                elif line[:2]=="##":
+                    out+="<h2>{}</h2>".format(escape(line[2:].strip()))
+                elif line[:1]=="#":
+                    out+="<h1>{}</h1>".format(escape(line[1:].strip()))
+                    if not set_title:
+                        out+="<title>{}</title>".format(escape(line[1:].strip()))
+                        set_title = True
+            elif line.startswith("* "):
+                out += "<ul>\n<li>{}</li>\n</ul>\n".format(escape(line[1:].strip()))
+                # combine consecutive unordered list items into one unordered list
+                out = out.replace("</ul>\n<ul>\n","")
+            elif line.startswith("=>"):
+                parts = line.split(None,2)
+                try:
+                    url, text = parts[1:]
+                except ValueError:
+                    try:
+                        url=parts[1]
+                        text=parts[1]
+                    except:
+                        # no link content at all
+                        # just put a literal => in there
+                        out+="<p></p>".format(escape(parts[0]))
+                        continue
+                # now comes the fun part, use the link callback to mutilate these
+                url, text = link_callback(url, text)
+                # and now render
+                out+="<p><a href='{}'>{}</a></p>".format(escape(url),escape(text))
+            elif line.startswith(">"):
+                out+="<blockquote><p>{}</p></blockquote>".format(escape(line))
+            else: # any other line is a text line
+                if line:
+                    out+="<p>{}</p>".format(escape(line))
+                else:
+                    out+="<p><br></p>"
+    out+="</body>"
+    return out
diff --git a/index.cgi b/index.cgi
@@ -0,0 +1,246 @@
+#!/usr/bin/python3
+import cgi, os, json, socket, ssl, threading, time, sys, mimeparse, gem2html
+from html import escape
+from urllib.parse import urlparse, parse_qs, urlencode, uses_relative, uses_netloc, urlunparse, urljoin
+uses_relative.append("gemini")
+uses_netloc.append("gemini")
+
+class StartComparison(str):
+    def __eq__(self,lhs):
+        return lhs.startswith(self)
+
+class ResponseCodes:
+    # use like: response.status==ResponseCodes.GENERIC_SUCCESS
+    GENERIC_INPUT = StartComparison("1")
+    GENERIC_SUCCESS = StartComparison("2")
+    GENERIC_REDIRECT = StartComparison("3")
+    GENERIC_TEMPFAIL = StartComparison("4")
+    GENERIC_PERMFAIL = StartComparison("5")
+    GENERIC_CERTFAIL = StartComparison("6")
+
+    # the rest of these are just normal strings
+    INPUT_NEEDED = "10"
+    INPUT_NEEDED_SENSITIVE = "11"
+
+    SUCCESS = "20"
+
+    REDIRECT_TEMP = "30"
+    REDIRECT_PERM = "31"
+
+    TEMPFAIL_GENERIC = "40"
+    TEMPFAIL_SERVER_UNAVAILABLE = "41"
+    TEMPFAIL_CGI_ERROR = "42"
+    TEMPFAIL_PROXY_ERROR = "43"
+    TEMPFAIL_SLOW_DOWN = "44"
+
+    PERMFAIL_GENERIC = "50"
+    PERMFAIL_NOT_FOUND = "51"
+    PERMFAIL_GONE = "52"
+    PERMFAIL_PROXY_REFUSED = "53"
+    PERMFAIL_BAD_REQUEST = "59"
+
+    CERTFAIL_NEED_CERT = "60"
+    CERTFAIL_BAD_CERT = "61"
+    CERTFAIL_INVALID_CERT = "62"
+
+class ResponseObject:
+    def __init__(self,status,meta,content):
+        self.status=status
+        self.meta=meta
+        self.content=content
+
+def connect_factory(url,parsed=None):
+    if parsed is None: parsed = urlparse(url)
+    event = threading.Event()
+    def _connect():
+        ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+        ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
+        try:
+            sock = socket.create_connection((parsed.hostname,parsed.port or 1965),2)
+        except socket.timeout:
+            threading.current_thread().ret="Socket connection timed out"
+            return
+        sock.settimeout(5)
+        ssock = ctx.wrap_socket(sock,server_hostname=parsed.hostname)
+        ssock.sendall((url+"\r\n").encode("utf-8"))
+        out = b''
+        try:
+            while (data:=ssock.recv(1024)) and not event.is_set():
+                out+=data
+        except socket.timeout:
+            threading.current_thread().ret="Read timed out, the page is probably too big."
+            return
+        ssock.shutdown(socket.SHUT_RDWR)
+        ssock.close()
+        header, data = out.split(b"\n",1)
+        header = header.strip().decode("utf-8")
+        status, meta = header.split(None,1)
+        threading.current_thread().ret = ResponseObject(status,meta,data)
+    return _connect, event
+
+BASE_URL = "https://xfnw.ttm.sh/gem/?"
+def link_callback(lurl, text):
+    global url
+    lurl = urljoin(url,lurl)
+    parsed = urlparse(lurl)
+    query = None
+    if parsed.query:
+        query = parsed.query
+        parsed = parsed._replace(query=None)
+        lurl = urlunparse(parsed)
+    params = dict(addr=lurl)
+    if query is not None:
+        params["query"]=query
+    return BASE_URL+urlencode(params), text
+
+qs = parse_qs(os.environ["QUERY_STRING"])
+
+url = next(iter(qs.get("addr",["gemini://tilde.team/~xfnw/start.gmi"])))
+parsed = urlparse(url)
+query = next(iter(qs.get("query",[None]))) or parsed.query
+if query:
+    parsed = parsed._replace(query=query)
+    url = urlunparse(parsed)
+
+if parsed.scheme and parsed.scheme!="gemini":
+    print("Content-Type: text/html")
+    print()
+    print("""<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="refresh" content="0;URL='{0}'">
+<title>Redirecting...</title>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link rel="stylesheet" href="gem.css">
+</head>
+<body>
+<p>If the redirect doesn't work, <a href="{0}">click here.</a></p>
+</body>
+</html>""".format(escape(url)))
+    sys.exit()
+
+connect_func, killswitch = connect_factory(url,parsed)
+connect_thread = threading.Thread(target=connect_func)
+# now here's the fun part
+# we'll join the thread with a timeout
+connect_thread.start()
+connect_thread.join(5)
+# now if the thread's still alive, we'll set the killswitch and join the thread again
+if connect_thread.is_alive():
+    killswitch.set()
+    connect_thread.join()
+# now we know for a fact the thread is done
+# get the return value as Thread.ret (done manually in the function definition)
+response = getattr(connect_thread,"ret")
+if type(response)!=ResponseObject:
+    print("""Content-Type: text/html
+
+<!DOCTYPE html>
+<html>
+<head>
+<title>Error...</title>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link rel="stylesheet" href="gem.css">
+</head>
+<body>
+<pre>""")
+    print(response or "Unknown error occurred.")
+    sys.exit()
+
+if response.status==ResponseCodes.GENERIC_INPUT:
+    print("Content-Type: text/html")
+    print()
+    print("""<!DOCTYPE html>
+    <html>
+    <head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <link rel="stylesheet" href="gem.css"></head>
+    <title>The page you want to visit has requested input.</title>
+    </head>
+    <body>
+<p>The page you want to visit has requested input.</p>
+<form method='GET' action='./'>
+<label for='input_prompt'>{}</label><br>
+<input type='{}' name='query' id='input_prompt' autocomplete="off"/>
+<input type='hidden' name='addr' value='{}' />
+<button type='submit'>Submit</button>
+</form>
+<p>I take no responsibility if shoulder-surfers read your input.</p>""".format(escape(response.meta),"password" if response.status==ResponseCodes.INPUT_NEEDED_SENSITIVE else "text",escape(url)))
+elif response.status==ResponseCodes.GENERIC_SUCCESS:
+    mime = response.meta
+    content = response.content
+    mimeparsed = mimeparse.parse_mime(mime)
+    if mimeparsed[0][0]=="text": # mimeparsed is ([type, subtype],parameters)
+        content = content.decode(mimeparsed[1].get("charset","UTF-8"))
+        if mimeparsed[0][1]=="html":
+            mime = mime.replace("text/html","text/plain")
+        elif mimeparsed[0][1]=="gemini":
+            mime = "text/html; charset=UTF-8"
+            content = gem2html.gem2html(content,link_callback)
+        print("Content-Type: "+mime)
+        print("")
+        print(content)
+    else:
+        print("Content-Type: "+mime)
+        print()
+        sys.stdout.flush()
+        sys.stdout.buffer.write(content)
+elif response.status==ResponseCodes.GENERIC_REDIRECT:
+    print("Content-Type: text/html")
+    print()
+    print("""<!DOCTYPE html>
+<html>
+<head>
+<title>Redirecting...</title>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link rel="stylesheet" href="gem.css">
+</head>
+<body>
+""")
+    print("<p>The current page ({}) wishes to redirect you to {}.</p>".format(escape(url),escape(response.meta)))
+    tmp, tmp2 = link_callback(response.meta,"Click here to follow this redirect.")
+    print("<p><a href={}>{}</a></p>".format(escape(tmp),escape(tmp2)))
+elif response.status==ResponseCodes.GENERIC_TEMPFAIL or response.status==ResponseCodes.GENERIC_PERMFAIL:
+    print("""Content-Type: text/html
+
+<!DOCTYPE html>
+<html>
+<head>
+<title>Error...</title>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<link rel="stylesheet" href="gem.css">
+</head>
+<body>
+<pre>""")
+    msg = "Unknown {} error".format("permanent" if response.status==ResponseCodes.GENERIC_PERMFAIL else "temporary")
+    if response.status==ResponseCodes.TEMPFAIL_SERVER_UNAVAILABLE:
+        msg = "Server unavailable"
+    if response.status==ResponseCodes.TEMPFAIL_CGI_ERROR:
+        msg = "CGI script error"
+    if response.status==ResponseCodes.TEMPFAIL_PROXY_ERROR:
+        msg = "Proxy error"
+    if response.status==ResponseCodes.TEMPFAIL_SLOW_DOWN:
+        msg = "Slow down"
+    if response.status==ResponseCodes.PERMFAIL_NOT_FOUND:
+        msg = "Not Found"
+    if response.status==ResponseCodes.PERMFAIL_GONE:
+        msg = "Gone"
+    if response.status==ResponseCodes.PERMFAIL_PROXY_REFUSED:
+        msg = "Proxy request refused"
+    if response.status==ResponseCodes.PERMFAIL_BAD_REQUEST:
+        msg = "Bad request"
+    print(f"Error {response.status}: {msg}")
+    print(f"Server says: {response.meta}")
+elif response.status==ResponseCodes.GENERIC_CERTFAIL:
+    print("Page requires the use of client certificates, which are outside the scope of this proxy.")
+else:
+    print("Page returned status code {} which is unimplemented.".format(response.status))
+    print("META = {!r}".format(response.meta))
+    print("Response body = {!r}".format(response.content))
diff --git a/mimeparse.py b/mimeparse.py
@@ -0,0 +1,62 @@
+import string
+# Utility function to parse a MIME type
+def parse_mime(mimetype):
+    mimetype = mimetype.strip()
+    index = 0
+    type = ""
+    # type is everything before the /
+    while index<len(mimetype) and mimetype[index]!="/":
+        type+=mimetype[index]
+        index+=1
+    index+=1
+    subtype = ""
+    # subtype is everything after the slash and before the semicolon (if the latter exists)
+    while index<len(mimetype) and mimetype[index]!=";":
+        subtype+=mimetype[index]
+        index+=1
+    index+=1
+    # if there's no semicolon, there are no params
+    if index>=len(mimetype): return [type,subtype], dict()
+    params = dict()
+    while index<len(mimetype):
+        # skip whitespace
+        while index<len(mimetype) and mimetype[index] in string.whitespace:
+            index+=1
+        paramName = ""
+        # the parameter name is everything before the = or ;
+        while index<len(mimetype) and mimetype[index] not in "=;":
+            paramName+=mimetype[index]
+            index+=1
+        # if the string is over or there isn't an equals sign, there's no param value
+        if index>=len(mimetype) or mimetype[index]==";":
+            index+=1
+            params[paramName]=None
+            continue
+        # otherwise, grab the param value
+        index+=1
+        paramValue = ""
+        if mimetype[index]=='"':
+            index+=1
+            while True:
+                while index<len(mimetype) and mimetype[index] not in '\\"':
+                    paramValue+=mimetype[index]
+                    index+=1
+                if index>=len(mimetype): break
+                c = mimetype[index]
+                index+=1
+                if c=="\\":
+                    if index>=len(mimetype):
+                        paramValue+=c
+                        break
+                    paramValue+=mimetype[index]
+                    index+=1
+                else:
+                    break
+            # skip until next ;
+            while index<len(mimetype) and mimetype[index]!=";": index+=1
+        else:
+            while index<len(mimetype) and mimetype[index]!=";":
+                paramValue+=mimetype[index]
+                index+=1
+        if paramName: params[paramName]=paramValue
+    return [type, subtype], params