Cleans up non-ascii jank, corrects host

This commit is contained in:
EamonnMR 2023-06-27 23:05:36 -04:00
parent 165d8be1f3
commit 2ed8ae6bcd
3 changed files with 21 additions and 23 deletions

View file

@ -6,7 +6,7 @@ Python gopher server using [https://github.com/dotcomboom/Pituophis/](https://gi
Pass your desired WordPress site's URL, along with the host and port the gopher server should serve on, as environment variables like so:
`URL='http://wordpress.example.com' PORT='7070' python server.py`
`HOST=gopher.example.com URL='http://wordpress.example.com' PORT='7070' python server.py`
Connect like this:

View file

@ -2,3 +2,4 @@ pituophis
requests
parse
beautifulsoup4
unidecode

View file

@ -8,6 +8,7 @@ from pituophis import Item, Request
from requests import get
from parse import parse
from bs4 import BeautifulSoup
from unidecode import unidecode
wordpress_url = getenv("URL")
@ -16,18 +17,6 @@ formatters = {}
def wrap(text):
    """Wrap *text* with ``text_wrap`` and join the resulting lines with newlines.

    The target width is read from the ``WIDTH`` environment variable on every
    call and defaults to 80 when the variable is unset.

    Args:
        text: The string to wrap.

    Returns:
        The wrapped lines joined with ``"\\n"``.

    Raises:
        ValueError: If ``WIDTH`` is set to something ``int()`` cannot parse.
    """
    width = int(getenv("WIDTH", 80))
    return "\n".join(text_wrap(text, width=width))
def excise_special_characters(text):
    """Replace typographic quote characters in *text* with their ASCII forms.

    Args:
        text: The string to clean up.

    Returns:
        A copy of *text* with the mapped characters replaced; all other
        characters are left untouched.
    """
    # TODO: Actually use .encode("ascii", "excise_special_characters") and register a custom handler
    # NOTE(review): the mapping keys showed up as empty strings in this view,
    # which would make str.replace insert a quote between every character.
    # The keys below are reconstructed from the replacement values (double,
    # single, double quote) -- confirm against the repository history.
    # A fourth entry appeared as "w" -> "w"; that is a no-op as shown, so it
    # is left out until the original character is known.
    replacements = str.maketrans({
        "\u201c": '"',  # left double quotation mark
        "\u2019": "'",  # right single quotation mark / apostrophe
        "\u201d": '"',  # right double quotation mark
    })
    # One pass over the string instead of one .replace() call per character.
    return text.translate(replacements)
def register_handler(path: str):
def decorator_handler(func):
handlers[path] = func
@ -56,6 +45,7 @@ def p(tag):
@register_formatter
def img(tag):
    """Render an image tag as a one-line text placeholder.

    Args:
        tag: The image element. Only its ``alt`` attribute is read, via
            ``tag.get("alt")``.

    Returns:
        ``"<Image Omitted: ...>"`` carrying the alt text, or a generic
        placeholder when the attribute is missing or empty.
    """
    # TODO: Use image to ascii here
    # Look the attribute up explicitly. On a BeautifulSoup Tag, `"alt" in tag`
    # tests the tag's children rather than its attributes, so the alt-text
    # branch would never be taken for an <img>.
    alt = tag.get("alt")
    if alt:
        return f"<Image Omitted: {alt}>"
    return "<Image Omitted (no alt text)>"
@ -63,9 +53,11 @@ def img(tag):
def format_post(post):
post = post[0]
header = f"""
** {post["title"]} **
{post["date"]} - {post["author"]}
** {post["title"]["rendered"]} **
"""
# TODO: Hit the API to get the author's name
# {post["date"]} - {post["author"]}
# TODO: Nicer date formatting
soup = BeautifulSoup(post["content"]["rendered"], features="html.parser")
tags = soup.find_all(formatters.keys())
return header + "\n".join(
@ -84,28 +76,33 @@ def post(request: Request, slug: str):
return format_post(page)
# Build a wrapped, plain-text teaser from a post's rendered excerpt HTML.
# NOTE(review): this span is a diff hunk shown without +/- markers or indentation; each
# near-duplicate pair of lines below looks like a removed line followed by its replacement --
# confirm against the commit before treating it as runnable code.
def format_excerpt(post):
text = BeautifulSoup(post["excerpt"]["rendered"]).get_text()
text = BeautifulSoup(post["excerpt"]["rendered"], features="html.parser").get_text()
# NOTE(review): the search string is empty as shown, and str.find("") always returns 0.
# A non-ASCII marker (possibly an ellipsis) was probably lost in extraction -- confirm.
cutoff = text.find("")
fudge_factor = 100 # Deal with the weird [html] link generated by the penny toys article
# The slice end is cutoff - fudge_factor; when that is negative, Python counts it from the end of the text.
return wrap(excise_special_characters(text[:cutoff - fudge_factor] + "..."))
return wrap(unidecode(text[:cutoff - fudge_factor] + "..."))
# Request entry point passed to pituophis.serve: dispatch to a registered handler, or build a menu.
# NOTE(review): this span is a diff hunk shown without +/- markers or indentation; removed and
# added lines are interleaved (the two loop headers, the duplicated "Posts" tuple entry, the
# pages fetch and its commented-out twin) -- confirm against the commit.
def handle(request):
print(f"path: {request.path}")
# Try each registered path pattern; the first one that parses wins, and its named fields
# are passed to the handler as keyword arguments.
for path, handler in handlers.items():
parse_result = parse(path, request.path)
if parse_result is not None:
return handler(request, **parse_result.named)
# The menu starts with the HEADER_TEXT environment value (None when the variable is unset).
menu = [getenv("HEADER_TEXT")]
# Fetch the listings as JSON from the WordPress REST API under wordpress_url.
posts = get(urljoin(wordpress_url, "wp-json/wp/v2/posts?filter[posts_per_page]=-1")).json()
pages = get(urljoin(wordpress_url, "wp-json/wp/v2/pages?filter[posts_per_page]=-1")).json()
#pages = get(urljoin(wordpress_url, "wp-json/wp/v2/pages?filter[posts_per_page]=-1")).json()
# Each entry is (section heading, list of items, path prefix used for the menu links).
for heading, items, url in (
("Pages", pages, "/page/"),
("Posts", posts, "/post/")
#("Pages", pages, "/page/"),
("Posts", posts, "/post/"),
):
menu.append(f"<==={heading}===>")
for i, post in enumerate(items):
menu.append(Item(itype=0, path=f"{url}{post['slug']}", text=unescape(post['title']['rendered']), host=request.host, port=request.port))
for post in items:
# One link per item: the title is HTML-unescaped, then passed through unidecode.
menu.append(Item(
itype=0, path=f"{url}{post['slug']}",
text=unidecode(unescape(post['title']['rendered'])),
host=request.host, port=request.port)
)
# The item's excerpt text is appended right after its link.
menu.append(format_excerpt(post))
return menu
# Script entry point: start the pituophis server with `handle` as the request handler.
# NOTE(review): the two serve() calls look like the removed and added lines of a diff hunk;
# the second reads the host from HOST and falls back to "127.0.0.1" -- confirm against the commit.
# PORT must be set: int(getenv("PORT")) raises TypeError when the variable is missing.
if __name__ == '__main__':
pituophis.serve("127.0.0.1", int(getenv("PORT")), handler=handle)
pituophis.serve(getenv("HOST","127.0.0.1"), int(getenv("PORT")), handler=handle)