Fancy tag parsing

Thanks to this post: https://mastodon.social/@makemoremusic/110545160558311215
This commit is contained in:
EamonnMR 2023-06-22 00:03:44 -04:00
parent 23866389fd
commit 453287e723
2 changed files with 55 additions and 26 deletions

View file

@ -8,11 +8,14 @@ Pass your desired wordpress site's URL and port with environment variables like
`URL='http://wordpress.example.com' PORT='7070' python server.py` `URL='http://wordpress.example.com' PORT='7070' python server.py`
Connect like this:
`lynx gopher://localhost:7070`
## Why ## Why
Because I'm not just a luddite, I'm an incredibly lazy luddite. Because I'm not just a luddite, I'm an incredibly lazy luddite.
## Project status ## Project status
Experimental. Lacks basic capabilities; right now it's just a menu with your last ten posts. Experimental. Just shows posts and pages. Could do a lot more cleanup, maybe adding a menu within each page to extract all of the links or something.

View file

@ -11,8 +11,8 @@ from bs4 import BeautifulSoup
wordpress_url = getenv("URL") wordpress_url = getenv("URL")
handlers = {} handlers = {}
formatters = {}
def register_handler(path: str): def register_handler(path: str):
def decorator_handler(func): def decorator_handler(func):
@ -20,37 +20,63 @@ def register_handler(path: str):
return func return func
return decorator_handler return decorator_handler
@register_handler("/post/{id}") def register_formatter(func):
def post(request: Request, id: int): formatters[func.__name__] = func
post = get(urljoin(wordpress_url, f"wp-json/wp/v2/posts/{id}")).json() return func
return BeautifulSoup(post["content"]["rendered"]).get_text()
@register_formatter
def h1(tag):
    """Format an ``<h1>`` tag as its text wrapped in ``===`` markers.

    The result starts and ends with a newline so the heading is set apart
    from neighbouring lines.
    """
    title = tag.get_text()
    return "\n === {} === \n".format(title)
@register_formatter
def h2(tag):
    """Format an ``<h2>`` tag as its text wrapped in ``==`` markers.

    The result starts and ends with a newline so the heading is set apart
    from neighbouring lines.
    """
    title = tag.get_text()
    return "\n == {} == \n".format(title)
@register_formatter
def h3(tag):
    """Format an ``<h3>`` tag as its text wrapped in single ``=`` markers.

    The result starts and ends with a newline so the heading is set apart
    from neighbouring lines.
    """
    title = tag.get_text()
    return "\n = {} = \n".format(title)
@register_formatter
def p(tag):
    """Format a ``<p>`` tag as its text content with all markup removed."""
    paragraph_text = tag.get_text()
    return paragraph_text
@register_formatter
def img(_tag):
    """Format an ``<img>`` tag as a fixed placeholder.

    The tag itself is ignored; every image yields the same placeholder text.
    """
    placeholder = "(Image Omitted)"
    return placeholder
def format_post(post):
    """Render the first item of a WordPress slug query as plain text.

    Args:
        post: Decoded JSON from a ``posts?slug=...`` or ``pages?slug=...``
            request. The handlers in this file pass the response through
            unchanged; a successful query is expected to be a list of
            matching objects, each with ``["content"]["rendered"]`` HTML.

    Returns:
        The text of every ``h1``, ``h2``, ``h3``, ``p`` and ``img`` tag in
        the rendered HTML, each run through its registered formatter and
        joined with newlines. ``"Not found."`` when the response holds no
        item to render.
    """
    # A slug that matches nothing yields an empty list, and an API error
    # arrives as a JSON object rather than a list; indexing [0] on either
    # would raise instead of giving the client a readable answer.
    if not isinstance(post, list) or not post:
        return "Not found."

    soup = BeautifulSoup(post[0]["content"]["rendered"], features="html.parser")
    tags = soup.find_all(["h1", "h2", "h3", "p", "img"])
    # Each tag name doubles as the key of its formatter in the registry.
    return "\n".join(formatters[tag.name](tag) for tag in tags)
@register_handler("/post/{slug}")
def post(request: Request, slug: str):
    """Serve a single blog post, looked up by slug, as plain text.

    Args:
        request: The incoming Gopher request (unused here).
        slug: The slug captured from the ``/post/{slug}`` selector.

    Returns:
        The post body formatted by ``format_post``.
    """
    from urllib.parse import quote

    # The slug comes straight from the client's selector. Percent-encode it
    # so characters such as "&" or "#" cannot add or truncate query
    # parameters in the request sent to WordPress.
    encoded_slug = quote(slug, safe="")
    endpoint = urljoin(wordpress_url, f"wp-json/wp/v2/posts?slug={encoded_slug}")
    return format_post(get(endpoint).json())
@register_handler("/page/{slug}")
def post(request: Request, slug: str):
    """Serve a single WordPress page, looked up by slug, as plain text.

    Args:
        request: The incoming Gopher request (unused here).
        slug: The slug captured from the ``/page/{slug}`` selector.

    Returns:
        The page body formatted by ``format_post``.
    """
    # NOTE(review): this function reuses the name ``post`` and so rebinds the
    # module-level name of the post handler. Dispatch is unaffected because
    # handlers are looked up by registered path, but renaming it to ``page``
    # would be clearer.
    from urllib.parse import quote

    # The slug comes straight from the client's selector. Percent-encode it
    # so characters such as "&" or "#" cannot add or truncate query
    # parameters in the request sent to WordPress.
    encoded_slug = quote(slug, safe="")
    endpoint = urljoin(wordpress_url, f"wp-json/wp/v2/pages?slug={encoded_slug}")
    return format_post(get(endpoint).json())
def handle(request): def handle(request):
for path, handler in handlers.items(): for path, handler in handlers.items():
parse_result = parse(path, request.path) parse_result = parse(path, request.path)
if parse_result is not None: if parse_result is not None:
return handler(request, **parse_result.named) return handler(request, **parse_result.named)
menu = [getenv("HEADER_TEXT")]
posts = get(urljoin(wordpress_url, "wp-json/wp/v2/posts")).json() posts = get(urljoin(wordpress_url, "wp-json/wp/v2/posts?filter[posts_per_page]=-1")).json()
pages = get(urljoin(wordpress_url, "wp-json/wp/v2/pages?filter[posts_per_page]=-1")).json()
menu = [ for heading, items, url in (
Item(itype=0, path=f"/post/{post['id']}", text=unescape(post['title']['rendered']), host=request.host, port=request.port) ("Pages", pages, "/page/"),
for i, post in enumerate(posts) ("Posts", posts, "/post/")
):
menu.append(f"<==={heading}===>")
menu += [
Item(itype=0, path=f"{url}{post['slug']}", text=unescape(post['title']['rendered']), host=request.host, port=request.port)
for i, post in enumerate(items)
] ]
return menu return menu
if __name__ == '__main__': if __name__ == '__main__':
pituophis.serve("127.0.0.1", int(getenv("PORT")), handler=handle) pituophis.serve("127.0.0.1", int(getenv("PORT")), handler=handle)
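# Itypes (Gopher item types, per RFC 1436):
# 0: FILE (plain text file)
# 1: dir (directory / menu)
# 2: CSO (CSO phone-book server)
# 3: ERR (error)
# 4: HQX (BinHexed Macintosh file)
# 5: BIN (DOS binary archive)
# 6: UUE (UUEncoded file)
# 7: SEARCH (index-search server)
# 8: TEL (telnet session)
# 9: BIN (generic binary file)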