Use BeautifulSoup to derive the page title from the first HTML h1

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
2026-01-28 14:08:48 -06:00
parent 3ca13cc6f8
commit 20673c178a
8 changed files with 53 additions and 5 deletions

View File

@@ -13,6 +13,7 @@ import os
import re
import markdown
from bs4 import BeautifulSoup
from markupsafe import Markup
from incorporealcms import jinja_env
@@ -82,7 +83,18 @@ def parse_md(path: str, pages_root: str):
logger.debug("file metadata: %s", md.Meta)
rel_path = os.path.relpath(path, pages_root)
page_name = get_meta_str(md, 'title') if md.Meta.get('title') else instance_resource_path_to_request_path(rel_path)
soup = BeautifulSoup(content, features='lxml')
# get the page title first from the markdown metadata ('title'), second from the first h1, last from the path
page_name = None
if md.Meta.get('title'):
page_name = get_meta_str(md, 'title')
elif h1_tag := soup.find('h1'):
page_name = h1_tag.string
elif not page_name:
page_name = instance_resource_path_to_request_path(rel_path)
page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX
logger.debug("title (potentially derived): %s", page_title)