use h1-as-title logic while generating breadcrumbs

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
Brian S. Stephan 2026-01-28 16:08:41 -06:00
parent ee4215ede2
commit 204e7bc416
Signed by: bss
GPG Key ID: 3DE06D3180895FCB
2 changed files with 30 additions and 12 deletions

View File

@ -84,6 +84,21 @@ def parse_md(path: str, pages_root: str):
rel_path = os.path.relpath(path, pages_root) rel_path = os.path.relpath(path, pages_root)
page_name, page_description = _get_metadata_from_parsed_page(md, content, rel_path)
page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX
logger.debug("title (potentially derived): %s", page_title)
return content, md, page_name, page_title, page_description, mtime
def _get_metadata_from_parsed_page(md, content, path: str):
"""Get the page name and description from a Markdown object and/or HTML output of a page.
Args:
md: the parsed Markdown object, potentially including Meta tags
content: the Markdown page content converted to HTML, to run through BeautifulSoup
path: path of the page, to derive the name from as a fallback
"""
soup = BeautifulSoup(content, features='lxml') soup = BeautifulSoup(content, features='lxml')
# get the page title first from the markdown tags, second from the first h1, last from the path # get the page title first from the markdown tags, second from the first h1, last from the path
@ -93,7 +108,7 @@ def parse_md(path: str, pages_root: str):
elif h1_tag := soup.find('h1'): elif h1_tag := soup.find('h1'):
page_name = h1_tag.string page_name = h1_tag.string
elif not page_name: elif not page_name:
page_name = instance_resource_path_to_request_path(rel_path) page_name = instance_resource_path_to_request_path(path)
# get the page description from the markdown tags or first paragraph # get the page description from the markdown tags or first paragraph
page_description = None page_description = None
@ -103,10 +118,7 @@ def parse_md(path: str, pages_root: str):
if page_description := p_tag.string: if page_description := p_tag.string:
page_description = page_description.replace('\n', ' ') page_description = page_description.replace('\n', ' ')
page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX return page_name, page_description
logger.debug("title (potentially derived): %s", page_title)
return content, md, page_name, page_title, page_description, mtime
def handle_markdown_file_path(path: str, pages_root: str) -> str: def handle_markdown_file_path(path: str, pages_root: str) -> str:
@ -175,9 +187,8 @@ def generate_parent_navs(path, pages_root: str):
try: try:
with open(os.path.join(pages_root, path), 'r') as entry_file: with open(os.path.join(pages_root, path), 'r') as entry_file:
entry = entry_file.read() entry = entry_file.read()
_ = Markup(md.convert(entry)) # nosec B704 content = Markup(md.convert(entry)) # nosec B704
page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title') page_name, _ = _get_metadata_from_parsed_page(md, content, os.path.relpath(path, parent_resource_dir))
else request_path_to_breadcrumb_display(request_path))
return generate_parent_navs(parent_resource_path, pages_root) + [(page_name, request_path)] return generate_parent_navs(parent_resource_path, pages_root) + [(page_name, request_path)]
except FileNotFoundError: except FileNotFoundError:
return generate_parent_navs(parent_resource_path, pages_root) + [(request_path, request_path)] return generate_parent_navs(parent_resource_path, pages_root) + [(request_path, request_path)]

View File

@ -26,14 +26,21 @@ def test_generate_page_navs_index():
assert generate_parent_navs('index.md', PAGES_DIR) == [('example.org', '/')] assert generate_parent_navs('index.md', PAGES_DIR) == [('example.org', '/')]
def test_generate_page_navs_title_from_h1():
"""Test that a page with no title metadata takes its nav name from its first h1."""
assert generate_parent_navs('no-title.md', PAGES_DIR) == [('example.org', '/'),
('this page doesn\'t have a title!', '/no-title')]
def test_generate_page_navs_subdir_index(): def test_generate_page_navs_subdir_index():
"""Test that dir pages have navs to the root and themselves.""" """Test that dir pages have navs to the root and themselves."""
assert generate_parent_navs('subdir/index.md', PAGES_DIR) == [('example.org', '/'), ('subdir', '/subdir/')] assert generate_parent_navs('subdir/index.md', PAGES_DIR) == [('example.org', '/'), ('another page', '/subdir/')]
def test_generate_page_navs_subdir_real_page(): def test_generate_page_navs_subdir_real_page():
"""Test that real pages have navs to the root, their parent, and themselves.""" """Test that real pages have navs to the root, their parent, and themselves."""
assert generate_parent_navs('subdir/page.md', PAGES_DIR) == [('example.org', '/'), ('subdir', '/subdir/'), assert generate_parent_navs('subdir/page.md', PAGES_DIR) == [('example.org', '/'),
('another page', '/subdir/'),
('Page', '/subdir/page')] ('Page', '/subdir/page')]
@ -42,7 +49,7 @@ def test_generate_page_navs_subdir_with_title_parsing_real_page():
assert generate_parent_navs('subdir-with-title/page.md', PAGES_DIR) == [ assert generate_parent_navs('subdir-with-title/page.md', PAGES_DIR) == [
('example.org', '/'), ('example.org', '/'),
('SUB!', '/subdir-with-title/'), ('SUB!', '/subdir-with-title/'),
('page', '/subdir-with-title/page') ('/page', '/subdir-with-title/page')
] ]
@ -51,7 +58,7 @@ def test_generate_page_navs_subdir_with_no_index():
assert generate_parent_navs('no-index-dir/page.md', PAGES_DIR) == [ assert generate_parent_navs('no-index-dir/page.md', PAGES_DIR) == [
('example.org', '/'), ('example.org', '/'),
('/no-index-dir/', '/no-index-dir/'), ('/no-index-dir/', '/no-index-dir/'),
('page', '/no-index-dir/page') ('/page', '/no-index-dir/page')
] ]