use h1-as-title logic while generating breadcrumbs

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
2026-01-28 16:08:41 -06:00
parent ee4215ede2
commit 204e7bc416
2 changed files with 30 additions and 12 deletions
--- a/incorporealcms/markdown.py
+++ b/incorporealcms/markdown.py
@@ -84,6 +84,21 @@ def parse_md(path: str, pages_root: str):

    rel_path = os.path.relpath(path, pages_root)

+    page_name, page_description = _get_metadata_from_parsed_page(md, content, rel_path)
+    page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX
+    logger.debug("title (potentially derived): %s", page_title)
+
+    return content, md, page_name, page_title, page_description, mtime
+
+
+def _get_metadata_from_parsed_page(md, content, path: str):
+    """Get the page name and description from a Markdown object and/or HTML output of a page.
+
+    Args:
+        md: the parsed Markdown object, potentially including Meta tags
+        content: the Markdown page content converted to HTML, to run through BeautifulSoup
+        path: path of the page, to derive the name from as a fallback
+    """
    soup = BeautifulSoup(content, features='lxml')

    # get the page title first from the markdown tags, second from the first h1, last from the path
@@ -93,7 +108,7 @@ def parse_md(path: str, pages_root: str):
    elif h1_tag := soup.find('h1'):
        page_name = h1_tag.string
    elif not page_name:
-        page_name = instance_resource_path_to_request_path(rel_path)
+        page_name = instance_resource_path_to_request_path(path)

    # get the page description from the markdown tags or first paragraph
    page_description = None
@@ -103,10 +118,7 @@ def parse_md(path: str, pages_root: str):
        if page_description := p_tag.string:
            page_description = page_description.replace('\n', ' ')

-    page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX
-    logger.debug("title (potentially derived): %s", page_title)
-
-    return content, md, page_name, page_title, page_description, mtime
+    return page_name, page_description


 def handle_markdown_file_path(path: str, pages_root: str) -> str:
@@ -175,9 +187,8 @@ def generate_parent_navs(path, pages_root: str):
        try:
            with open(os.path.join(pages_root, path), 'r') as entry_file:
                entry = entry_file.read()
-            _ = Markup(md.convert(entry))       # nosec B704
-            page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title')
-                         else request_path_to_breadcrumb_display(request_path))
+            content = Markup(md.convert(entry))  # nosec B704
+            page_name, _ = _get_metadata_from_parsed_page(md, content, os.path.relpath(path, parent_resource_dir))
            return generate_parent_navs(parent_resource_path, pages_root) + [(page_name, request_path)]
        except FileNotFoundError:
            return generate_parent_navs(parent_resource_path, pages_root) + [(request_path, request_path)]