diff --git a/incorporealcms/markdown.py b/incorporealcms/markdown.py index 9527d19..8f753c0 100644 --- a/incorporealcms/markdown.py +++ b/incorporealcms/markdown.py @@ -84,6 +84,21 @@ def parse_md(path: str, pages_root: str): rel_path = os.path.relpath(path, pages_root) + page_name, page_description = _get_metadata_from_parsed_page(md, content, rel_path) + page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX + logger.debug("title (potentially derived): %s", page_title) + + return content, md, page_name, page_title, page_description, mtime + + +def _get_metadata_from_parsed_page(md, content, path: str): + """Get the page name and description from a Markdown object and/or HTML output of a page. + + Args: + md: the parsed Markdown object, potentially including Meta tags + content: the Markdown page content converted to HTML, to run through BeautifulSoup + path: path of the page, to derive the name from as a fallback + """ soup = BeautifulSoup(content, features='lxml') # get the page title first from the markdown tags, second from the first h1, last from the path @@ -93,7 +108,7 @@ def parse_md(path: str, pages_root: str): elif h1_tag := soup.find('h1'): page_name = h1_tag.string elif not page_name: - page_name = instance_resource_path_to_request_path(rel_path) + page_name = instance_resource_path_to_request_path(path) # get the page description from the markdown tags or first paragraph page_description = None @@ -103,10 +118,7 @@ def parse_md(path: str, pages_root: str): if page_description := p_tag.string: page_description = page_description.replace('\n', ' ') - page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX - logger.debug("title (potentially derived): %s", page_title) - - return content, md, page_name, page_title, page_description, mtime + return page_name, page_description def handle_markdown_file_path(path: str, pages_root: str) -> str: @@ -175,9 +187,8 @@ def 
generate_parent_navs(path, pages_root: str): try: with open(os.path.join(pages_root, path), 'r') as entry_file: entry = entry_file.read() - _ = Markup(md.convert(entry)) # nosec B704 - page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title') - else request_path_to_breadcrumb_display(request_path)) + content = Markup(md.convert(entry)) # nosec B704 + page_name, _ = _get_metadata_from_parsed_page(md, content, os.path.relpath(path, parent_resource_dir)) return generate_parent_navs(parent_resource_path, pages_root) + [(page_name, request_path)] except FileNotFoundError: return generate_parent_navs(parent_resource_path, pages_root) + [(request_path, request_path)] diff --git a/tests/test_markdown.py b/tests/test_markdown.py index c5a9dfd..addb36e 100644 --- a/tests/test_markdown.py +++ b/tests/test_markdown.py @@ -26,14 +26,21 @@ def test_generate_page_navs_index(): assert generate_parent_navs('index.md', PAGES_DIR) == [('example.org', '/')] +def test_generate_page_navs_title_from_h1(): + """Test that a page with no title metadata takes its nav name from its first h1.""" + assert generate_parent_navs('no-title.md', PAGES_DIR) == [('example.org', '/'), + ('this page doesn\'t have a title!', '/no-title')] + + def test_generate_page_navs_subdir_index(): """Test that dir pages have navs to the root and themselves.""" - assert generate_parent_navs('subdir/index.md', PAGES_DIR) == [('example.org', '/'), ('subdir', '/subdir/')] + assert generate_parent_navs('subdir/index.md', PAGES_DIR) == [('example.org', '/'), ('another page', '/subdir/')] def test_generate_page_navs_subdir_real_page(): """Test that real pages have navs to the root, their parent, and themselves.""" - assert generate_parent_navs('subdir/page.md', PAGES_DIR) == [('example.org', '/'), ('subdir', '/subdir/'), + assert generate_parent_navs('subdir/page.md', PAGES_DIR) == [('example.org', '/'), + ('another page', '/subdir/'), ('Page', '/subdir/page')] @@ -42,7 +49,7 @@ def 
test_generate_page_navs_subdir_with_title_parsing_real_page(): assert generate_parent_navs('subdir-with-title/page.md', PAGES_DIR) == [ ('example.org', '/'), ('SUB!', '/subdir-with-title/'), - ('page', '/subdir-with-title/page') + ('/page', '/subdir-with-title/page') ] @@ -51,7 +58,7 @@ def test_generate_page_navs_subdir_with_no_index(): assert generate_parent_navs('no-index-dir/page.md', PAGES_DIR) == [ ('example.org', '/'), ('/no-index-dir/', '/no-index-dir/'), - ('page', '/no-index-dir/page') + ('/page', '/no-index-dir/page') ]