Use BeautifulSoup to derive the page title from the first HTML h1
Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
@@ -13,6 +13,7 @@ import os
|
||||
import re
|
||||
|
||||
import markdown
|
||||
from bs4 import BeautifulSoup
|
||||
from markupsafe import Markup
|
||||
|
||||
from incorporealcms import jinja_env
|
||||
@@ -82,7 +83,18 @@ def parse_md(path: str, pages_root: str):
|
||||
logger.debug("file metadata: %s", md.Meta)
|
||||
|
||||
rel_path = os.path.relpath(path, pages_root)
|
||||
page_name = get_meta_str(md, 'title') if md.Meta.get('title') else instance_resource_path_to_request_path(rel_path)
|
||||
|
||||
soup = BeautifulSoup(content, features='lxml')
|
||||
|
||||
# derive the page title: first from the Markdown metadata ('title'), then from the first h1, and finally from the path
|
||||
page_name = None
|
||||
if md.Meta.get('title'):
|
||||
page_name = get_meta_str(md, 'title')
|
||||
elif h1_tag := soup.find('h1'):
|
||||
page_name = h1_tag.string
|
||||
elif not page_name:
|
||||
page_name = instance_resource_path_to_request_path(rel_path)
|
||||
|
||||
page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX
|
||||
logger.debug("title (potentially derived): %s", page_title)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user