static site generator part 5 --- consolidate markdown code

all that's left is basically the instance copier code (ssg.py) and
markdown handling (markdown.py)

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
Brian S. Stephan 2025-03-15 08:53:08 -05:00
parent c9d17523ce
commit 746314f4ed
Signed by: bss
GPG Key ID: 3DE06D3180895FCB
4 changed files with 192 additions and 190 deletions

View File

@ -19,7 +19,7 @@ env = Environment(
)
def init_instance(instance_path: str, test_config: dict = None):
def init_instance(instance_path: str, extra_config: dict = None):
"""Create the instance context, with allowances for customizing path and test settings."""
# load the instance config.json, if there is one
instance_config = os.path.join(instance_path, 'config.json')
@ -29,8 +29,8 @@ def init_instance(instance_path: str, test_config: dict = None):
cprint(f"splicing {config_dict} into the config", 'yellow')
Config.update(config_dict)
if test_config:
Config.update(test_config)
if extra_config:
Config.update(extra_config)
# stash some stuff
Config.INSTANCE_DIR = os.path.abspath(instance_path)

View File

@ -1,72 +0,0 @@
"""Miscellaneous helper functions and whatnot.
SPDX-FileCopyrightText: © 2021 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import datetime
import logging
import os
import re
import markdown
from markupsafe import Markup
from incorporealcms.config import Config
logger = logging.getLogger(__name__)
def get_meta_str(md, key):
    """Return the metadata stored under ``key`` on the parsed Markdown object as a single string.

    Args:
        md: the Markdown parser object whose ``Meta`` mapping is consulted
        key: the metadata key to look up

    Returns:
        the values found for the key joined with single spaces, or '' if the key is unset or empty
    """
    values = md.Meta.get(key)
    if not values:
        return ""
    return " ".join(values)
def init_md():
    """Create a fresh Markdown parser configured from the application config.

    This used to be done once at the app level in __init__, but extensions like footnotes
    apparently assume the parser only lives for the length of parsing one document, and create
    double footnote ref links if the one parser sees the same document multiple times. A new
    parser is therefore built on every call.

    Returns:
        a new markdown.Markdown instance
    """
    # the configured extensions are always supplemented with the ones
    # our app depends on, like the meta extension
    required_extensions = ['meta']
    return markdown.Markdown(
        extensions=Config.MARKDOWN_EXTENSIONS + required_extensions,
        extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS,
    )
def instance_resource_path_to_request_path(path):
    """Reverse a relative disk path to the path that would show up in a URL request.

    A trailing ``index.md`` path segment is dropped entirely (leaving the directory, with its
    trailing slash), and any other trailing ``.md`` extension is stripped.

    Args:
        path: the file path, relative to the directory the request paths are rooted at

    Returns:
        the request path, always starting with '/'
    """
    # the dots are escaped so only a literal ".md" extension is removed, and index.md is only
    # stripped when it is a whole path segment (so "myindex.md" maps to "/myindex", not "/my")
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    return '/' + re.sub(r'\.md$', '', without_index)
def parse_md(path: str):
    """Read and render a Markdown file, returning the result along with derived page data.

    Args:
        path: the path to the file to render

    Returns:
        a tuple of (rendered content wrapped in Markup, the Markdown parser object used, the page
        name, the page title, the file's modification time as a timezone-aware UTC datetime)

    Raises:
        OSError: the file could not be opened or read
        ValueError: logged as a parse/render failure and re-raised
        TypeError: logged as a load/render failure and re-raised
    """
    try:
        logger.debug("opening path '%s'", path)
        with open(path, 'r') as source:
            modified_ts = os.path.getmtime(source.name)
            mtime = datetime.datetime.fromtimestamp(modified_ts, tz=datetime.timezone.utc)
            entry = source.read()
        logger.debug("path '%s' read", path)
        # a new parser per document; see init_md() for why it is not shared
        md = init_md()
        rendered = md.convert(entry)
        content = Markup(rendered)
    except OSError:
        logger.exception("path '%s' could not be opened!", path)
        raise
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        raise
    except TypeError:
        logger.exception("error loading/rendering markdown!")
        raise

    logger.debug("file metadata: %s", md.Meta)

    # the page is named after its title metadata, falling back to the path it was read from
    if md.Meta.get('title'):
        page_name = get_meta_str(md, 'title')
    else:
        page_name = path

    if page_name:
        page_title = f'{page_name} - {Config.TITLE_SUFFIX}'
    else:
        page_title = Config.TITLE_SUFFIX
    logger.debug("title (potentially derived): %s", page_title)

    return content, md, page_name, page_title, mtime

View File

@ -1,21 +1,78 @@
"""General page functionality.
"""Process Markdown pages.
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-FileCopyrightText: © 2025 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import datetime
import logging
import os
import re
import markdown
from markupsafe import Markup
from werkzeug.security import safe_join
from incorporealcms import env
from incorporealcms.config import Config
from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md
logger = logging.getLogger(__name__)
def get_meta_str(md, key):
    """Return the metadata stored under ``key`` on the parsed Markdown object as a single string.

    Args:
        md: the Markdown parser object whose ``Meta`` mapping is consulted
        key: the metadata key to look up

    Returns:
        the values found for the key joined with single spaces, or '' if the key is unset or empty
    """
    values = md.Meta.get(key)
    if not values:
        return ""
    return " ".join(values)
def init_md():
    """Create a fresh Markdown parser configured from the application config.

    This used to be done once at the app level in __init__, but extensions like footnotes
    apparently assume the parser only lives for the length of parsing one document, and create
    double footnote ref links if the one parser sees the same document multiple times. A new
    parser is therefore built on every call.

    Returns:
        a new markdown.Markdown instance
    """
    # the configured extensions are always supplemented with the ones
    # our app depends on, like the meta extension
    required_extensions = ['meta']
    return markdown.Markdown(
        extensions=Config.MARKDOWN_EXTENSIONS + required_extensions,
        extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS,
    )
def instance_resource_path_to_request_path(path):
    """Reverse a relative disk path to the path that would show up in a URL request.

    A trailing ``index.md`` path segment is dropped entirely (leaving the directory, with its
    trailing slash), and any other trailing ``.md`` extension is stripped.

    Args:
        path: the file path, relative to the directory the request paths are rooted at

    Returns:
        the request path, always starting with '/'
    """
    # the dots are escaped so only a literal ".md" extension is removed, and index.md is only
    # stripped when it is a whole path segment (so "myindex.md" maps to "/myindex", not "/my")
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    return '/' + re.sub(r'\.md$', '', without_index)
def parse_md(path: str):
    """Read and render a Markdown file, returning the result along with derived page data.

    Args:
        path: the path to the file to render

    Returns:
        a tuple of (rendered content wrapped in Markup, the Markdown parser object used, the page
        name, the page title, the file's modification time as a timezone-aware UTC datetime)

    Raises:
        OSError: the file could not be opened or read
        ValueError: logged as a parse/render failure and re-raised
        TypeError: logged as a load/render failure and re-raised
    """
    try:
        logger.debug("opening path '%s'", path)
        with open(path, 'r') as source:
            modified_ts = os.path.getmtime(source.name)
            mtime = datetime.datetime.fromtimestamp(modified_ts, tz=datetime.timezone.utc)
            entry = source.read()
        logger.debug("path '%s' read", path)
        # a new parser per document; see init_md() for why it is not shared
        md = init_md()
        rendered = md.convert(entry)
        content = Markup(rendered)
    except OSError:
        logger.exception("path '%s' could not be opened!", path)
        raise
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        raise
    except TypeError:
        logger.exception("error loading/rendering markdown!")
        raise

    logger.debug("file metadata: %s", md.Meta)

    # the page is named after its title metadata, falling back to the path it was read from
    if md.Meta.get('title'):
        page_name = get_meta_str(md, 'title')
    else:
        page_name = path

    if page_name:
        page_title = f'{page_name} - {Config.TITLE_SUFFIX}'
    else:
        page_title = Config.TITLE_SUFFIX
    logger.debug("title (potentially derived): %s", page_title)

    return content, md, page_name, page_title, mtime
def handle_markdown_file_path(path: str) -> str:
"""Given a location on disk, attempt to open it and render the markdown within."""
try:

View File

@ -12,7 +12,132 @@ import tempfile
from termcolor import cprint
from incorporealcms import init_instance
from incorporealcms.pages import handle_markdown_file_path
from incorporealcms.markdown import handle_markdown_file_path
class StaticSiteGenerator(object):
"""Generate static site output based on the instance's content."""
def __init__(self, instance_dir: str, output_dir: str):
    """Create the object to run various operations to generate the static site.

    Both directories are resolved to absolute paths before being stored, and the instance
    configuration is loaded as part of construction.

    Args:
        instance_dir: the directory from which to read an instance format set of content
        output_dir: the directory to write the generated static site to
    """
    # resolve *before* storing: build_in_destination() changes the working directory, so
    # relative paths kept on the object would stop pointing at the intended locations
    self.instance_dir = os.path.abspath(instance_dir)
    self.output_dir = os.path.abspath(output_dir)

    # initialize configuration with the path to the instance
    init_instance(self.instance_dir)
def build(self):
    """Build the whole static site.

    The site is assembled in a temporary directory created alongside the output directory, then
    renamed into place; any pre-existing output directory is renamed aside rather than deleted.
    """
    # putting the temporary directory next to the desired output so we can safely rename it later
    output_parent = os.path.dirname(self.output_dir)
    tmp_output_dir = tempfile.mkdtemp(dir=output_parent)
    cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green')

    # copy core content
    self.build_in_destination(os.path.join(self.instance_dir, 'pages'), tmp_output_dir)

    # copy the program's static dir
    package_dir = os.path.dirname(os.path.abspath(__file__))
    program_static_dir = os.path.join(package_dir, 'static')
    static_output_dir = os.path.join(tmp_output_dir, 'static')
    try:
        os.mkdir(static_output_dir)
    except FileExistsError:
        # already exists
        pass
    self.build_in_destination(program_static_dir, static_output_dir, convert_markdown=False)

    # copy the instance's static dir --- should I deprecate this since it could just be stuff in pages/static/?
    custom_static_dir = os.path.join(self.instance_dir, 'custom-static')
    self.build_in_destination(custom_static_dir, static_output_dir, convert_markdown=False)

    # move temporary dir to the destination
    tmp_name = os.path.basename(tmp_output_dir)
    old_output_dir = f'{self.output_dir}-old-{tmp_name}'
    if os.path.exists(self.output_dir):
        cprint(f"renaming '{self.output_dir}' to '{old_output_dir}'", 'green')
        os.rename(self.output_dir, old_output_dir)
    cprint(f"renaming '{tmp_output_dir}' to '{self.output_dir}'", 'green')
    os.rename(tmp_output_dir, self.output_dir)

    # rwx for the owner, r-x for group and others
    output_mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
    os.chmod(self.output_dir, output_mode)

    # TODO: unlink old dir above? arg flag?
def build_in_destination(self, source_dir: str, dest_dir: str, convert_markdown: bool = True) -> None:
"""Walk the source directory and copy and/or convert its contents into the destination.
Directories are recreated, symlinks are recreated as symlinks (after being checked by
symlink_to_relative_dest), and regular files are copied. Markdown files are additionally
rendered to a sibling .html file when convert_markdown is set.
Note that this changes the process's working directory to source_dir and does not restore it.
Args:
source_dir: the directory to copy into the destination
dest_dir: the directory to place copied/converted files into
convert_markdown: whether or not to convert Markdown files (or simply copy them)
Raises:
ValueError: (from symlink_to_relative_dest) a symlink resolves to somewhere outside source_dir
"""
cprint(f"copying files from '{source_dir}' to '{dest_dir}'", 'green')
# the relative paths computed below (base_dir, src) are resolved against the working
# directory, hence the chdir into the source
os.chdir(source_dir)
for base_dir, subdirs, files in os.walk(source_dir):
# remove the absolute path of the pages directory from the base_dir
base_dir = os.path.relpath(base_dir, source_dir)
# create subdirs seen here for subsequent depth
for subdir in subdirs:
dst = os.path.join(dest_dir, base_dir, subdir)
if os.path.islink(os.path.join(base_dir, subdir)):
# keep the link relative to the output directory
src = self.symlink_to_relative_dest(source_dir, os.path.join(base_dir, subdir))
print(f"creating directory symlink '{dst}' -> '{src}'")
os.symlink(src, dst, target_is_directory=True)
else:
print(f"creating directory '{dst}'")
try:
os.mkdir(dst)
except FileExistsError:
# already exists
pass
# process and copy files
for file_ in files:
dst = os.path.join(dest_dir, base_dir, file_)
if os.path.islink(os.path.join(base_dir, file_)):
# keep the link relative to the output directory
src = self.symlink_to_relative_dest(source_dir, os.path.join(base_dir, file_))
print(f"creating symlink '{dst}' -> '{src}'")
os.symlink(src, dst, target_is_directory=False)
else:
src = os.path.join(base_dir, file_)
print(f"copying file '{src}' -> '{dst}'")
# copy2 also carries over file metadata where it can
shutil.copy2(src, dst)
# render markdown as HTML
if src.endswith('.md') and convert_markdown:
# the rendered page is written next to the copied source, with .md swapped for .html
rendered_file = dst.removesuffix('.md') + '.html'
try:
content = handle_markdown_file_path(src)
except UnicodeDecodeError:
# perhaps this isn't a markdown file at all for some reason; we
# copied it above so stick with that
cprint(f"{src} has invalid bytes! skipping", 'yellow')
continue
with open(rendered_file, 'w') as dst_file:
dst_file.write(content)
def symlink_to_relative_dest(self, base_dir: str, source: str) -> str:
    """Given a symlink, make sure it points to something inside the instance and provide its real destination.

    Args:
        base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in.
        source: the symlink to check

    Returns:
        what the symlink points at, expressed relative to base_dir

    Raises:
        ValueError: the symlink resolves to a location outside base_dir
    """
    # resolve both sides so a base_dir that itself contains symlinked components still compares
    # correctly against the fully resolved destination
    real_base = os.path.realpath(base_dir)
    real_dest = os.path.realpath(source)

    # compare whole path components rather than raw string prefixes, so that a sibling such as
    # "/x/pages-evil" is not mistaken for being inside "/x/pages"
    try:
        inside = os.path.commonpath([real_base, real_dest]) == real_base
    except ValueError:
        # commonpath refuses paths that cannot share an ancestor (e.g. different drives)
        inside = False
    if not inside:
        raise ValueError(f"symlink destination {real_dest} is outside the instance!")

    # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is...
    # NOTE(review): a relative symlink target is resolved against the directory holding the link,
    # so for links that live in a subdirectory this base_dir-relative value may not point at the
    # intended file once recreated in the output --- confirm against callers
    return os.path.relpath(real_dest, real_base)
def build():
@ -35,113 +160,5 @@ def build():
if os.path.exists(args.output_dir):
raise ValueError(f"specified output path '{args.output_dir}' exists as a file!")
output_dir = os.path.abspath(args.output_dir)
instance_dir = os.path.abspath(args.instance_dir)
# initialize configuration with the path to the instance
init_instance(instance_dir)
# putting the temporary directory next to the desired output so we can safely rename it later
tmp_output_dir = tempfile.mkdtemp(dir=os.path.dirname(output_dir))
cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green')
# copy core content
pages_dir = os.path.join(instance_dir, 'pages')
copy_to_destination(pages_dir, tmp_output_dir)
# copy the program's static dir
program_static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static')
static_output_dir = os.path.join(tmp_output_dir, 'static')
try:
os.mkdir(static_output_dir)
except FileExistsError:
# already exists
pass
copy_to_destination(program_static_dir, static_output_dir, convert_markdown=False)
# copy the instance's static dir --- should I deprecate this since it could just be stuff in pages/static/?
custom_static_dir = os.path.join(instance_dir, 'custom-static')
copy_to_destination(custom_static_dir, static_output_dir, convert_markdown=False)
# move temporary dir to the destination
old_output_dir = f'{output_dir}-old-{os.path.basename(tmp_output_dir)}'
if os.path.exists(output_dir):
cprint(f"renaming '{output_dir}' to '{old_output_dir}'", 'green')
os.rename(output_dir, old_output_dir)
cprint(f"renaming '{tmp_output_dir}' to '{output_dir}'", 'green')
os.rename(tmp_output_dir, output_dir)
os.chmod(output_dir,
stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
# TODO: unlink old dir above? arg flag?
def copy_to_destination(source_dir: str, dest_dir: str, convert_markdown: bool = True) -> None:
"""Walk the source directory and copy and/or convert its contents into the destination.
Directories are recreated, symlinks are recreated as symlinks (after being checked by
symlink_to_relative_dest), and regular files are copied. Markdown files are additionally
rendered to a sibling .html file when convert_markdown is set.
Note that this changes the process's working directory to source_dir and does not restore it.
Args:
source_dir: the directory to copy into the destination
dest_dir: the directory to place copied/converted files into
convert_markdown: whether or not to convert Markdown files (or simply copy them)
Raises:
ValueError: (from symlink_to_relative_dest) a symlink resolves to somewhere outside source_dir
"""
cprint(f"copying files from '{source_dir}' to '{dest_dir}'", 'green')
# the relative paths computed below (base_dir, src) are resolved against the working
# directory, hence the chdir into the source
os.chdir(source_dir)
for base_dir, subdirs, files in os.walk(source_dir):
# remove the absolute path of the pages directory from the base_dir
base_dir = os.path.relpath(base_dir, source_dir)
# create subdirs seen here for subsequent depth
for subdir in subdirs:
dst = os.path.join(dest_dir, base_dir, subdir)
if os.path.islink(os.path.join(base_dir, subdir)):
# keep the link relative to the output directory
src = symlink_to_relative_dest(source_dir, os.path.join(base_dir, subdir))
print(f"creating directory symlink '{dst}' -> '{src}'")
os.symlink(src, dst, target_is_directory=True)
else:
print(f"creating directory '{dst}'")
try:
os.mkdir(dst)
except FileExistsError:
# already exists
pass
# process and copy files
for file_ in files:
dst = os.path.join(dest_dir, base_dir, file_)
if os.path.islink(os.path.join(base_dir, file_)):
# keep the link relative to the output directory
src = symlink_to_relative_dest(source_dir, os.path.join(base_dir, file_))
print(f"creating symlink '{dst}' -> '{src}'")
os.symlink(src, dst, target_is_directory=False)
else:
src = os.path.join(base_dir, file_)
print(f"copying file '{src}' -> '{dst}'")
# copy2 also carries over file metadata where it can
shutil.copy2(src, dst)
# render markdown as HTML
if src.endswith('.md') and convert_markdown:
# the rendered page is written next to the copied source, with .md swapped for .html
rendered_file = dst.removesuffix('.md') + '.html'
try:
content = handle_markdown_file_path(src)
except UnicodeDecodeError:
# perhaps this isn't a markdown file at all for some reason; we
# copied it above so stick with that
cprint(f"{src} has invalid bytes! skipping", 'yellow')
continue
with open(rendered_file, 'w') as dst_file:
dst_file.write(content)
def symlink_to_relative_dest(base_dir: str, source: str) -> str:
    """Given a symlink, make sure it points to something inside the instance and provide its real destination.

    Args:
        base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in.
        source: the symlink to check

    Returns:
        what the symlink points at, expressed relative to base_dir

    Raises:
        ValueError: the symlink resolves to a location outside base_dir
    """
    # resolve both sides so a base_dir that itself contains symlinked components still compares
    # correctly against the fully resolved destination
    real_base = os.path.realpath(base_dir)
    real_dest = os.path.realpath(source)

    # compare whole path components rather than raw string prefixes, so that a sibling such as
    # "/x/pages-evil" is not mistaken for being inside "/x/pages"
    try:
        inside = os.path.commonpath([real_base, real_dest]) == real_base
    except ValueError:
        # commonpath refuses paths that cannot share an ancestor (e.g. different drives)
        inside = False
    if not inside:
        raise ValueError(f"symlink destination {real_dest} is outside the instance!")

    # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is...
    return os.path.relpath(real_dest, real_base)
site_gen = StaticSiteGenerator(args.instance_dir, args.output_dir)
site_gen.build()