From 746314f4edfed5616c8dcfd529ffb332f3f620c2 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 15 Mar 2025 08:53:08 -0500 Subject: [PATCH] static site generator part 5 --- consolidate markdown code all that's left is basically the instance copier code (ssg.py) and markdown handling (markdown.py) Signed-off-by: Brian S. Stephan --- incorporealcms/__init__.py | 6 +- incorporealcms/lib.py | 72 ------- incorporealcms/{pages.py => markdown.py} | 65 +++++- incorporealcms/ssg.py | 239 ++++++++++++----------- 4 files changed, 192 insertions(+), 190 deletions(-) delete mode 100644 incorporealcms/lib.py rename incorporealcms/{pages.py => markdown.py} (63%) diff --git a/incorporealcms/__init__.py b/incorporealcms/__init__.py index 7bc4145..3aee430 100644 --- a/incorporealcms/__init__.py +++ b/incorporealcms/__init__.py @@ -19,7 +19,7 @@ env = Environment( ) -def init_instance(instance_path: str, test_config: dict = None): +def init_instance(instance_path: str, extra_config: dict = None): """Create the instance context, with allowances for customizing path and test settings.""" # load the instance config.json, if there is one instance_config = os.path.join(instance_path, 'config.json') @@ -29,8 +29,8 @@ def init_instance(instance_path: str, test_config: dict = None): cprint(f"splicing {config_dict} into the config", 'yellow') Config.update(config_dict) - if test_config: - Config.update(test_config) + if extra_config: + Config.update(extra_config) # stash some stuff Config.INSTANCE_DIR = os.path.abspath(instance_path) diff --git a/incorporealcms/lib.py b/incorporealcms/lib.py deleted file mode 100644 index d1d75ea..0000000 --- a/incorporealcms/lib.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Miscellaneous helper functions and whatnot. - -SPDX-FileCopyrightText: © 2021 Brian S. 
Stephan -SPDX-License-Identifier: AGPL-3.0-or-later -""" -import datetime -import logging -import os -import re - -import markdown -from markupsafe import Markup - -from incorporealcms.config import Config - -logger = logging.getLogger(__name__) - - -def get_meta_str(md, key): - """Provide the page's (parsed in Markup obj md) metadata for the specified key, or '' if unset.""" - return " ".join(md.Meta.get(key)) if md.Meta.get(key) else "" - - -def init_md(): - """Initialize the Markdown parser. - - This used to done at the app level in __init__, but extensions like footnotes apparently - assume the parser to only live for the length of parsing one document, and create double - footnote ref links if the one parser sees the same document multiple times. - """ - # initialize markdown parser from config, but include - # extensions our app depends on, like the meta extension - return markdown.Markdown(extensions=Config.MARKDOWN_EXTENSIONS + ['meta'], - extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS) - - -def instance_resource_path_to_request_path(path): - """Reverse a relative disk path to the path that would show up in a URL request.""" - return '/' + re.sub(r'.md$', '', re.sub(r'index.md$', '', path)) - - -def parse_md(path: str): - """Given a file to parse, return file content and other derived data along with the md object. 
- - Args: - path: the path to the file to render - """ - try: - logger.debug("opening path '%s'", path) - with open(path, 'r') as input_file: - mtime = datetime.datetime.fromtimestamp(os.path.getmtime(input_file.name), tz=datetime.timezone.utc) - entry = input_file.read() - logger.debug("path '%s' read", path) - md = init_md() - content = Markup(md.convert(entry)) - except OSError: - logger.exception("path '%s' could not be opened!", path) - raise - except ValueError: - logger.exception("error parsing/rendering markdown!") - raise - except TypeError: - logger.exception("error loading/rendering markdown!") - raise - - logger.debug("file metadata: %s", md.Meta) - - page_name = get_meta_str(md, 'title') if md.Meta.get('title') else path - page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX - logger.debug("title (potentially derived): %s", page_title) - - return content, md, page_name, page_title, mtime diff --git a/incorporealcms/pages.py b/incorporealcms/markdown.py similarity index 63% rename from incorporealcms/pages.py rename to incorporealcms/markdown.py index 659cc93..9c9723f 100644 --- a/incorporealcms/pages.py +++ b/incorporealcms/markdown.py @@ -1,21 +1,78 @@ -"""General page functionality. +"""Process Markdown pages. -SPDX-FileCopyrightText: © 2020 Brian S. Stephan +SPDX-FileCopyrightText: © 2025 Brian S. 
def get_meta_str(md, key):
    """Provide the page's metadata for the specified key as a single string, or '' if unset.

    Args:
        md: the Markdown parser object whose ``Meta`` mapping holds the parsed page metadata
        key: the metadata key to look up
    Returns:
        the values stored under ``key`` joined with single spaces, or '' if the key is
        missing or has no values
    """
    # look the key up once; a missing key and an empty value list both yield ''
    values = md.Meta.get(key)
    return " ".join(values) if values else ""


def init_md():
    """Initialize the Markdown parser.

    This used to be done at the app level in __init__, but extensions like footnotes apparently
    assume the parser to only live for the length of parsing one document, and create double
    footnote ref links if the one parser sees the same document multiple times.

    Returns:
        a new Markdown parser configured from Config, with the meta extension added
    """
    # initialize markdown parser from config, but include
    # extensions our app depends on, like the meta extension
    return markdown.Markdown(extensions=Config.MARKDOWN_EXTENSIONS + ['meta'],
                             extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS)


def instance_resource_path_to_request_path(path):
    """Reverse a relative disk path to the path that would show up in a URL request.

    A trailing ``index.md`` path component is dropped (leaving its directory, with the
    trailing slash), and any other trailing ``.md`` extension is removed.

    Args:
        path: the path of the file, relative to the pages directory
    Returns:
        the request path, always starting with '/'
    """
    # only strip 'index.md' when it is a whole path component, so that e.g.
    # 'reindex.md' is not mangled into '/re'
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    # the dot is escaped so only a literal '.md' extension is removed; an unescaped
    # '.' would also eat the end of names like 'cmd'
    return '/' + re.sub(r'\.md$', '', without_index)
class StaticSiteGenerator:
    """Generate static site output based on the instance's content."""

    def __init__(self, instance_dir: str, output_dir: str):
        """Create the object to run various operations to generate the static site.

        Args:
            instance_dir: the directory from which to read an instance format set of content
            output_dir: the directory to write the generated static site to
        """
        # store absolute paths: build_in_destination() changes the working directory, so
        # relative paths would start resolving against the wrong place partway through a build
        self.instance_dir = os.path.abspath(instance_dir)
        self.output_dir = os.path.abspath(output_dir)

        # initialize configuration with the path to the instance
        init_instance(self.instance_dir)

    def build(self):
        """Build the whole static site.

        The site is generated into a temporary directory next to the output directory and
        then renamed into place; any existing output directory is renamed aside first.
        """
        # putting the temporary directory next to the desired output so we can safely rename it later
        tmp_output_dir = tempfile.mkdtemp(dir=os.path.dirname(self.output_dir))
        cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green')

        # copy core content
        pages_dir = os.path.join(self.instance_dir, 'pages')
        self.build_in_destination(pages_dir, tmp_output_dir)

        # copy the program's static dir
        program_static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static')
        static_output_dir = os.path.join(tmp_output_dir, 'static')
        try:
            os.mkdir(static_output_dir)
        except FileExistsError:
            # already exists
            pass
        self.build_in_destination(program_static_dir, static_output_dir, convert_markdown=False)

        # copy the instance's static dir --- should I deprecate this since it could just be stuff in pages/static/?
        custom_static_dir = os.path.join(self.instance_dir, 'custom-static')
        self.build_in_destination(custom_static_dir, static_output_dir, convert_markdown=False)

        # move temporary dir to the destination
        old_output_dir = f'{self.output_dir}-old-{os.path.basename(tmp_output_dir)}'
        if os.path.exists(self.output_dir):
            cprint(f"renaming '{self.output_dir}' to '{old_output_dir}'", 'green')
            os.rename(self.output_dir, old_output_dir)
        cprint(f"renaming '{tmp_output_dir}' to '{self.output_dir}'", 'green')
        os.rename(tmp_output_dir, self.output_dir)
        # set the final directory to rwxr-xr-x so others can read and traverse it
        os.chmod(self.output_dir,
                 stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        # TODO: unlink old dir above? arg flag?

    def build_in_destination(self, source_dir: str, dest_dir: str, convert_markdown: bool = True) -> None:
        """Walk the source directory and copy and/or convert its contents into the destination.

        Note that this changes the process's working directory to source_dir.

        Args:
            source_dir: the directory to copy into the destination
            dest_dir: the directory to place copied/converted files into
            convert_markdown: whether or not to convert Markdown files (or simply copy them)
        """
        cprint(f"copying files from '{source_dir}' to '{dest_dir}'", 'green')
        # the relative paths used below (link checks, copy sources, markdown rendering)
        # are all resolved against source_dir
        os.chdir(source_dir)
        for base_dir, subdirs, files in os.walk(source_dir):
            # remove the absolute path of the pages directory from the base_dir
            base_dir = os.path.relpath(base_dir, source_dir)
            # create subdirs seen here for subsequent depth
            for subdir in subdirs:
                rel_path = os.path.join(base_dir, subdir)
                dst = os.path.join(dest_dir, rel_path)
                if os.path.islink(rel_path):
                    # keep the link relative to the output directory
                    src = self._link_target(source_dir, base_dir, rel_path)
                    print(f"creating directory symlink '{dst}' -> '{src}'")
                    os.symlink(src, dst, target_is_directory=True)
                else:
                    print(f"creating directory '{dst}'")
                    try:
                        os.mkdir(dst)
                    except FileExistsError:
                        # already exists
                        pass

            # process and copy files
            for file_ in files:
                rel_path = os.path.join(base_dir, file_)
                dst = os.path.join(dest_dir, rel_path)
                if os.path.islink(rel_path):
                    # keep the link relative to the output directory
                    src = self._link_target(source_dir, base_dir, rel_path)
                    print(f"creating symlink '{dst}' -> '{src}'")
                    os.symlink(src, dst, target_is_directory=False)
                else:
                    src = rel_path
                    print(f"copying file '{src}' -> '{dst}'")
                    shutil.copy2(src, dst)

                    # render markdown as HTML
                    if src.endswith('.md') and convert_markdown:
                        rendered_file = dst.removesuffix('.md') + '.html'
                        try:
                            content = handle_markdown_file_path(src)
                        except UnicodeDecodeError:
                            # perhaps this isn't a markdown file at all for some reason; we
                            # copied it above so stick with that
                            cprint(f"{src} has invalid bytes! skipping", 'yellow')
                            continue
                        with open(rendered_file, 'w') as dst_file:
                            dst_file.write(content)

    def _link_target(self, source_dir: str, base_dir: str, rel_path: str) -> str:
        """Work out what to store in the copy of a symlink so that it resolves in the output.

        Args:
            source_dir: the absolute path of the directory being walked
            base_dir: the directory containing the symlink, relative to source_dir
            rel_path: the symlink itself, relative to source_dir
        Returns:
            the symlink's destination, relative to the directory the symlink lives in
        """
        target = self.symlink_to_relative_dest(source_dir, rel_path)
        # a relative symlink is resolved against the directory that contains it, not the
        # root of the tree, so re-express the root-relative target from base_dir
        return os.path.relpath(target, base_dir)

    def symlink_to_relative_dest(self, base_dir: str, source: str) -> str:
        """Given a symlink, make sure it points to something inside the instance and provide its real destination.

        Args:
            base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in.
            source: the symlink to check
        Returns:
            what the symlink points at, relative to base_dir
        Raises:
            ValueError: if the symlink resolves to somewhere outside of base_dir
        """
        # resolve both sides so a base_dir that itself goes through a symlink still compares equal
        real_base = os.path.realpath(base_dir)
        real_target = os.path.realpath(source)
        try:
            # compare whole path components; a plain string prefix check would accept
            # siblings such as 'pages-evil' as being inside 'pages'
            inside = os.path.commonpath([real_base, real_target]) == real_base
        except ValueError:
            # no common path at all (e.g. different drives), so definitely not inside
            inside = False
        if not inside:
            raise ValueError(f"symlink destination {real_target} is outside the instance!")
        # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is...
        return os.path.relpath(real_target, real_base)
- - -def copy_to_destination(source_dir: str, dest_dir: str, convert_markdown: bool = True) -> None: - """Walk the source directory and copy and/or convert its contents into the destination. - - Args: - source_dir: the directory to copy into the destination - dest_dir: the directory to place copied/converted files into - convert_markdown: whether or not to convert Markdown files (or simply copy them) - """ - cprint(f"copying files from '{source_dir}' to '{dest_dir}'", 'green') - os.chdir(source_dir) - for base_dir, subdirs, files in os.walk(source_dir): - # remove the absolute path of the pages directory from the base_dir - base_dir = os.path.relpath(base_dir, source_dir) - # create subdirs seen here for subsequent depth - for subdir in subdirs: - dst = os.path.join(dest_dir, base_dir, subdir) - if os.path.islink(os.path.join(base_dir, subdir)): - # keep the link relative to the output directory - src = symlink_to_relative_dest(source_dir, os.path.join(base_dir, subdir)) - print(f"creating directory symlink '{dst}' -> '{src}'") - os.symlink(src, dst, target_is_directory=True) - else: - print(f"creating directory '{dst}'") - try: - os.mkdir(dst) - except FileExistsError: - # already exists - pass - - # process and copy files - for file_ in files: - dst = os.path.join(dest_dir, base_dir, file_) - if os.path.islink(os.path.join(base_dir, file_)): - # keep the link relative to the output directory - src = symlink_to_relative_dest(source_dir, os.path.join(base_dir, file_)) - print(f"creating symlink '{dst}' -> '{src}'") - os.symlink(src, dst, target_is_directory=False) - else: - src = os.path.join(base_dir, file_) - print(f"copying file '{src}' -> '{dst}'") - shutil.copy2(src, dst) - - # render markdown as HTML - if src.endswith('.md') and convert_markdown: - rendered_file = dst.removesuffix('.md') + '.html' - try: - content = handle_markdown_file_path(src) - except UnicodeDecodeError: - # perhaps this isn't a markdown file at all for some reason; we - # copied it above 
so stick with tha - cprint(f"{src} has invalid bytes! skipping", 'yellow') - continue - with open(rendered_file, 'w') as dst_file: - dst_file.write(content) - - -def symlink_to_relative_dest(base_dir: str, source: str) -> str: - """Given a symlink, make sure it points to something inside the instance and provide its real destination. - - Args: - base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in. - source: the symlink to check - Returns: - what the symlink points at - """ - if not os.path.realpath(source).startswith(base_dir): - raise ValueError(f"symlink destination {os.path.realpath(source)} is outside the instance!") - # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is... - return os.path.relpath(os.path.realpath(source), base_dir) + site_gen = StaticSiteGenerator(args.instance_dir, args.output_dir) + site_gen.build()