static site generator part 5 --- consolidate markdown code
All that's left is basically the instance copier code (ssg.py) and markdown handling (markdown.py).

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
parent
c9d17523ce
commit
746314f4ed
@ -19,7 +19,7 @@ env = Environment(
|
||||
)
|
||||
|
||||
|
||||
def init_instance(instance_path: str, test_config: dict = None):
|
||||
def init_instance(instance_path: str, extra_config: dict = None):
|
||||
"""Create the instance context, with allowances for customizing path and test settings."""
|
||||
# load the instance config.json, if there is one
|
||||
instance_config = os.path.join(instance_path, 'config.json')
|
||||
@ -29,8 +29,8 @@ def init_instance(instance_path: str, test_config: dict = None):
|
||||
cprint(f"splicing {config_dict} into the config", 'yellow')
|
||||
Config.update(config_dict)
|
||||
|
||||
if test_config:
|
||||
Config.update(test_config)
|
||||
if extra_config:
|
||||
Config.update(extra_config)
|
||||
|
||||
# stash some stuff
|
||||
Config.INSTANCE_DIR = os.path.abspath(instance_path)
|
||||
|
@ -1,72 +0,0 @@
|
||||
"""Miscellaneous helper functions and whatnot.
|
||||
|
||||
SPDX-FileCopyrightText: © 2021 Brian S. Stephan <bss@incorporeal.org>
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
import markdown
|
||||
from markupsafe import Markup
|
||||
|
||||
from incorporealcms.config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_meta_str(md, key):
    """Return the page metadata stored under ``key`` as one string, or '' if it is unset.

    Args:
        md: the Markdown parser object whose ``Meta`` mapping was filled in while parsing a page
        key: the metadata key to look up

    Returns:
        the values stored for the key joined with single spaces, or '' when the key is
        missing or has no values
    """
    values = md.Meta.get(key)
    if not values:
        return ""
    return " ".join(values)
|
||||
|
||||
|
||||
def init_md():
    """Build a fresh Markdown parser from the configuration.

    This used to be done once at the app level in __init__, but extensions like footnotes
    apparently assume the parser only lives for the length of parsing one document, and create
    double footnote ref links if the one parser sees the same document multiple times.

    Returns:
        a new markdown.Markdown instance
    """
    # the configured extensions, plus the ones our app depends on, like the meta extension
    extensions = Config.MARKDOWN_EXTENSIONS + ['meta']
    return markdown.Markdown(extensions=extensions, extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS)
|
||||
|
||||
|
||||
def instance_resource_path_to_request_path(path):
    """Reverse a relative disk path to the path that would show up in a URL request.

    Args:
        path: the path of a file, relative to the root of the served content

    Returns:
        the request path: '/' followed by the input with a trailing 'index.md' file name
        removed, or otherwise with a trailing '.md' extension removed
    """
    # an index page is served at its directory's URL, so drop the whole file name but keep the
    # directory's trailing slash; anchoring on a path boundary keeps a file such as
    # 'reindex.md' from being mistaken for an index page
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    # every other page is served at its path minus the extension; the dot is escaped so only
    # a literal '.md' is stripped, not any character followed by 'md'
    return '/' + re.sub(r'\.md$', '', without_index)
|
||||
|
||||
|
||||
def parse_md(path: str):
    """Read and render a Markdown file, returning the result along with derived page data.

    Args:
        path: the path to the file to render

    Returns:
        a tuple of (rendered content wrapped in Markup, the Markdown parser that rendered it,
        the page name, the full page title, the file's modification time as a timezone-aware
        UTC datetime)

    Raises:
        OSError: if the file could not be opened or read
        ValueError: if reading or rendering raised one (this includes UnicodeDecodeError)
        TypeError: if reading or rendering raised one
    """
    try:
        logger.debug("opening path '%s'", path)
        with open(path, 'r') as source_file:
            modified_ts = os.path.getmtime(source_file.name)
            mtime = datetime.datetime.fromtimestamp(modified_ts, tz=datetime.timezone.utc)
            raw_text = source_file.read()
        logger.debug("path '%s' read", path)
        # a new parser per document; see init_md() for why it is not shared
        md = init_md()
        content = Markup(md.convert(raw_text))
    except OSError:
        logger.exception("path '%s' could not be opened!", path)
        raise
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        raise
    except TypeError:
        logger.exception("error loading/rendering markdown!")
        raise

    logger.debug("file metadata: %s", md.Meta)

    # pages without a title in their metadata are named after their path
    if md.Meta.get('title'):
        page_name = get_meta_str(md, 'title')
    else:
        page_name = path

    if page_name:
        page_title = f'{page_name} - {Config.TITLE_SUFFIX}'
    else:
        page_title = Config.TITLE_SUFFIX
    logger.debug("title (potentially derived): %s", page_title)

    return content, md, page_name, page_title, mtime
|
@ -1,21 +1,78 @@
|
||||
"""General page functionality.
|
||||
"""Process Markdown pages.
|
||||
|
||||
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
|
||||
SPDX-FileCopyrightText: © 2025 Brian S. Stephan <bss@incorporeal.org>
|
||||
SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""
|
||||
import datetime
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
import markdown
|
||||
from markupsafe import Markup
|
||||
from werkzeug.security import safe_join
|
||||
|
||||
from incorporealcms import env
|
||||
from incorporealcms.config import Config
|
||||
from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_meta_str(md, key):
    """Return the page metadata stored under ``key`` as one string, or '' if it is unset.

    Args:
        md: the Markdown parser object whose ``Meta`` mapping was filled in while parsing a page
        key: the metadata key to look up

    Returns:
        the values stored for the key joined with single spaces, or '' when the key is
        missing or has no values
    """
    values = md.Meta.get(key)
    if not values:
        return ""
    return " ".join(values)
|
||||
|
||||
|
||||
def init_md():
    """Build a fresh Markdown parser from the configuration.

    This used to be done once at the app level in __init__, but extensions like footnotes
    apparently assume the parser only lives for the length of parsing one document, and create
    double footnote ref links if the one parser sees the same document multiple times.

    Returns:
        a new markdown.Markdown instance
    """
    # the configured extensions, plus the ones our app depends on, like the meta extension
    extensions = Config.MARKDOWN_EXTENSIONS + ['meta']
    return markdown.Markdown(extensions=extensions, extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS)
|
||||
|
||||
|
||||
def instance_resource_path_to_request_path(path):
    """Reverse a relative disk path to the path that would show up in a URL request.

    Args:
        path: the path of a file, relative to the root of the served content

    Returns:
        the request path: '/' followed by the input with a trailing 'index.md' file name
        removed, or otherwise with a trailing '.md' extension removed
    """
    # an index page is served at its directory's URL, so drop the whole file name but keep the
    # directory's trailing slash; anchoring on a path boundary keeps a file such as
    # 'reindex.md' from being mistaken for an index page
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    # every other page is served at its path minus the extension; the dot is escaped so only
    # a literal '.md' is stripped, not any character followed by 'md'
    return '/' + re.sub(r'\.md$', '', without_index)
|
||||
|
||||
|
||||
def parse_md(path: str):
    """Read and render a Markdown file, returning the result along with derived page data.

    Args:
        path: the path to the file to render

    Returns:
        a tuple of (rendered content wrapped in Markup, the Markdown parser that rendered it,
        the page name, the full page title, the file's modification time as a timezone-aware
        UTC datetime)

    Raises:
        OSError: if the file could not be opened or read
        ValueError: if reading or rendering raised one (this includes UnicodeDecodeError)
        TypeError: if reading or rendering raised one
    """
    try:
        logger.debug("opening path '%s'", path)
        with open(path, 'r') as source_file:
            modified_ts = os.path.getmtime(source_file.name)
            mtime = datetime.datetime.fromtimestamp(modified_ts, tz=datetime.timezone.utc)
            raw_text = source_file.read()
        logger.debug("path '%s' read", path)
        # a new parser per document; see init_md() for why it is not shared
        md = init_md()
        content = Markup(md.convert(raw_text))
    except OSError:
        logger.exception("path '%s' could not be opened!", path)
        raise
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        raise
    except TypeError:
        logger.exception("error loading/rendering markdown!")
        raise

    logger.debug("file metadata: %s", md.Meta)

    # pages without a title in their metadata are named after their path
    if md.Meta.get('title'):
        page_name = get_meta_str(md, 'title')
    else:
        page_name = path

    if page_name:
        page_title = f'{page_name} - {Config.TITLE_SUFFIX}'
    else:
        page_title = Config.TITLE_SUFFIX
    logger.debug("title (potentially derived): %s", page_title)

    return content, md, page_name, page_title, mtime
|
||||
|
||||
|
||||
def handle_markdown_file_path(path: str) -> str:
|
||||
"""Given a location on disk, attempt to open it and render the markdown within."""
|
||||
try:
|
@ -12,7 +12,132 @@ import tempfile
|
||||
from termcolor import cprint
|
||||
|
||||
from incorporealcms import init_instance
|
||||
from incorporealcms.pages import handle_markdown_file_path
|
||||
from incorporealcms.markdown import handle_markdown_file_path
|
||||
|
||||
|
||||
class StaticSiteGenerator(object):
    """Generate static site output based on the instance's content."""

    def __init__(self, instance_dir: str, output_dir: str):
        """Create the object to run various operations to generate the static site.

        Args:
            instance_dir: the directory from which to read an instance format set of content
            output_dir: the directory to write the generated static site to
        """
        # keep absolute paths: build_in_destination() changes the working directory, so relative
        # paths stored here would later resolve against the wrong directory
        self.instance_dir = os.path.abspath(instance_dir)
        self.output_dir = os.path.abspath(output_dir)

        # initialize configuration with the path to the instance
        init_instance(self.instance_dir)

    def build(self):
        """Build the whole static site.

        The site is assembled in a temporary directory created next to the output directory
        and then renamed into place. An existing output directory is first renamed to
        '<output_dir>-old-<temporary directory name>' and is left there.
        """
        # putting the temporary directory next to the desired output so we can safely rename it later
        tmp_output_dir = tempfile.mkdtemp(dir=os.path.dirname(self.output_dir))
        cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green')

        # copy core content
        pages_dir = os.path.join(self.instance_dir, 'pages')
        self.build_in_destination(pages_dir, tmp_output_dir)

        # copy the program's static dir
        program_static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static')
        static_output_dir = os.path.join(tmp_output_dir, 'static')
        try:
            os.mkdir(static_output_dir)
        except FileExistsError:
            # already exists
            pass
        self.build_in_destination(program_static_dir, static_output_dir, convert_markdown=False)

        # copy the instance's static dir --- should I deprecate this since it could just be stuff in pages/static/?
        custom_static_dir = os.path.join(self.instance_dir, 'custom-static')
        self.build_in_destination(custom_static_dir, static_output_dir, convert_markdown=False)

        # move temporary dir to the destination
        old_output_dir = f'{self.output_dir}-old-{os.path.basename(tmp_output_dir)}'
        if os.path.exists(self.output_dir):
            cprint(f"renaming '{self.output_dir}' to '{old_output_dir}'", 'green')
            os.rename(self.output_dir, old_output_dir)
        cprint(f"renaming '{tmp_output_dir}' to '{self.output_dir}'", 'green')
        os.rename(tmp_output_dir, self.output_dir)
        # mkdtemp() creates a directory only its owner can use; open the result up to rwxr-xr-x
        os.chmod(self.output_dir,
                 stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

        # TODO: unlink old dir above? arg flag?

    def build_in_destination(self, source_dir: str, dest_dir: str, convert_markdown: bool = True) -> None:
        """Walk the source directory and copy and/or convert its contents into the destination.

        Note that this changes the process's working directory to source_dir and does not
        change it back.

        Args:
            source_dir: the directory to copy into the destination
            dest_dir: the directory to place copied/converted files into
            convert_markdown: whether or not to convert Markdown files (or simply copy them)
        """
        cprint(f"copying files from '{source_dir}' to '{dest_dir}'", 'green')
        # the relative paths handled below are resolved against the working directory
        os.chdir(source_dir)
        for base_dir, subdirs, files in os.walk(source_dir):
            # remove the absolute path of the pages directory from the base_dir
            base_dir = os.path.relpath(base_dir, source_dir)
            # create subdirs seen here for subsequent depth
            for subdir in subdirs:
                dst = os.path.join(dest_dir, base_dir, subdir)
                if os.path.islink(os.path.join(base_dir, subdir)):
                    # keep the link relative to the output directory
                    src = self.symlink_to_relative_dest(source_dir, os.path.join(base_dir, subdir))
                    print(f"creating directory symlink '{dst}' -> '{src}'")
                    os.symlink(src, dst, target_is_directory=True)
                else:
                    print(f"creating directory '{dst}'")
                    try:
                        os.mkdir(dst)
                    except FileExistsError:
                        # already exists
                        pass

            # process and copy files
            for file_ in files:
                dst = os.path.join(dest_dir, base_dir, file_)
                if os.path.islink(os.path.join(base_dir, file_)):
                    # keep the link relative to the output directory
                    src = self.symlink_to_relative_dest(source_dir, os.path.join(base_dir, file_))
                    print(f"creating symlink '{dst}' -> '{src}'")
                    os.symlink(src, dst, target_is_directory=False)
                else:
                    src = os.path.join(base_dir, file_)
                    print(f"copying file '{src}' -> '{dst}'")
                    shutil.copy2(src, dst)

                    # render markdown as HTML
                    if src.endswith('.md') and convert_markdown:
                        rendered_file = dst.removesuffix('.md') + '.html'
                        try:
                            content = handle_markdown_file_path(src)
                        except UnicodeDecodeError:
                            # perhaps this isn't a markdown file at all for some reason; we
                            # copied it above so stick with that
                            cprint(f"{src} has invalid bytes! skipping", 'yellow')
                            continue
                        with open(rendered_file, 'w') as dst_file:
                            dst_file.write(content)

    def symlink_to_relative_dest(self, base_dir: str, source: str) -> str:
        """Given a symlink, make sure it points to something inside the instance and provide its real destination.

        Args:
            base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in.
            source: the symlink to check

        Returns:
            what the symlink points at, as a path relative to base_dir

        Raises:
            ValueError: if the symlink resolves to somewhere outside of base_dir
        """
        # resolve both sides so a base_dir that is itself reached through a symlink still matches
        real_base = os.path.realpath(base_dir)
        real_dest = os.path.realpath(source)
        try:
            # compare whole path components; a plain string prefix test would accept a sibling
            # such as '<base_dir>-other'
            inside = os.path.commonpath([real_base, real_dest]) == real_base
        except ValueError:
            # no common path at all (e.g. different drives), so it cannot be inside
            inside = False
        if not inside:
            raise ValueError(f"symlink destination {real_dest} is outside the instance!")
        # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is...
        # NOTE(review): callers use this as the target of a link that may live in a subdirectory,
        # where a relative target resolves against the link's own directory --- confirm nested
        # links end up pointing where intended
        return os.path.relpath(real_dest, real_base)
|
||||
|
||||
|
||||
def build():
|
||||
@ -35,113 +160,5 @@ def build():
|
||||
if os.path.exists(args.output_dir):
|
||||
raise ValueError(f"specified output path '{args.output_dir}' exists as a file!")
|
||||
|
||||
output_dir = os.path.abspath(args.output_dir)
|
||||
instance_dir = os.path.abspath(args.instance_dir)
|
||||
|
||||
# initialize configuration with the path to the instance
|
||||
init_instance(instance_dir)
|
||||
|
||||
# putting the temporary directory next to the desired output so we can safely rename it later
|
||||
tmp_output_dir = tempfile.mkdtemp(dir=os.path.dirname(output_dir))
|
||||
cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green')
|
||||
|
||||
# copy core content
|
||||
pages_dir = os.path.join(instance_dir, 'pages')
|
||||
copy_to_destination(pages_dir, tmp_output_dir)
|
||||
|
||||
# copy the program's static dir
|
||||
program_static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static')
|
||||
static_output_dir = os.path.join(tmp_output_dir, 'static')
|
||||
try:
|
||||
os.mkdir(static_output_dir)
|
||||
except FileExistsError:
|
||||
# already exists
|
||||
pass
|
||||
copy_to_destination(program_static_dir, static_output_dir, convert_markdown=False)
|
||||
|
||||
# copy the instance's static dir --- should I deprecate this since it could just be stuff in pages/static/?
|
||||
custom_static_dir = os.path.join(instance_dir, 'custom-static')
|
||||
copy_to_destination(custom_static_dir, static_output_dir, convert_markdown=False)
|
||||
|
||||
# move temporary dir to the destination
|
||||
old_output_dir = f'{output_dir}-old-{os.path.basename(tmp_output_dir)}'
|
||||
if os.path.exists(output_dir):
|
||||
cprint(f"renaming '{output_dir}' to '{old_output_dir}'", 'green')
|
||||
os.rename(output_dir, old_output_dir)
|
||||
cprint(f"renaming '{tmp_output_dir}' to '{output_dir}'", 'green')
|
||||
os.rename(tmp_output_dir, output_dir)
|
||||
os.chmod(output_dir,
|
||||
stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
||||
|
||||
# TODO: unlink old dir above? arg flag?
|
||||
|
||||
|
||||
def copy_to_destination(source_dir: str, dest_dir: str, convert_markdown: bool = True) -> None:
    """Mirror a source tree into the destination, optionally rendering its Markdown files.

    Directories are created, regular files are copied, and symlinks are recreated as symlinks.
    Note that this changes the process's working directory to source_dir and does not change
    it back.

    Args:
        source_dir: the directory to copy into the destination
        dest_dir: the directory to place copied/converted files into
        convert_markdown: whether or not to convert Markdown files (or simply copy them)
    """
    cprint(f"copying files from '{source_dir}' to '{dest_dir}'", 'green')
    # the relative paths handled below are resolved against the working directory
    os.chdir(source_dir)
    for walk_dir, subdirs, files in os.walk(source_dir):
        # work with this directory's path relative to the top of the source tree
        rel_dir = os.path.relpath(walk_dir, source_dir)

        # directories first, so that what is found at the next depth has somewhere to go
        for subdir in subdirs:
            rel_path = os.path.join(rel_dir, subdir)
            dst = os.path.join(dest_dir, rel_dir, subdir)
            if not os.path.islink(rel_path):
                print(f"creating directory '{dst}'")
                try:
                    os.mkdir(dst)
                except FileExistsError:
                    # already exists
                    pass
                continue
            # keep the link relative to the output directory
            src = symlink_to_relative_dest(source_dir, rel_path)
            print(f"creating directory symlink '{dst}' -> '{src}'")
            os.symlink(src, dst, target_is_directory=True)

        # then the files themselves
        for file_ in files:
            rel_path = os.path.join(rel_dir, file_)
            dst = os.path.join(dest_dir, rel_dir, file_)
            if os.path.islink(rel_path):
                # keep the link relative to the output directory
                src = symlink_to_relative_dest(source_dir, rel_path)
                print(f"creating symlink '{dst}' -> '{src}'")
                os.symlink(src, dst, target_is_directory=False)
                continue

            src = rel_path
            print(f"copying file '{src}' -> '{dst}'")
            shutil.copy2(src, dst)

            # only Markdown files get rendered to HTML, and only when asked to
            if not (convert_markdown and src.endswith('.md')):
                continue
            rendered_file = dst.removesuffix('.md') + '.html'
            try:
                content = handle_markdown_file_path(src)
            except UnicodeDecodeError:
                # perhaps this isn't a markdown file at all for some reason; we
                # copied it above so stick with that
                cprint(f"{src} has invalid bytes! skipping", 'yellow')
                continue
            with open(rendered_file, 'w') as dst_file:
                dst_file.write(content)
|
||||
|
||||
|
||||
def symlink_to_relative_dest(base_dir: str, source: str) -> str:
    """Given a symlink, make sure it points to something inside the instance and provide its real destination.

    Args:
        base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in.
        source: the symlink to check

    Returns:
        what the symlink points at, as a path relative to base_dir

    Raises:
        ValueError: if the symlink resolves to somewhere outside of base_dir
    """
    # resolve both sides so a base_dir that is itself reached through a symlink still matches
    real_base = os.path.realpath(base_dir)
    real_dest = os.path.realpath(source)
    try:
        # compare whole path components; a plain string prefix test would accept a sibling
        # such as '<base_dir>-other'
        inside = os.path.commonpath([real_base, real_dest]) == real_base
    except ValueError:
        # no common path at all (e.g. different drives), so it cannot be inside
        inside = False
    if not inside:
        raise ValueError(f"symlink destination {real_dest} is outside the instance!")
    # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is...
    # NOTE(review): callers use this as the target of a link that may live in a subdirectory,
    # where a relative target resolves against the link's own directory --- confirm nested
    # links end up pointing where intended
    return os.path.relpath(real_dest, real_base)
|
||||
site_gen = StaticSiteGenerator(args.instance_dir, args.output_dir)
|
||||
site_gen.build()
|
||||
|
Loading…
x
Reference in New Issue
Block a user