From 0d59e6432302f646eaf0e1fb65eb60f7678e0485 Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Thu, 13 Mar 2025 16:14:12 -0500 Subject: [PATCH] static site generator part 1 --- incomplete, but kinda works Signed-off-by: Brian S. Stephan --- incorporealcms/__init__.py | 54 +++++-------- incorporealcms/config.py | 9 ++- incorporealcms/lib.py | 62 +++++---------- incorporealcms/pages.py | 80 +++++++------------ incorporealcms/ssg.py | 120 +++++++++++++++++++++++++++++ incorporealcms/templates/base.html | 4 +- tests/instance/config.json | 27 +++++++ tests/instance/pages/redirect.md | 1 - 8 files changed, 224 insertions(+), 133 deletions(-) create mode 100644 incorporealcms/ssg.py create mode 100644 tests/instance/config.json delete mode 100644 tests/instance/pages/redirect.md diff --git a/incorporealcms/__init__.py b/incorporealcms/__init__.py index c111cff..7960e33 100644 --- a/incorporealcms/__init__.py +++ b/incorporealcms/__init__.py @@ -3,48 +3,36 @@ SPDX-FileCopyrightText: © 2020 Brian S. 
Stephan SPDX-License-Identifier: AGPL-3.0-or-later """ +import json import logging import os from logging.config import dictConfig -from flask import Flask, request +from jinja2 import Environment, PackageLoader, select_autoescape + +from incorporealcms.config import Config + +env = Environment( + loader=PackageLoader('incorporealcms'), + autoescape=select_autoescape(), +) -def create_app(instance_path=None, test_config=None): - """Create the Flask app, with allowances for customizing path and test settings.""" - app = Flask(__name__, instance_relative_config=True, instance_path=instance_path) +def init_instance(instance_path: str, test_config: dict = None): + """Create the instance context, with allowances for customizing path and test settings.""" + # load the instance config.json, if there is one + instance_config = os.path.join(instance_path, 'config.json') + if os.path.isfile(instance_config): + with open(instance_config, 'r') as config: + Config.update(json.load(config)) - # if it doesn't already exist, create the instance folder - os.makedirs(app.instance_path, exist_ok=True) - - # load defaults from config provided with the application - app.config.from_object('incorporealcms.config.Config') - # load specific instance configurations - app.config.from_pyfile('config.py', silent=True) if test_config: - app.config.from_mapping(test_config) + Config.update(test_config) - dictConfig(app.config['LOGGING']) + # stash some stuff + Config.INSTANCE_DIR = os.path.abspath(instance_path) + dictConfig(Config.LOGGING) logger = logging.getLogger(__name__) - logger.debug("instance path: %s", app.instance_path) - - @app.before_request - def log_request(): - logger.info("REQUEST: %s %s", request.method, request.path) - - @app.after_request - def log_response(response): - logger.info("RESPONSE: %s %s: %s", request.method, request.path, response.status) - return response - - from . 
import error_pages, feed, pages, static - app.register_blueprint(feed.bp) - app.register_blueprint(pages.bp) - app.register_blueprint(static.bp) - app.register_error_handler(400, error_pages.bad_request) - app.register_error_handler(404, error_pages.page_not_found) - app.register_error_handler(500, error_pages.internal_server_error) - - return app + logger.debug("instance dir: %s", Config.INSTANCE_DIR) diff --git a/incorporealcms/config.py b/incorporealcms/config.py index bd877bc..38791e0 100644 --- a/incorporealcms/config.py +++ b/incorporealcms/config.py @@ -61,11 +61,18 @@ class Config(object): } DEFAULT_PAGE_STYLE = 'light' - DOMAIN_NAME = 'example.com' + DOMAIN_NAME = 'example.org' TITLE_SUFFIX = DOMAIN_NAME + BASE_HOST = 'http://' + DOMAIN_NAME CONTACT_EMAIL = 'admin@example.com' # feed settings AUTHOR = {'name': 'Test Name', 'email': 'admin@example.com'} # specify FAVICON in your instance config.py to override the provided icon + + @classmethod + def update(cls, config: dict): + """Update this configuration with a dictionary of values from elsewhere.""" + for key, value in config.items(): + setattr(cls, key, value) diff --git a/incorporealcms/lib.py b/incorporealcms/lib.py index c4e323a..d1d75ea 100644 --- a/incorporealcms/lib.py +++ b/incorporealcms/lib.py @@ -9,10 +9,10 @@ import os import re import markdown -from flask import current_app as app -from flask import make_response, render_template, request from markupsafe import Markup +from incorporealcms.config import Config + logger = logging.getLogger(__name__) @@ -30,31 +30,31 @@ def init_md(): """ # initialize markdown parser from config, but include # extensions our app depends on, like the meta extension - return markdown.Markdown(extensions=app.config['MARKDOWN_EXTENSIONS'] + ['meta'], - extension_configs=app.config['MARKDOWN_EXTENSION_CONFIGS']) + return markdown.Markdown(extensions=Config.MARKDOWN_EXTENSIONS + ['meta'], + extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS) def 
instance_resource_path_to_request_path(path): - """Reverse a (presumed to exist) RELATIVE disk path to the canonical path that would show up in a Flask route. + """Reverse a relative disk path to the path that would show up in a URL request.""" + return '/' + re.sub(r'.md$', '', re.sub(r'index.md$', '', path)) - This does not include the leading /, so aside from the root index case, this should be - bidirectional. + +def parse_md(path: str): + """Given a file to parse, return file content and other derived data along with the md object. + + Args: + path: the path to the file to render """ - return re.sub(r'^pages/', '', re.sub(r'.md$', '', re.sub(r'index.md$', '', path))) - - -def parse_md(resolved_path): - """Given a file to parse, return file content and other derived data along with the md object.""" try: - logger.debug("opening resolved path '%s'", resolved_path) - with app.open_instance_resource(resolved_path, 'r') as entry_file: - mtime = datetime.datetime.fromtimestamp(os.path.getmtime(entry_file.name), tz=datetime.timezone.utc) - entry = entry_file.read() - logger.debug("resolved path '%s' read", resolved_path) + logger.debug("opening path '%s'", path) + with open(path, 'r') as input_file: + mtime = datetime.datetime.fromtimestamp(os.path.getmtime(input_file.name), tz=datetime.timezone.utc) + entry = input_file.read() + logger.debug("path '%s' read", path) md = init_md() content = Markup(md.convert(entry)) except OSError: - logger.exception("resolved path '%s' could not be opened!", resolved_path) + logger.exception("path '%s' could not be opened!", path) raise except ValueError: logger.exception("error parsing/rendering markdown!") @@ -65,30 +65,8 @@ def parse_md(resolved_path): logger.debug("file metadata: %s", md.Meta) - page_name = (get_meta_str(md, 'title') if md.Meta.get('title') else - f'/{instance_resource_path_to_request_path(resolved_path)}') - page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}' if page_name else app.config['TITLE_SUFFIX'] 
+ page_name = get_meta_str(md, 'title') if md.Meta.get('title') else path + page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX logger.debug("title (potentially derived): %s", page_title) return content, md, page_name, page_title, mtime - - -def render(template_name_or_list, **context): - """Wrap Flask's render_template. - - * Determine the proper site theme to use in the template and provide it. - """ - page_styles = app.config['PAGE_STYLES'] - selected_style = request.args.get('style', None) - if selected_style: - user_style = selected_style - else: - user_style = request.cookies.get('user-style') - logger.debug("user style cookie: %s", user_style) - context['user_style'] = page_styles.get(user_style, page_styles.get(app.config['DEFAULT_PAGE_STYLE'])) - context['page_styles'] = page_styles - - resp = make_response(render_template(template_name_or_list, **context)) - if selected_style: - resp.set_cookie('user-style', selected_style) - return resp diff --git a/incorporealcms/pages.py b/incorporealcms/pages.py index abdfa38..35c3857 100644 --- a/incorporealcms/pages.py +++ b/incorporealcms/pages.py @@ -6,77 +6,49 @@ SPDX-License-Identifier: AGPL-3.0-or-later import logging import os -from flask import Blueprint, abort -from flask import current_app as app -from flask import redirect, request, send_from_directory from markupsafe import Markup from werkzeug.security import safe_join -from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md, render +from incorporealcms import env +from incorporealcms.config import Config +from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md logger = logging.getLogger(__name__) -bp = Blueprint('pages', __name__, url_prefix='/') - -@bp.route('/', defaults={'path': 'index'}) -@bp.route('/') -def display_page(path): - """Get the file contents of the requested path and render the file.""" - try: - 
resolved_path, render_type = request_path_to_instance_resource_path(path) - logger.debug("received request for path '%s', resolved to '%s', type '%s'", - path, resolved_path, render_type) - except PermissionError: - abort(400) - except IsADirectoryError: - return redirect(f'/{path}/', code=301) - except FileNotFoundError: - abort(404) - - if render_type == 'file': - return send_from_directory(app.instance_path, resolved_path) - elif render_type == 'symlink': - logger.debug("attempting to redirect path '%s' to reverse of resource '%s'", path, resolved_path) - redirect_path = f'/{instance_resource_path_to_request_path(resolved_path)}' - logger.debug("redirect path: '%s'", redirect_path) - return redirect(redirect_path, code=301) - elif render_type == 'markdown': - logger.debug("treating path '%s' as markdown '%s'", path, resolved_path) - return handle_markdown_file_path(resolved_path) - else: - logger.exception("unsupported render_type '%s'!?", render_type) - abort(500) - - -def handle_markdown_file_path(resolved_path): +def handle_markdown_file_path(path: str) -> str: """Given a location on disk, attempt to open it and render the markdown within.""" try: - content, md, page_name, page_title, mtime = parse_md(resolved_path) + content, md, page_name, page_title, mtime = parse_md(path) except OSError: - logger.exception("resolved path '%s' could not be opened!", resolved_path) - abort(500) + logger.exception("path '%s' could not be opened!", path) + raise except ValueError: logger.exception("error parsing/rendering markdown!") - abort(500) + raise except TypeError: logger.exception("error loading/rendering markdown!") - abort(500) + raise else: - parent_navs = generate_parent_navs(resolved_path) + parent_navs = generate_parent_navs(path) extra_footer = get_meta_str(md, 'footer') if md.Meta.get('footer') else None - template = get_meta_str(md, 'template') if md.Meta.get('template') else 'base.html' + template_name = get_meta_str(md, 'template') if 
md.Meta.get('template') else 'base.html' # check if this has a HTTP redirect redirect_url = get_meta_str(md, 'redirect') if md.Meta.get('redirect') else None if redirect_url: - logger.debug("redirecting via meta tag to '%s'", redirect_url) - return redirect(redirect_url, code=301) + raise ValueError("redirects in markdown are unsupported!") - return render(template, title=page_title, description=get_meta_str(md, 'description'), - image=get_meta_str(md, 'image'), base_url=request.base_url, content=content, - navs=parent_navs, mtime=mtime.strftime('%Y-%m-%d %H:%M:%S %Z'), - extra_footer=extra_footer) + template = env.get_template(template_name) + return template.render(title=page_title, + config=Config, + description=get_meta_str(md, 'description'), + image=get_meta_str(md, 'image'), + content=content, + user_style=Config.PAGE_STYLES.get(Config.DEFAULT_PAGE_STYLE), + base_url=Config.BASE_HOST, navs=parent_navs, + mtime=mtime.strftime('%Y-%m-%d %H:%M:%S %Z'), + extra_footer=extra_footer) def request_path_to_instance_resource_path(path): @@ -139,9 +111,9 @@ def request_path_to_instance_resource_path(path): def generate_parent_navs(path): """Create a series of paths/links to navigate up from the given resource path.""" - if path == 'pages/index.md': + if path == 'index.md': # bail and return the domain name as a terminal case - return [(app.config['DOMAIN_NAME'], '/')] + return [(Config.DOMAIN_NAME, '/')] else: if path.endswith('index.md'): # index case: one dirname for foo/bar/index.md -> foo/bar, one for foo/bar -> foo @@ -152,7 +124,7 @@ def generate_parent_navs(path): # generate the request path (i.e. 
what the link will be) for this path, and # also the resource path of this parent (which is always a dir, so always index.md) - request_path = f'/{instance_resource_path_to_request_path(path)}' + request_path = instance_resource_path_to_request_path(path) parent_resource_path = os.path.join(parent_resource_dir, 'index.md') logger.debug("resource path: '%s'; request path: '%s'; parent resource path: '%s'", path, @@ -163,7 +135,7 @@ def generate_parent_navs(path): # read the resource try: - with app.open_instance_resource(path, 'r') as entry_file: + with open(path, 'r') as entry_file: entry = entry_file.read() _ = Markup(md.convert(entry)) page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title') diff --git a/incorporealcms/ssg.py b/incorporealcms/ssg.py new file mode 100644 index 0000000..5016ef2 --- /dev/null +++ b/incorporealcms/ssg.py @@ -0,0 +1,120 @@ +"""Build an instance as a static site suitable for serving via e.g. Nginx. + +SPDX-FileCopyrightText: © 2022 Brian S. Stephan +SPDX-License-Identifier: AGPL-3.0-or-later +""" +import argparse +import os +import shutil +import stat +import tempfile + +from termcolor import cprint + +from incorporealcms import init_instance +from incorporealcms.pages import handle_markdown_file_path + + +def build(): + """Build the static site generated against an instance directory.""" + parser = argparse.ArgumentParser( + description="Build the static site generated against an instance directory.", + ) + parser.add_argument( + 'instance_dir', help="path to instance directory root (NOTE: the program will go into pages/)" + ) + parser.add_argument( + 'output_dir', help="path to directory to output to (NOTE: the program must be able to write into its parent!)" + ) + args = parser.parse_args() + + # check output path before doing work + if not os.path.isdir(args.output_dir): + # if it doesn't exist, great, we'll just move the temporary dir later; + # if it exists and is a dir, that's fine, but if it's a file, we should error 
+ if os.path.exists(args.output_dir): + raise ValueError(f"specified output path '{args.output_dir}' exists as a file!") + + output_dir = os.path.abspath(args.output_dir) + instance_dir = os.path.abspath(args.instance_dir) + pages_dir = os.path.join(instance_dir, 'pages') + + # initialize configuration with the path to the instance + init_instance(instance_dir) + + # putting the temporary directory next to the desired output so we can safely rename it later + tmp_output_dir = tempfile.mkdtemp(dir=os.path.dirname(output_dir)) + cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green') + + # CORE CONTENT + # render and/or copy into the output dir after changing into the instance dir (to simplify paths) + os.chdir(pages_dir) + for base_dir, subdirs, files in os.walk(pages_dir): + # remove the absolute path of the pages directory from the base_dir + base_dir = os.path.relpath(base_dir, pages_dir) + # create subdirs seen here for subsequent depth + for subdir in subdirs: + dst = os.path.join(tmp_output_dir, base_dir, subdir) + if os.path.islink(os.path.join(base_dir, subdir)): + # keep the link relative to the output directory + src = symlink_to_relative_dest(pages_dir, os.path.join(base_dir, subdir)) + print(f"creating directory symlink '{dst}' -> '{src}'") + os.symlink(src, dst, target_is_directory=True) + else: + print(f"creating directory '{dst}'") + os.mkdir(dst) + + # process and copy files + for file_ in files: + dst = os.path.join(tmp_output_dir, base_dir, file_) + if os.path.islink(os.path.join(base_dir, file_)): + # keep the link relative to the output directory + src = symlink_to_relative_dest(pages_dir, os.path.join(base_dir, file_)) + print(f"creating symlink '{dst}' -> '{src}'") + os.symlink(src, dst, target_is_directory=False) + else: + src = os.path.join(base_dir, file_) + print(f"copying file '{src}' -> '{dst}'") + shutil.copy2(src, dst) + + # render markdown as HTML + if src.endswith('.md'): + rendered_file = 
+ dst.removesuffix('.md') + '.html' + try: + content = handle_markdown_file_path(src) + except UnicodeDecodeError: + # perhaps this isn't a markdown file at all for some reason; we + # copied it above so stick with that + cprint(f"{src} has invalid bytes! skipping", 'yellow') + continue + with open(rendered_file, 'w') as dst_file: + dst_file.write(content) + + # TODO: STATIC DIR + + # move temporary dir to the destination + old_output_dir = f'{output_dir}-old-{os.path.basename(tmp_output_dir)}' + if os.path.exists(output_dir): + cprint(f"renaming '{output_dir}' to '{old_output_dir}'", 'green') + os.rename(output_dir, old_output_dir) + cprint(f"renaming '{tmp_output_dir}' to '{output_dir}'", 'green') + os.rename(tmp_output_dir, output_dir) + os.chmod(output_dir, + stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + + # TODO: unlink old dir above? arg flag? + + +def symlink_to_relative_dest(base_dir: str, source: str) -> str: + """Given a symlink, make sure it points to something inside the instance and provide its real destination. + + Args: + base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in. + source: the symlink to check + Returns: + what the symlink points at + """ + if not os.path.realpath(source).startswith(base_dir): + raise ValueError(f"symlink destination {os.path.realpath(source)} is outside the instance!") + # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is... 
+ return os.path.relpath(os.path.realpath(source), base_dir) diff --git a/incorporealcms/templates/base.html b/incorporealcms/templates/base.html index c048f94..737f351 100644 --- a/incorporealcms/templates/base.html +++ b/incorporealcms/templates/base.html @@ -8,12 +8,12 @@ SPDX-License-Identifier: AGPL-3.0-or-later {{ title }} {% if title %}{% endif %} {% if description %}{% endif %} -{% if image %}{% endif %} +{% if image %}{% endif %} - + diff --git a/tests/instance/config.json b/tests/instance/config.json new file mode 100644 index 0000000..ccafca1 --- /dev/null +++ b/tests/instance/config.json @@ -0,0 +1,27 @@ +{ + "LOGGING": { + "version": 1, + "formatters": { + "default": { + "format": "[%(asctime)s %(levelname)-7s %(name)s] %(message)s" + } + }, + "handlers": { + "console": { + "level": "DEBUG", + "class": "logging.StreamHandler", + "formatter": "default" + } + }, + "loggers": { + "incorporealcms.mdx": { + "level": "DEBUG", + "handlers": ["console"] + }, + "incorporealcms.pages": { + "level": "DEBUG", + "handlers": ["console"] + } + } + } +} diff --git a/tests/instance/pages/redirect.md b/tests/instance/pages/redirect.md deleted file mode 100644 index 897ad86..0000000 --- a/tests/instance/pages/redirect.md +++ /dev/null @@ -1 +0,0 @@ -Redirect: http://www.google.com/