static site generator part 1 --- incomplete, but kinda works

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
Brian S. Stephan 2025-03-13 16:14:12 -05:00
parent 68d6f83b4c
commit 0d59e64323
Signed by: bss
GPG Key ID: 3DE06D3180895FCB
8 changed files with 224 additions and 133 deletions

View File

@ -3,48 +3,36 @@
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org> SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later SPDX-License-Identifier: AGPL-3.0-or-later
""" """
import json
import logging import logging
import os import os
from logging.config import dictConfig from logging.config import dictConfig
from flask import Flask, request from jinja2 import Environment, PackageLoader, select_autoescape
from incorporealcms.config import Config
env = Environment(
loader=PackageLoader('incorporealcms'),
autoescape=select_autoescape(),
)
def create_app(instance_path=None, test_config=None): def init_instance(instance_path: str, test_config: dict = None):
"""Create the Flask app, with allowances for customizing path and test settings.""" """Create the instance context, with allowances for customizing path and test settings."""
app = Flask(__name__, instance_relative_config=True, instance_path=instance_path) # load the instance config.json, if there is one
instance_config = os.path.join(instance_path, 'config.json')
if os.path.isfile(instance_config):
with open(instance_config, 'r') as config:
Config.update(json.load(config))
# if it doesn't already exist, create the instance folder
os.makedirs(app.instance_path, exist_ok=True)
# load defaults from config provided with the application
app.config.from_object('incorporealcms.config.Config')
# load specific instance configurations
app.config.from_pyfile('config.py', silent=True)
if test_config: if test_config:
app.config.from_mapping(test_config) Config.update(test_config)
dictConfig(app.config['LOGGING']) # stash some stuff
Config.INSTANCE_DIR = os.path.abspath(instance_path)
dictConfig(Config.LOGGING)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
logger.debug("instance path: %s", app.instance_path) logger.debug("instance dir: %s", Config.INSTANCE_DIR)
@app.before_request
def log_request():
logger.info("REQUEST: %s %s", request.method, request.path)
@app.after_request
def log_response(response):
logger.info("RESPONSE: %s %s: %s", request.method, request.path, response.status)
return response
from . import error_pages, feed, pages, static
app.register_blueprint(feed.bp)
app.register_blueprint(pages.bp)
app.register_blueprint(static.bp)
app.register_error_handler(400, error_pages.bad_request)
app.register_error_handler(404, error_pages.page_not_found)
app.register_error_handler(500, error_pages.internal_server_error)
return app

View File

@ -61,11 +61,18 @@ class Config(object):
} }
DEFAULT_PAGE_STYLE = 'light' DEFAULT_PAGE_STYLE = 'light'
DOMAIN_NAME = 'example.com' DOMAIN_NAME = 'example.org'
TITLE_SUFFIX = DOMAIN_NAME TITLE_SUFFIX = DOMAIN_NAME
BASE_HOST = 'http://' + DOMAIN_NAME
CONTACT_EMAIL = 'admin@example.com' CONTACT_EMAIL = 'admin@example.com'
# feed settings # feed settings
AUTHOR = {'name': 'Test Name', 'email': 'admin@example.com'} AUTHOR = {'name': 'Test Name', 'email': 'admin@example.com'}
# specify FAVICON in your instance config.py to override the provided icon # specify FAVICON in your instance config.py to override the provided icon
@classmethod
def update(cls, config: dict):
    """Copy every entry of a dictionary onto this configuration class.

    Args:
        config: setting names mapped to the values to assign; an attribute that
            already exists under the same name is overwritten
    """
    for name in config:
        setattr(cls, name, config[name])

View File

@ -9,10 +9,10 @@ import os
import re import re
import markdown import markdown
from flask import current_app as app
from flask import make_response, render_template, request
from markupsafe import Markup from markupsafe import Markup
from incorporealcms.config import Config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,31 +30,31 @@ def init_md():
""" """
# initialize markdown parser from config, but include # initialize markdown parser from config, but include
# extensions our app depends on, like the meta extension # extensions our app depends on, like the meta extension
return markdown.Markdown(extensions=app.config['MARKDOWN_EXTENSIONS'] + ['meta'], return markdown.Markdown(extensions=Config.MARKDOWN_EXTENSIONS + ['meta'],
extension_configs=app.config['MARKDOWN_EXTENSION_CONFIGS']) extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS)
def instance_resource_path_to_request_path(path): def instance_resource_path_to_request_path(path):
"""Reverse a (presumed to exist) RELATIVE disk path to the canonical path that would show up in a Flask route. """Reverse a relative disk path to the path that would show up in a URL request."""
return '/' + re.sub(r'.md$', '', re.sub(r'index.md$', '', path))
This does not include the leading /, so aside from the root index case, this should be
bidirectional. def parse_md(path: str):
"""Given a file to parse, return file content and other derived data along with the md object.
Args:
path: the path to the file to render
""" """
return re.sub(r'^pages/', '', re.sub(r'.md$', '', re.sub(r'index.md$', '', path)))
def parse_md(resolved_path):
"""Given a file to parse, return file content and other derived data along with the md object."""
try: try:
logger.debug("opening resolved path '%s'", resolved_path) logger.debug("opening path '%s'", path)
with app.open_instance_resource(resolved_path, 'r') as entry_file: with open(path, 'r') as input_file:
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(entry_file.name), tz=datetime.timezone.utc) mtime = datetime.datetime.fromtimestamp(os.path.getmtime(input_file.name), tz=datetime.timezone.utc)
entry = entry_file.read() entry = input_file.read()
logger.debug("resolved path '%s' read", resolved_path) logger.debug("path '%s' read", path)
md = init_md() md = init_md()
content = Markup(md.convert(entry)) content = Markup(md.convert(entry))
except OSError: except OSError:
logger.exception("resolved path '%s' could not be opened!", resolved_path) logger.exception("path '%s' could not be opened!", path)
raise raise
except ValueError: except ValueError:
logger.exception("error parsing/rendering markdown!") logger.exception("error parsing/rendering markdown!")
@ -65,30 +65,8 @@ def parse_md(resolved_path):
logger.debug("file metadata: %s", md.Meta) logger.debug("file metadata: %s", md.Meta)
page_name = (get_meta_str(md, 'title') if md.Meta.get('title') else page_name = get_meta_str(md, 'title') if md.Meta.get('title') else path
f'/{instance_resource_path_to_request_path(resolved_path)}') page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX
page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}' if page_name else app.config['TITLE_SUFFIX']
logger.debug("title (potentially derived): %s", page_title) logger.debug("title (potentially derived): %s", page_title)
return content, md, page_name, page_title, mtime return content, md, page_name, page_title, mtime
def render(template_name_or_list, **context):
"""Wrap Flask's render_template.
* Determine the proper site theme to use in the template and provide it.
"""
page_styles = app.config['PAGE_STYLES']
selected_style = request.args.get('style', None)
if selected_style:
user_style = selected_style
else:
user_style = request.cookies.get('user-style')
logger.debug("user style cookie: %s", user_style)
context['user_style'] = page_styles.get(user_style, page_styles.get(app.config['DEFAULT_PAGE_STYLE']))
context['page_styles'] = page_styles
resp = make_response(render_template(template_name_or_list, **context))
if selected_style:
resp.set_cookie('user-style', selected_style)
return resp

View File

@ -6,77 +6,49 @@ SPDX-License-Identifier: AGPL-3.0-or-later
import logging import logging
import os import os
from flask import Blueprint, abort
from flask import current_app as app
from flask import redirect, request, send_from_directory
from markupsafe import Markup from markupsafe import Markup
from werkzeug.security import safe_join from werkzeug.security import safe_join
from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md, render from incorporealcms import env
from incorporealcms.config import Config
from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
bp = Blueprint('pages', __name__, url_prefix='/')
def handle_markdown_file_path(path: str) -> str:
@bp.route('/', defaults={'path': 'index'})
@bp.route('/<path:path>')
def display_page(path):
"""Get the file contents of the requested path and render the file."""
try:
resolved_path, render_type = request_path_to_instance_resource_path(path)
logger.debug("received request for path '%s', resolved to '%s', type '%s'",
path, resolved_path, render_type)
except PermissionError:
abort(400)
except IsADirectoryError:
return redirect(f'/{path}/', code=301)
except FileNotFoundError:
abort(404)
if render_type == 'file':
return send_from_directory(app.instance_path, resolved_path)
elif render_type == 'symlink':
logger.debug("attempting to redirect path '%s' to reverse of resource '%s'", path, resolved_path)
redirect_path = f'/{instance_resource_path_to_request_path(resolved_path)}'
logger.debug("redirect path: '%s'", redirect_path)
return redirect(redirect_path, code=301)
elif render_type == 'markdown':
logger.debug("treating path '%s' as markdown '%s'", path, resolved_path)
return handle_markdown_file_path(resolved_path)
else:
logger.exception("unsupported render_type '%s'!?", render_type)
abort(500)
def handle_markdown_file_path(resolved_path):
"""Given a location on disk, attempt to open it and render the markdown within.""" """Given a location on disk, attempt to open it and render the markdown within."""
try: try:
content, md, page_name, page_title, mtime = parse_md(resolved_path) content, md, page_name, page_title, mtime = parse_md(path)
except OSError: except OSError:
logger.exception("resolved path '%s' could not be opened!", resolved_path) logger.exception("path '%s' could not be opened!", path)
abort(500) raise
except ValueError: except ValueError:
logger.exception("error parsing/rendering markdown!") logger.exception("error parsing/rendering markdown!")
abort(500) raise
except TypeError: except TypeError:
logger.exception("error loading/rendering markdown!") logger.exception("error loading/rendering markdown!")
abort(500) raise
else: else:
parent_navs = generate_parent_navs(resolved_path) parent_navs = generate_parent_navs(path)
extra_footer = get_meta_str(md, 'footer') if md.Meta.get('footer') else None extra_footer = get_meta_str(md, 'footer') if md.Meta.get('footer') else None
template = get_meta_str(md, 'template') if md.Meta.get('template') else 'base.html' template_name = get_meta_str(md, 'template') if md.Meta.get('template') else 'base.html'
# check if this has a HTTP redirect # check if this has a HTTP redirect
redirect_url = get_meta_str(md, 'redirect') if md.Meta.get('redirect') else None redirect_url = get_meta_str(md, 'redirect') if md.Meta.get('redirect') else None
if redirect_url: if redirect_url:
logger.debug("redirecting via meta tag to '%s'", redirect_url) raise ValueError("redirects in markdown are unsupported!")
return redirect(redirect_url, code=301)
return render(template, title=page_title, description=get_meta_str(md, 'description'), template = env.get_template(template_name)
image=get_meta_str(md, 'image'), base_url=request.base_url, content=content, return template.render(title=page_title,
navs=parent_navs, mtime=mtime.strftime('%Y-%m-%d %H:%M:%S %Z'), config=Config,
extra_footer=extra_footer) description=get_meta_str(md, 'description'),
image=get_meta_str(md, 'image'),
content=content,
user_style=Config.PAGE_STYLES.get(Config.DEFAULT_PAGE_STYLE),
base_url=Config.BASE_HOST, navs=parent_navs,
mtime=mtime.strftime('%Y-%m-%d %H:%M:%S %Z'),
extra_footer=extra_footer)
def request_path_to_instance_resource_path(path): def request_path_to_instance_resource_path(path):
@ -139,9 +111,9 @@ def request_path_to_instance_resource_path(path):
def generate_parent_navs(path): def generate_parent_navs(path):
"""Create a series of paths/links to navigate up from the given resource path.""" """Create a series of paths/links to navigate up from the given resource path."""
if path == 'pages/index.md': if path == 'index.md':
# bail and return the domain name as a terminal case # bail and return the domain name as a terminal case
return [(app.config['DOMAIN_NAME'], '/')] return [(Config.DOMAIN_NAME, '/')]
else: else:
if path.endswith('index.md'): if path.endswith('index.md'):
# index case: one dirname for foo/bar/index.md -> foo/bar, one for foo/bar -> foo # index case: one dirname for foo/bar/index.md -> foo/bar, one for foo/bar -> foo
@ -152,7 +124,7 @@ def generate_parent_navs(path):
# generate the request path (i.e. what the link will be) for this path, and # generate the request path (i.e. what the link will be) for this path, and
# also the resource path of this parent (which is always a dir, so always index.md) # also the resource path of this parent (which is always a dir, so always index.md)
request_path = f'/{instance_resource_path_to_request_path(path)}' request_path = instance_resource_path_to_request_path(path)
parent_resource_path = os.path.join(parent_resource_dir, 'index.md') parent_resource_path = os.path.join(parent_resource_dir, 'index.md')
logger.debug("resource path: '%s'; request path: '%s'; parent resource path: '%s'", path, logger.debug("resource path: '%s'; request path: '%s'; parent resource path: '%s'", path,
@ -163,7 +135,7 @@ def generate_parent_navs(path):
# read the resource # read the resource
try: try:
with app.open_instance_resource(path, 'r') as entry_file: with open(path, 'r') as entry_file:
entry = entry_file.read() entry = entry_file.read()
_ = Markup(md.convert(entry)) _ = Markup(md.convert(entry))
page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title') page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title')

120
incorporealcms/ssg.py Normal file
View File

@ -0,0 +1,120 @@
"""Build an instance as a static site suitable for serving via e.g. Nginx.
SPDX-FileCopyrightText: © 2022 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import argparse
import os
import shutil
import stat
import tempfile
from termcolor import cprint
from incorporealcms import init_instance
from incorporealcms.pages import handle_markdown_file_path
def build():
    """Build the static site generated against an instance directory.

    Reads ``instance_dir`` and ``output_dir`` from the command line, mirrors the instance's
    ``pages/`` tree into a temporary directory created next to ``output_dir`` (rendering each
    markdown file to HTML alongside its copy), then renames the temporary directory into place.
    An existing output directory is renamed aside rather than deleted.

    If populating the temporary directory fails, the temporary directory is removed and the
    original exception is re-raised, so a failed build does not leave debris next to the output.

    Raises:
        ValueError: if the output path exists but is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Build the static site generated against an instance directory.",
    )
    parser.add_argument(
        'instance_dir', help="path to instance directory root (NOTE: the program will go into pages/)"
    )
    parser.add_argument(
        'output_dir', help="path to directory to output to (NOTE: the program must be able to write into its parent!)"
    )
    args = parser.parse_args()

    # check output path before doing work: if it doesn't exist, great, we'll just move the
    # temporary dir later; if it exists and is a dir, that's fine; anything else is an error
    if os.path.exists(args.output_dir) and not os.path.isdir(args.output_dir):
        raise ValueError(f"specified output path '{args.output_dir}' exists as a file!")

    output_dir = os.path.abspath(args.output_dir)
    instance_dir = os.path.abspath(args.instance_dir)
    pages_dir = os.path.join(instance_dir, 'pages')

    # initialize configuration with the path to the instance
    init_instance(instance_dir)

    # putting the temporary directory next to the desired output so we can safely rename it later
    tmp_output_dir = tempfile.mkdtemp(dir=os.path.dirname(output_dir))
    cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green')

    # CORE CONTENT
    try:
        _populate_output_dir(pages_dir, tmp_output_dir)
    except Exception:
        # the partial tree is useless and would otherwise accumulate next to the output dir
        cprint(f"build failed; removing temporary directory '{tmp_output_dir}'", 'red')
        shutil.rmtree(tmp_output_dir, ignore_errors=True)
        raise

    # TODO: STATIC DIR

    # move temporary dir to the destination
    old_output_dir = f'{output_dir}-old-{os.path.basename(tmp_output_dir)}'
    if os.path.exists(output_dir):
        cprint(f"renaming '{output_dir}' to '{old_output_dir}'", 'green')
        os.rename(output_dir, old_output_dir)
    cprint(f"renaming '{tmp_output_dir}' to '{output_dir}'", 'green')
    os.rename(tmp_output_dir, output_dir)
    # mkdtemp creates the directory accessible only by its owner; open it up to rwxr-xr-x
    os.chmod(output_dir,
             stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

    # TODO: unlink old dir above? arg flag?


def _populate_output_dir(pages_dir: str, tmp_output_dir: str) -> None:
    """Mirror the pages tree into tmp_output_dir, rendering each copied markdown file to HTML beside it.

    Args:
        pages_dir: absolute path of the instance's pages directory (the process changes into it)
        tmp_output_dir: absolute path of the directory to write into
    """
    # render and/or copy into the output dir after changing into the pages dir (to simplify paths)
    os.chdir(pages_dir)
    for base_dir, subdirs, files in os.walk(pages_dir):
        # remove the absolute path of the pages directory from the base_dir
        base_dir = os.path.relpath(base_dir, pages_dir)

        # create subdirs seen here for subsequent depth
        for subdir in subdirs:
            dst = os.path.join(tmp_output_dir, base_dir, subdir)
            if os.path.islink(os.path.join(base_dir, subdir)):
                # keep the link relative to the output directory
                src = symlink_to_relative_dest(pages_dir, os.path.join(base_dir, subdir))
                print(f"creating directory symlink '{dst}' -> '{src}'")
                os.symlink(src, dst, target_is_directory=True)
            else:
                print(f"creating directory '{dst}'")
                os.mkdir(dst)

        # process and copy files
        for file_ in files:
            dst = os.path.join(tmp_output_dir, base_dir, file_)
            if os.path.islink(os.path.join(base_dir, file_)):
                # keep the link relative to the output directory
                src = symlink_to_relative_dest(pages_dir, os.path.join(base_dir, file_))
                print(f"creating symlink '{dst}' -> '{src}'")
                os.symlink(src, dst, target_is_directory=False)
            else:
                src = os.path.join(base_dir, file_)
                print(f"copying file '{src}' -> '{dst}'")
                shutil.copy2(src, dst)

                # render markdown as HTML
                if src.endswith('.md'):
                    rendered_file = dst.removesuffix('.md') + '.html'
                    try:
                        content = handle_markdown_file_path(src)
                    except UnicodeDecodeError:
                        # perhaps this isn't a markdown file at all for some reason; we
                        # copied it above so stick with that
                        cprint(f"{src} has invalid bytes! skipping", 'yellow')
                        continue

                    with open(rendered_file, 'w') as dst_file:
                        dst_file.write(content)
def symlink_to_relative_dest(base_dir: str, source: str) -> str:
    """Given a symlink, make sure it points to something inside the instance and provide its real destination.

    Both base_dir and the symlink are fully resolved before being compared, and containment is
    decided on whole path components, so a sibling such as '/srv/pages-old' is not mistaken for
    being inside '/srv/pages'.

    Args:
        base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in.
        source: the symlink to check; a relative path is resolved against the current working directory

    Returns:
        the resolved destination of the symlink, expressed relative to base_dir

    Raises:
        ValueError: if the symlink resolves to somewhere outside base_dir
    """
    real_base = os.path.realpath(base_dir)
    real_dest = os.path.realpath(source)
    # a plain string prefix test would accept '/x/pages-evil' for base '/x/pages'; compare components
    if os.path.commonpath([real_base, real_dest]) != real_base:
        raise ValueError(f"symlink destination {real_dest} is outside the instance!")

    # NOTE(review): the result is relative to base_dir, not to the directory holding the link; a link
    # recreated verbatim inside a subdirectory would resolve against that subdirectory -- confirm callers
    return os.path.relpath(real_dest, real_base)

View File

@ -8,12 +8,12 @@ SPDX-License-Identifier: AGPL-3.0-or-later
<title>{{ title }}</title> <title>{{ title }}</title>
{% if title %}<meta property="og:title" content="{{ title }}">{% endif %} {% if title %}<meta property="og:title" content="{{ title }}">{% endif %}
{% if description %}<meta property="og:description" content="{{ description }}">{% endif %} {% if description %}<meta property="og:description" content="{{ description }}">{% endif %}
{% if image %}<meta property="og:image" content="{{ image }}">{% endif %}
<meta property="og:url" content="{{ base_url }}"> <meta property="og:url" content="{{ base_url }}">
{% if image %}<meta property="og:image" content="{{ image }}">{% endif %}
<meta name="twitter:card" content="summary_large_image"> <meta name="twitter:card" content="summary_large_image">
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="{{ user_style }}"> <link rel="stylesheet" href="{{ user_style }}">
<link rel="icon" href="{% if config.FAVICON %}{{ config.FAVICON }}{% else %}{{ url_for('static', filename='img/favicon.png') }}{% endif %}"> <link rel="icon" href="{% if config.FAVICON %}{{ config.FAVICON }}{% else %}/static/img/favicon.png{% endif %}">
<link rel="alternate" type="application/atom+xml" href="/feed/atom"> <link rel="alternate" type="application/atom+xml" href="/feed/atom">
<link rel="alternate" type="application/rss+xml" href="/feed/rss"> <link rel="alternate" type="application/rss+xml" href="/feed/rss">

View File

@ -0,0 +1,27 @@
{
"LOGGING": {
"version": 1,
"formatters": {
"default": {
"format": "[%(asctime)s %(levelname)-7s %(name)s] %(message)s"
}
},
"handlers": {
"console": {
"level": "DEBUG",
"class": "logging.StreamHandler",
"formatter": "default"
}
},
"loggers": {
"incorporealcms.mdx": {
"level": "DEBUG",
"handlers": ["console"]
},
"incorporealcms.pages": {
"level": "DEBUG",
"handlers": ["console"]
}
}
}
}

View File

@ -1 +0,0 @@
Redirect: http://www.google.com/