static site generator part 1 --- incomplete, but kinda works

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
Brian S. Stephan 2025-03-13 16:14:12 -05:00
parent 68d6f83b4c
commit 0d59e64323
Signed by: bss
GPG Key ID: 3DE06D3180895FCB
8 changed files with 224 additions and 133 deletions

View File

@ -3,48 +3,36 @@
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import json
import logging
import os
from logging.config import dictConfig
from flask import Flask, request
from jinja2 import Environment, PackageLoader, select_autoescape
from incorporealcms.config import Config
env = Environment(
loader=PackageLoader('incorporealcms'),
autoescape=select_autoescape(),
)
def create_app(instance_path=None, test_config=None):
"""Create the Flask app, with allowances for customizing path and test settings."""
app = Flask(__name__, instance_relative_config=True, instance_path=instance_path)
def init_instance(instance_path: str, test_config: dict = None):
"""Create the instance context, with allowances for customizing path and test settings."""
# load the instance config.json, if there is one
instance_config = os.path.join(instance_path, 'config.json')
if os.path.isfile(instance_config):
with open(instance_config, 'r') as config:
Config.update(json.load(config))
# if it doesn't already exist, create the instance folder
os.makedirs(app.instance_path, exist_ok=True)
# load defaults from config provided with the application
app.config.from_object('incorporealcms.config.Config')
# load specific instance configurations
app.config.from_pyfile('config.py', silent=True)
if test_config:
app.config.from_mapping(test_config)
Config.update(test_config)
dictConfig(app.config['LOGGING'])
# stash some stuff
Config.INSTANCE_DIR = os.path.abspath(instance_path)
dictConfig(Config.LOGGING)
logger = logging.getLogger(__name__)
logger.debug("instance path: %s", app.instance_path)
@app.before_request
def log_request():
logger.info("REQUEST: %s %s", request.method, request.path)
@app.after_request
def log_response(response):
logger.info("RESPONSE: %s %s: %s", request.method, request.path, response.status)
return response
from . import error_pages, feed, pages, static
app.register_blueprint(feed.bp)
app.register_blueprint(pages.bp)
app.register_blueprint(static.bp)
app.register_error_handler(400, error_pages.bad_request)
app.register_error_handler(404, error_pages.page_not_found)
app.register_error_handler(500, error_pages.internal_server_error)
return app
logger.debug("instance dir: %s", Config.INSTANCE_DIR)

View File

@ -61,11 +61,18 @@ class Config(object):
}
DEFAULT_PAGE_STYLE = 'light'
DOMAIN_NAME = 'example.com'
DOMAIN_NAME = 'example.org'
TITLE_SUFFIX = DOMAIN_NAME
BASE_HOST = 'http://' + DOMAIN_NAME
CONTACT_EMAIL = 'admin@example.com'
# feed settings
AUTHOR = {'name': 'Test Name', 'email': 'admin@example.com'}
# specify FAVICON in your instance config.json to override the provided icon
@classmethod
def update(cls, config: dict):
    """Overlay the given mapping onto this configuration class.

    Every key in ``config`` becomes (or replaces) a class attribute of the
    same name, holding the corresponding value.

    Args:
        config: setting names mapped to the values they should take
    """
    for setting_name in config:
        setattr(cls, setting_name, config[setting_name])

View File

@ -9,10 +9,10 @@ import os
import re
import markdown
from flask import current_app as app
from flask import make_response, render_template, request
from markupsafe import Markup
from incorporealcms.config import Config
logger = logging.getLogger(__name__)
@ -30,31 +30,31 @@ def init_md():
"""
# initialize markdown parser from config, but include
# extensions our app depends on, like the meta extension
return markdown.Markdown(extensions=app.config['MARKDOWN_EXTENSIONS'] + ['meta'],
extension_configs=app.config['MARKDOWN_EXTENSION_CONFIGS'])
return markdown.Markdown(extensions=Config.MARKDOWN_EXTENSIONS + ['meta'],
extension_configs=Config.MARKDOWN_EXTENSION_CONFIGS)
def instance_resource_path_to_request_path(path):
"""Reverse a (presumed to exist) RELATIVE disk path to the canonical path that would show up in a Flask route.
"""Reverse a relative disk path to the path that would show up in a URL request."""
return '/' + re.sub(r'.md$', '', re.sub(r'index.md$', '', path))
This does not include the leading /, so aside from the root index case, this should be
bidirectional.
def parse_md(path: str):
"""Given a file to parse, return file content and other derived data along with the md object.
Args:
path: the path to the file to render
"""
return re.sub(r'^pages/', '', re.sub(r'.md$', '', re.sub(r'index.md$', '', path)))
def parse_md(resolved_path):
"""Given a file to parse, return file content and other derived data along with the md object."""
try:
logger.debug("opening resolved path '%s'", resolved_path)
with app.open_instance_resource(resolved_path, 'r') as entry_file:
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(entry_file.name), tz=datetime.timezone.utc)
entry = entry_file.read()
logger.debug("resolved path '%s' read", resolved_path)
logger.debug("opening path '%s'", path)
with open(path, 'r') as input_file:
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(input_file.name), tz=datetime.timezone.utc)
entry = input_file.read()
logger.debug("path '%s' read", path)
md = init_md()
content = Markup(md.convert(entry))
except OSError:
logger.exception("resolved path '%s' could not be opened!", resolved_path)
logger.exception("path '%s' could not be opened!", path)
raise
except ValueError:
logger.exception("error parsing/rendering markdown!")
@ -65,30 +65,8 @@ def parse_md(resolved_path):
logger.debug("file metadata: %s", md.Meta)
page_name = (get_meta_str(md, 'title') if md.Meta.get('title') else
f'/{instance_resource_path_to_request_path(resolved_path)}')
page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}' if page_name else app.config['TITLE_SUFFIX']
page_name = get_meta_str(md, 'title') if md.Meta.get('title') else path
page_title = f'{page_name} - {Config.TITLE_SUFFIX}' if page_name else Config.TITLE_SUFFIX
logger.debug("title (potentially derived): %s", page_title)
return content, md, page_name, page_title, mtime
def render(template_name_or_list, **context):
"""Wrap Flask's render_template.
* Determine the proper site theme to use in the template and provide it.
"""
page_styles = app.config['PAGE_STYLES']
selected_style = request.args.get('style', None)
if selected_style:
user_style = selected_style
else:
user_style = request.cookies.get('user-style')
logger.debug("user style cookie: %s", user_style)
context['user_style'] = page_styles.get(user_style, page_styles.get(app.config['DEFAULT_PAGE_STYLE']))
context['page_styles'] = page_styles
resp = make_response(render_template(template_name_or_list, **context))
if selected_style:
resp.set_cookie('user-style', selected_style)
return resp

View File

@ -6,77 +6,49 @@ SPDX-License-Identifier: AGPL-3.0-or-later
import logging
import os
from flask import Blueprint, abort
from flask import current_app as app
from flask import redirect, request, send_from_directory
from markupsafe import Markup
from werkzeug.security import safe_join
from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md, render
from incorporealcms import env
from incorporealcms.config import Config
from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md
logger = logging.getLogger(__name__)
bp = Blueprint('pages', __name__, url_prefix='/')
@bp.route('/', defaults={'path': 'index'})
@bp.route('/<path:path>')
def display_page(path):
"""Get the file contents of the requested path and render the file."""
try:
resolved_path, render_type = request_path_to_instance_resource_path(path)
logger.debug("received request for path '%s', resolved to '%s', type '%s'",
path, resolved_path, render_type)
except PermissionError:
abort(400)
except IsADirectoryError:
return redirect(f'/{path}/', code=301)
except FileNotFoundError:
abort(404)
if render_type == 'file':
return send_from_directory(app.instance_path, resolved_path)
elif render_type == 'symlink':
logger.debug("attempting to redirect path '%s' to reverse of resource '%s'", path, resolved_path)
redirect_path = f'/{instance_resource_path_to_request_path(resolved_path)}'
logger.debug("redirect path: '%s'", redirect_path)
return redirect(redirect_path, code=301)
elif render_type == 'markdown':
logger.debug("treating path '%s' as markdown '%s'", path, resolved_path)
return handle_markdown_file_path(resolved_path)
else:
logger.exception("unsupported render_type '%s'!?", render_type)
abort(500)
def handle_markdown_file_path(resolved_path):
def handle_markdown_file_path(path: str) -> str:
"""Given a location on disk, attempt to open it and render the markdown within."""
try:
content, md, page_name, page_title, mtime = parse_md(resolved_path)
content, md, page_name, page_title, mtime = parse_md(path)
except OSError:
logger.exception("resolved path '%s' could not be opened!", resolved_path)
abort(500)
logger.exception("path '%s' could not be opened!", path)
raise
except ValueError:
logger.exception("error parsing/rendering markdown!")
abort(500)
raise
except TypeError:
logger.exception("error loading/rendering markdown!")
abort(500)
raise
else:
parent_navs = generate_parent_navs(resolved_path)
parent_navs = generate_parent_navs(path)
extra_footer = get_meta_str(md, 'footer') if md.Meta.get('footer') else None
template = get_meta_str(md, 'template') if md.Meta.get('template') else 'base.html'
template_name = get_meta_str(md, 'template') if md.Meta.get('template') else 'base.html'
# check if this has a HTTP redirect
redirect_url = get_meta_str(md, 'redirect') if md.Meta.get('redirect') else None
if redirect_url:
logger.debug("redirecting via meta tag to '%s'", redirect_url)
return redirect(redirect_url, code=301)
raise ValueError("redirects in markdown are unsupported!")
return render(template, title=page_title, description=get_meta_str(md, 'description'),
image=get_meta_str(md, 'image'), base_url=request.base_url, content=content,
navs=parent_navs, mtime=mtime.strftime('%Y-%m-%d %H:%M:%S %Z'),
extra_footer=extra_footer)
template = env.get_template(template_name)
return template.render(title=page_title,
config=Config,
description=get_meta_str(md, 'description'),
image=get_meta_str(md, 'image'),
content=content,
user_style=Config.PAGE_STYLES.get(Config.DEFAULT_PAGE_STYLE),
base_url=Config.BASE_HOST, navs=parent_navs,
mtime=mtime.strftime('%Y-%m-%d %H:%M:%S %Z'),
extra_footer=extra_footer)
def request_path_to_instance_resource_path(path):
@ -139,9 +111,9 @@ def request_path_to_instance_resource_path(path):
def generate_parent_navs(path):
"""Create a series of paths/links to navigate up from the given resource path."""
if path == 'pages/index.md':
if path == 'index.md':
# bail and return the domain name as a terminal case
return [(app.config['DOMAIN_NAME'], '/')]
return [(Config.DOMAIN_NAME, '/')]
else:
if path.endswith('index.md'):
# index case: one dirname for foo/bar/index.md -> foo/bar, one for foo/bar -> foo
@ -152,7 +124,7 @@ def generate_parent_navs(path):
# generate the request path (i.e. what the link will be) for this path, and
# also the resource path of this parent (which is always a dir, so always index.md)
request_path = f'/{instance_resource_path_to_request_path(path)}'
request_path = instance_resource_path_to_request_path(path)
parent_resource_path = os.path.join(parent_resource_dir, 'index.md')
logger.debug("resource path: '%s'; request path: '%s'; parent resource path: '%s'", path,
@ -163,7 +135,7 @@ def generate_parent_navs(path):
# read the resource
try:
with app.open_instance_resource(path, 'r') as entry_file:
with open(path, 'r') as entry_file:
entry = entry_file.read()
_ = Markup(md.convert(entry))
page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title')

120
incorporealcms/ssg.py Normal file
View File

@ -0,0 +1,120 @@
"""Build an instance as a static site suitable for serving via e.g. Nginx.
SPDX-FileCopyrightText: © 2022 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import argparse
import os
import shutil
import stat
import tempfile
from termcolor import cprint
from incorporealcms import init_instance
from incorporealcms.pages import handle_markdown_file_path
def build():
    """Build the static site generated against an instance directory.

    Reads two positional command-line arguments via argparse:

    * ``instance_dir``: root of the instance; content is read from its ``pages/`` subdirectory.
    * ``output_dir``: final location of the site. The site is assembled in a temporary
      directory created next to it and then renamed into place, so the parent of
      ``output_dir`` must be writable. A pre-existing output directory is renamed to
      ``<output_dir>-old-<temporary directory name>`` rather than deleted.

    Raises:
        ValueError: if ``output_dir`` exists but is not a directory (other errors from
            walking, copying, and rendering the pages propagate unchanged).
    """
    parser = argparse.ArgumentParser(
        description="Build the static site generated against an instance directory.",
    )
    parser.add_argument(
        'instance_dir', help="path to instance directory root (NOTE: the program will go into pages/)"
    )
    parser.add_argument(
        'output_dir', help="path to directory to output to (NOTE: the program must be able to write into its parent!)"
    )
    args = parser.parse_args()

    # check output path before doing work: if it doesn't exist, great, we'll just move
    # the temporary dir later; if it exists and is a dir, that's fine; but if it exists
    # as anything else, we should error
    if os.path.exists(args.output_dir) and not os.path.isdir(args.output_dir):
        raise ValueError(f"specified output path '{args.output_dir}' exists as a file!")

    output_dir = os.path.abspath(args.output_dir)
    instance_dir = os.path.abspath(args.instance_dir)
    pages_dir = os.path.join(instance_dir, 'pages')

    # initialize configuration with the path to the instance
    init_instance(instance_dir)

    # putting the temporary directory next to the desired output so we can safely rename it later
    tmp_output_dir = tempfile.mkdtemp(dir=os.path.dirname(output_dir))
    cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green')

    # CORE CONTENT
    try:
        _populate_output_dir(pages_dir, tmp_output_dir)
    except BaseException:
        # a failed (or interrupted) build must not leave a half-written temporary
        # directory lying around next to the real output
        shutil.rmtree(tmp_output_dir, ignore_errors=True)
        raise

    # TODO: STATIC DIR

    # move temporary dir to the destination
    old_output_dir = f'{output_dir}-old-{os.path.basename(tmp_output_dir)}'
    if os.path.exists(output_dir):
        cprint(f"renaming '{output_dir}' to '{old_output_dir}'", 'green')
        os.rename(output_dir, old_output_dir)
    cprint(f"renaming '{tmp_output_dir}' to '{output_dir}'", 'green')
    os.rename(tmp_output_dir, output_dir)
    # mkdtemp creates the directory readable by its owner only; open it up to rwxr-xr-x
    os.chmod(output_dir,
             stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)

    # TODO: unlink old dir above? arg flag?


def _populate_output_dir(pages_dir: str, tmp_output_dir: str) -> None:
    """Mirror the pages directory into the output directory, rendering markdown files to HTML next to their copies."""
    # render and/or copy into the output dir after changing into the pages dir (to simplify paths)
    os.chdir(pages_dir)
    for base_dir, subdirs, files in os.walk(pages_dir):
        # remove the absolute path of the pages directory from the base_dir; the root is
        # represented as '' (not '.') so that top-level files are named 'index.md' rather
        # than './index.md', which is the form the page rendering code compares against
        rel_dir = os.path.relpath(base_dir, pages_dir)
        if rel_dir == os.curdir:
            rel_dir = ''

        # create subdirs seen here for subsequent depth
        for subdir in subdirs:
            src = os.path.join(rel_dir, subdir)
            dst = os.path.join(tmp_output_dir, src)
            if os.path.islink(src):
                link_target = _relative_link_target(pages_dir, rel_dir, src)
                print(f"creating directory symlink '{dst}' -> '{link_target}'")
                os.symlink(link_target, dst, target_is_directory=True)
            else:
                print(f"creating directory '{dst}'")
                os.mkdir(dst)

        # process and copy files
        for file_ in files:
            src = os.path.join(rel_dir, file_)
            dst = os.path.join(tmp_output_dir, src)
            if os.path.islink(src):
                link_target = _relative_link_target(pages_dir, rel_dir, src)
                print(f"creating symlink '{dst}' -> '{link_target}'")
                os.symlink(link_target, dst, target_is_directory=False)
                continue

            print(f"copying file '{src}' -> '{dst}'")
            shutil.copy2(src, dst)

            # render markdown as HTML
            if not src.endswith('.md'):
                continue
            rendered_file = dst.removesuffix('.md') + '.html'
            try:
                content = handle_markdown_file_path(src)
            except UnicodeDecodeError:
                # perhaps this isn't a markdown file at all for some reason; we
                # copied it above so stick with that
                cprint(f"{src} has invalid bytes! skipping", 'yellow')
                continue
            with open(rendered_file, 'w') as dst_file:
                dst_file.write(content)


def _relative_link_target(pages_dir: str, rel_dir: str, link_path: str) -> str:
    """Return the target to give the output copy of a symlink, relative to the directory the link lives in."""
    # symlink_to_relative_dest() validates the link and answers relative to the pages root,
    # but a relative symlink is resolved against its own directory, so re-anchor the answer
    # there; for links at the top level the two are the same thing
    dest_in_pages = symlink_to_relative_dest(pages_dir, link_path)
    return os.path.relpath(os.path.join(pages_dir, dest_in_pages), os.path.join(pages_dir, rel_dir))
def symlink_to_relative_dest(base_dir: str, source: str) -> str:
    """Given a symlink, make sure it points to something inside the instance and provide its real destination.

    Args:
        base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in.
        source: the symlink to check

    Returns:
        the fully resolved destination of the symlink, expressed relative to base_dir

    Raises:
        ValueError: if the resolved destination is not inside base_dir
    """
    # resolve both sides so that a base_dir reached through a symlink still compares equal
    real_base = os.path.realpath(base_dir)
    real_dest = os.path.realpath(source)

    # compare whole path components rather than raw string prefixes; otherwise a sibling
    # like '/srv/pages-private' would be accepted as being inside '/srv/pages'
    try:
        is_inside = os.path.commonpath([real_base, real_dest]) == real_base
    except ValueError:
        # commonpath refuses paths that cannot share an ancestor (e.g. different drives)
        is_inside = False
    if not is_inside:
        raise ValueError(f"symlink destination {real_dest} is outside the instance!")

    # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is...
    return os.path.relpath(real_dest, real_base)

View File

@ -8,12 +8,12 @@ SPDX-License-Identifier: AGPL-3.0-or-later
<title>{{ title }}</title>
{% if title %}<meta property="og:title" content="{{ title }}">{% endif %}
{% if description %}<meta property="og:description" content="{{ description }}">{% endif %}
{% if image %}<meta property="og:image" content="{{ image }}">{% endif %}
<meta property="og:url" content="{{ base_url }}">
{% if image %}<meta property="og:image" content="{{ image }}">{% endif %}
<meta name="twitter:card" content="summary_large_image">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="{{ user_style }}">
<link rel="icon" href="{% if config.FAVICON %}{{ config.FAVICON }}{% else %}{{ url_for('static', filename='img/favicon.png') }}{% endif %}">
<link rel="icon" href="{% if config.FAVICON %}{{ config.FAVICON }}{% else %}/static/img/favicon.png{% endif %}">
<link rel="alternate" type="application/atom+xml" href="/feed/atom">
<link rel="alternate" type="application/rss+xml" href="/feed/rss">

View File

@ -0,0 +1,27 @@
{
"LOGGING": {
"version": 1,
"formatters": {
"default": {
"format": "[%(asctime)s %(levelname)-7s %(name)s] %(message)s"
}
},
"handlers": {
"console": {
"level": "DEBUG",
"class": "logging.StreamHandler",
"formatter": "default"
}
},
"loggers": {
"incorporealcms.mdx": {
"level": "DEBUG",
"handlers": ["console"]
},
"incorporealcms.pages": {
"level": "DEBUG",
"handlers": ["console"]
}
}
}
}

View File

@ -1 +0,0 @@
Redirect: http://www.google.com/