rewrite the project as a static site generator

this removes Flask, reworks a number of library methods accordingly, and
adds generators and build commands to process the instance directory
(largely unchanged, except config.py is now config.json) and spit out
files suitable to be served by a web server such as Nginx.

there are probably some rough edges here, but overall this works.

also note, as this is no longer server software on a network, the
license has changed from AGPLv3 to GPLv3, and the "or any later version"
allowance has been removed

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
2025-03-12 10:28:38 -05:00
parent ed12272d4d
commit 7eb485c6ae
45 changed files with 1389 additions and 1587 deletions

View File

@@ -1,50 +1,47 @@
"""An application for running my Markdown-based sites.
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-only
"""
import json
import logging
import os
from logging.config import dictConfig
from flask import Flask, request
from termcolor import cprint
from incorporealcms.config import Config
# dynamically generate version number
try:
# packaged/pip install -e . value
from ._version import version as __version__
except ImportError:
# local clone value
from setuptools_scm import get_version
__version__ = get_version(root='..', relative_to=__file__)
def create_app(instance_path=None, test_config=None):
"""Create the Flask app, with allowances for customizing path and test settings."""
app = Flask(__name__, instance_relative_config=True, instance_path=instance_path)
def init_instance(instance_path: str, extra_config: dict = None):
"""Create the instance context, with allowances for customizing path and test settings."""
# load the instance config.json, if there is one
instance_config = os.path.join(instance_path, 'config.json')
try:
with open(instance_config, 'r') as config:
config_dict = json.load(config)
cprint(f"splicing {config_dict} into the config", 'yellow')
Config.update(config_dict)
except OSError:
raise ValueError("instance path does not seem to be a site instance!")
# if it doesn't already exist, create the instance folder
os.makedirs(app.instance_path, exist_ok=True)
if extra_config:
cprint(f"splicing {extra_config} into the config", 'yellow')
Config.update(extra_config)
# load defaults from config provided with the application
app.config.from_object('incorporealcms.config.Config')
# load specific instance configurations
app.config.from_pyfile('config.py', silent=True)
if test_config:
app.config.from_mapping(test_config)
dictConfig(app.config['LOGGING'])
# stash some stuff
Config.INSTANCE_DIR = os.path.abspath(instance_path)
dictConfig(Config.LOGGING)
logger = logging.getLogger(__name__)
logger.debug("instance path: %s", app.instance_path)
@app.before_request
def log_request():
logger.info("REQUEST: %s %s", request.method, request.path)
@app.after_request
def log_response(response):
logger.info("RESPONSE: %s %s: %s", request.method, request.path, response.status)
return response
from . import error_pages, feed, pages, static
app.register_blueprint(feed.bp)
app.register_blueprint(pages.bp)
app.register_blueprint(static.bp)
app.register_error_handler(400, error_pages.bad_request)
app.register_error_handler(404, error_pages.page_not_found)
app.register_error_handler(500, error_pages.internal_server_error)
return app
logger.debug("instance dir: %s", Config.INSTANCE_DIR)

View File

@@ -1,7 +1,7 @@
"""Default configuration.
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-only
"""
@@ -51,8 +51,6 @@ class Config(object):
},
}
MEDIA_DIR = 'media'
# customizations
PAGE_STYLES = {
'dark': '/static/css/dark.css',
@@ -61,11 +59,18 @@ class Config(object):
}
DEFAULT_PAGE_STYLE = 'light'
DOMAIN_NAME = 'example.com'
DOMAIN_NAME = 'example.org'
TITLE_SUFFIX = DOMAIN_NAME
CONTACT_EMAIL = 'admin@example.com'
BASE_HOST = 'http://' + DOMAIN_NAME
CONTACT_EMAIL = 'admin@example.org'
# feed settings
AUTHOR = {'name': 'Test Name', 'email': 'admin@example.com'}
AUTHOR = {'name': 'Test Name', 'email': 'admin@example.org'}
# specify FAVICON in your instance config.json to override the provided icon
FAVICON = '/static/img/favicon.png'
@classmethod
def update(cls, config: dict):
    """Overlay the given settings onto this configuration class.

    Args:
        config: mapping of setting names to the values to set as class attributes
    """
    for name in config:
        setattr(cls, name, config[name])

View File

@@ -1,21 +0,0 @@
"""Error page views for 400, 404, etc.
SPDX-FileCopyrightText: © 2021 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
from incorporealcms.lib import render
def bad_request(error):
    """Render the 400 error page and pair it with the 400 status code."""
    body = render('400.html')
    return body, 400
def internal_server_error(error):
    """Render the 500 error page and pair it with the 500 status code."""
    body = render('500.html')
    return body, 500
def page_not_found(error):
    """Render the 404 error page and pair it with the 404 status code."""
    body = render('404.html')
    return body, 404

View File

@@ -7,67 +7,76 @@ under pages/ (which may make sense for a blog) if they want, but could just
as well be pages/foo content.
SPDX-FileCopyrightText: © 2023 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-only
"""
import logging
import os
import re
from feedgen.feed import FeedGenerator
from flask import Blueprint, Response, abort
from flask import current_app as app
from incorporealcms.lib import instance_resource_path_to_request_path, parse_md
from incorporealcms.config import Config
from incorporealcms.markdown import instance_resource_path_to_request_path, parse_md
logger = logging.getLogger(__name__)
bp = Blueprint('feed', __name__, url_prefix='/feed')
def generate_feed(feed_type: str, instance_dir: str, dest_dir: str) -> None:
"""Generate the Atom or RSS feed as requested.
@bp.route('/<feed_type>')
def serve_feed(feed_type):
"""Serve the Atom or RSS feed as requested."""
logger.warning("wat")
if feed_type not in ('atom', 'rss'):
abort(404)
Args:
feed_type: 'atom' or 'rss' feed
instance_dir: the directory for the instance, containing both the feed dir and pages
dest_dir: the directory to place the feed subdir and requested feed
"""
fg = FeedGenerator()
fg.id(f'https://{app.config["DOMAIN_NAME"]}/')
fg.title(f'{app.config["TITLE_SUFFIX"]}')
fg.author(app.config["AUTHOR"])
fg.link(href=f'https://{app.config["DOMAIN_NAME"]}/feed/{feed_type}', rel='self')
fg.link(href=f'https://{app.config["DOMAIN_NAME"]}', rel='alternate')
fg.subtitle(f"Blog posts and other dated materials from {app.config['TITLE_SUFFIX']}")
fg.id(f'https://{Config.DOMAIN_NAME}/')
fg.title(f'{Config.TITLE_SUFFIX}')
fg.author(Config.AUTHOR)
fg.link(href=f'https://{Config.DOMAIN_NAME}/feed/{feed_type}', rel='self')
fg.link(href=f'https://{Config.DOMAIN_NAME}', rel='alternate')
fg.subtitle(f"Blog posts and other interesting materials from {Config.TITLE_SUFFIX}")
# get recent feeds
feed_path = os.path.join(app.instance_path, 'feed')
feed_path = os.path.join(instance_dir, 'feed')
feed_entry_paths = [os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(feed_path)
for filename in filenames if os.path.islink(os.path.join(dirpath, filename))]
for feed_entry_path in sorted(feed_entry_paths):
# get the actual file to parse it
resolved_path = os.path.realpath(feed_entry_path).replace(f'{app.instance_path}/', '')
os.chdir(os.path.abspath(os.path.join(instance_dir, 'pages')))
resolved_path = os.path.relpath(os.path.realpath(feed_entry_path), os.path.join(instance_dir, 'pages'))
try:
content, md, page_name, page_title, mtime = parse_md(resolved_path)
link = f'https://{app.config["DOMAIN_NAME"]}/{instance_resource_path_to_request_path(resolved_path)}'
link = f'https://{Config.DOMAIN_NAME}{instance_resource_path_to_request_path(resolved_path)}'
except (OSError, ValueError, TypeError):
logger.exception("error loading/rendering markdown!")
abort(500)
raise
fe = fg.add_entry()
fe.id(_generate_feed_id(feed_entry_path))
fe.title(page_name if page_name else page_title)
fe.author(app.config["AUTHOR"])
fe.id(_generate_feed_id(feed_entry_path, instance_resource_path_to_request_path(resolved_path)))
fe.title(page_title)
fe.author(Config.AUTHOR)
fe.link(href=link)
fe.content(content, type='html')
if feed_type == 'atom':
return Response(fg.atom_str(pretty=True), mimetype='application/atom+xml')
if feed_type == 'rss':
try:
os.mkdir(os.path.join(dest_dir, 'feed'))
except FileExistsError:
pass
with open(os.path.join(dest_dir, 'feed', 'rss'), 'wb') as feed_file:
feed_file.write(fg.rss_str(pretty=True))
else:
return Response(fg.rss_str(pretty=True), mimetype='application/rss+xml')
try:
os.mkdir(os.path.join(dest_dir, 'feed'))
except FileExistsError:
pass
with open(os.path.join(dest_dir, 'feed', 'atom'), 'wb') as feed_file:
feed_file.write(fg.atom_str(pretty=True))
def _generate_feed_id(feed_entry_path):
def _generate_feed_id(feed_entry_path, request_path):
"""For a relative file path, generate the Atom/RSS feed ID for it."""
date = re.sub(r'.*(\d{4})(\d{2})(\d{2}).*', r'\1-\2-\3', feed_entry_path)
cleaned = feed_entry_path.replace('#', '/').replace('feed/', '', 1).replace(app.instance_path, '')
return f'tag:{app.config["DOMAIN_NAME"]},{date}:{cleaned}'
cleaned = request_path.replace('#', '/')
return f'tag:{Config.DOMAIN_NAME},{date}:{cleaned}'

View File

@@ -1,94 +0,0 @@
"""Miscellaneous helper functions and whatnot.
SPDX-FileCopyrightText: © 2021 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import datetime
import logging
import os
import re
import markdown
from flask import current_app as app
from flask import make_response, render_template, request
from markupsafe import Markup
logger = logging.getLogger(__name__)
def get_meta_str(md, key):
    """Join the page's metadata values for the given key with spaces, or return '' if unset.

    Args:
        md: the Markdown parser object holding the parsed page's ``Meta`` mapping
        key: the metadata key to look up
    """
    values = md.Meta.get(key)
    if not values:
        return ""
    return " ".join(values)
def init_md():
    """Build a fresh Markdown parser.

    This used to be done at the app level in __init__, but extensions like footnotes apparently
    assume the parser to only live for the length of parsing one document, and create double
    footnote ref links if the one parser sees the same document multiple times.
    """
    settings = app.config
    # the configured extensions, plus the ones our app itself depends on (the meta extension)
    extensions = settings['MARKDOWN_EXTENSIONS'] + ['meta']
    extension_configs = settings['MARKDOWN_EXTENSION_CONFIGS']
    return markdown.Markdown(extensions=extensions, extension_configs=extension_configs)
def instance_resource_path_to_request_path(path):
    """Reverse a (presumed to exist) RELATIVE disk path to the canonical path that would show up in a Flask route.

    This does not include the leading /, so aside from the root index case, this should be
    bidirectional.

    Args:
        path: the resource path relative to the instance directory, e.g. 'pages/foo/index.md'

    Returns:
        the request path without a leading slash, e.g. 'foo/'
    """
    # only strip a trailing path component that is exactly 'index.md' (keeping the directory's
    # slash), so that a file such as 'reindex.md' is not truncated to 're'
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    # the dot is escaped so that only a real '.md' suffix is removed, not e.g. the end of 'cmd'
    without_suffix = re.sub(r'\.md$', '', without_index)
    return re.sub(r'^pages/', '', without_suffix)
def parse_md(resolved_path):
    """Read and render an instance Markdown file, returning its content and derived data.

    Args:
        resolved_path: the path of the file, relative to the instance directory

    Returns:
        a tuple of the rendered content, the Markdown parser used, the page name,
        the page title, and the file's modification time (in UTC)
    """
    try:
        logger.debug("opening resolved path '%s'", resolved_path)
        with app.open_instance_resource(resolved_path, 'r') as entry_file:
            modified = os.path.getmtime(entry_file.name)
            mtime = datetime.datetime.fromtimestamp(modified, tz=datetime.timezone.utc)
            entry = entry_file.read()
        logger.debug("resolved path '%s' read", resolved_path)
        md = init_md()
        content = Markup(md.convert(entry))
    except OSError:
        logger.exception("resolved path '%s' could not be opened!", resolved_path)
        raise
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        raise
    except TypeError:
        logger.exception("error loading/rendering markdown!")
        raise

    logger.debug("file metadata: %s", md.Meta)

    # prefer the title from the page metadata, falling back to the request path
    if md.Meta.get('title'):
        page_name = get_meta_str(md, 'title')
    else:
        page_name = f'/{instance_resource_path_to_request_path(resolved_path)}'

    if page_name:
        page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}'
    else:
        page_title = app.config['TITLE_SUFFIX']
    logger.debug("title (potentially derived): %s", page_title)

    return content, md, page_name, page_title, mtime
def render(template_name_or_list, **context):
    """Render a template through Flask, supplying the site theme to use.

    A ``style`` query argument takes priority and is stored in the ``user-style`` cookie
    on the response; otherwise the existing cookie is consulted. Unknown or missing
    styles fall back to the configured default page style.
    """
    page_styles = app.config['PAGE_STYLES']
    selected_style = request.args.get('style', None)

    if not selected_style:
        user_style = request.cookies.get('user-style')
        logger.debug("user style cookie: %s", user_style)
    else:
        user_style = selected_style

    default_style = page_styles.get(app.config['DEFAULT_PAGE_STYLE'])
    context['user_style'] = page_styles.get(user_style, default_style)
    context['page_styles'] = page_styles

    rendered = render_template(template_name_or_list, **context)
    resp = make_response(rendered)
    if selected_style:
        # remember the explicitly requested style for subsequent requests
        resp.set_cookie('user-style', selected_style)
    return resp

146
incorporealcms/markdown.py Normal file
View File

@@ -0,0 +1,146 @@
"""Process Markdown pages.
With the project now being a SSG, most files we just let the web server serve
as is, but .md files need to be processed with a Markdown parser, so a lot of this
is our tweaks and customizations for pages my way.
SPDX-FileCopyrightText: © 2025 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: GPL-3.0-only
"""
import datetime
import logging
import os
import re
import markdown
from jinja2 import Environment, PackageLoader, select_autoescape
from markupsafe import Markup
from incorporealcms.config import Config
logger = logging.getLogger(__name__)
# shared Jinja environment used to render pages from the templates shipped in the package
jinja_env = Environment(autoescape=select_autoescape(), loader=PackageLoader('incorporealcms'))
def get_meta_str(md, key):
    """Return the page's metadata for the key as one space-joined string, or '' if unset.

    Args:
        md: the Markdown parser object holding the parsed page's ``Meta`` mapping
        key: the metadata key to look up
    """
    values = md.Meta.get(key)
    return " ".join(values) if values else ""
def init_md():
    """Build a fresh Markdown parser.

    This used to be done at the app level in __init__, but extensions like footnotes apparently
    assume the parser to only live for the length of parsing one document, and create double
    footnote ref links if the one parser sees the same document multiple times.
    """
    # the configured extensions, plus the ones our app itself depends on (the meta extension)
    extensions = Config.MARKDOWN_EXTENSIONS + ['meta']
    extension_configs = Config.MARKDOWN_EXTENSION_CONFIGS
    return markdown.Markdown(extensions=extensions, extension_configs=extension_configs)
def instance_resource_path_to_request_path(path):
    """Reverse a relative disk path to the path that would show up in a URL request.

    Args:
        path: the path of the resource relative to the pages directory, e.g. 'foo/index.md'

    Returns:
        the request path with a leading slash, e.g. '/foo/'
    """
    # only strip a trailing path component that is exactly 'index.md' (keeping the directory's
    # slash), so that a file such as 'reindex.md' is not truncated to 're'
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    # the dot is escaped so that only a real '.md' suffix is removed, not e.g. the end of 'cmd'
    return '/' + re.sub(r'\.md$', '', without_index)
def parse_md(path: str):
    """Read and render a Markdown file, returning its content and data derived from it.

    Args:
        path: the path to the file to render

    Returns:
        a tuple of the rendered content, the Markdown parser used, the page name,
        the page title, and the file's modification time (in UTC)
    """
    try:
        logger.debug("opening path '%s'", path)
        with open(path, 'r') as input_file:
            modified = os.path.getmtime(input_file.name)
            mtime = datetime.datetime.fromtimestamp(modified, tz=datetime.timezone.utc)
            entry = input_file.read()
        logger.debug("path '%s' read", path)
        md = init_md()
        content = Markup(md.convert(entry))  # nosec B704
    except OSError:
        # FileNotFoundError is a subclass of OSError, so missing files land here too
        logger.exception("path '%s' could not be opened!", path)
        raise
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        raise

    logger.debug("file metadata: %s", md.Meta)

    # prefer the title from the page metadata, falling back to the request path
    if md.Meta.get('title'):
        page_name = get_meta_str(md, 'title')
    else:
        page_name = instance_resource_path_to_request_path(path)

    if page_name:
        page_title = f'{page_name} - {Config.TITLE_SUFFIX}'
    else:
        page_title = Config.TITLE_SUFFIX
    logger.debug("title (potentially derived): %s", page_title)

    return content, md, page_name, page_title, mtime
def handle_markdown_file_path(path: str) -> str:
    """Parse the Markdown file at the given location and render it through its template.

    Args:
        path: the path to the Markdown file to render

    Returns:
        the rendered page

    Raises:
        NotImplementedError: if the page's metadata requests a redirect
    """
    content, md, page_name, page_title, mtime = parse_md(path)
    parent_navs = generate_parent_navs(path)

    if md.Meta.get('footer'):
        extra_footer = get_meta_str(md, 'footer')
    else:
        extra_footer = None

    if md.Meta.get('template'):
        template_name = get_meta_str(md, 'template')
    else:
        template_name = 'base.html'

    # check if this has a HTTP redirect
    if md.Meta.get('redirect'):
        redirect_url = get_meta_str(md, 'redirect')
    else:
        redirect_url = None
    if redirect_url:
        raise NotImplementedError("redirects in markdown are unsupported!")

    template = jinja_env.get_template(template_name)
    page_context = {
        'title': page_title,
        'config': Config,
        'description': get_meta_str(md, 'description'),
        'image': get_meta_str(md, 'image'),
        'content': content,
        'base_url': Config.BASE_HOST + instance_resource_path_to_request_path(path),
        'navs': parent_navs,
        'mtime': mtime.strftime('%Y-%m-%d %H:%M:%S %Z'),
        'extra_footer': extra_footer,
    }
    return template.render(**page_context)
def generate_parent_navs(path):
    """Create a series of paths/links to navigate up from the given resource path.

    Args:
        path: the resource path, relative to the pages directory, to build breadcrumbs for

    Returns:
        a list of (display name, request path) tuples, starting at the site root and
        ending at the given resource
    """
    if path == 'index.md':
        # bail and return the domain name as a terminal case
        return [(Config.DOMAIN_NAME, '/')]

    # compare the file name itself rather than the end of the string, so that a page such as
    # foo/reindex.md is not mistaken for a directory index (which would skip foo's breadcrumb)
    if os.path.basename(path) == 'index.md':
        # index case: one dirname for foo/bar/index.md -> foo/bar, one for foo/bar -> foo
        parent_resource_dir = os.path.dirname(os.path.dirname(path))
    else:
        # usual case: foo/buh.md -> foo
        parent_resource_dir = os.path.dirname(path)

    # generate the request path (i.e. what the link will be) for this path, and
    # also the resource path of this parent (which is always a dir, so always index.md)
    request_path = instance_resource_path_to_request_path(path)
    parent_resource_path = os.path.join(parent_resource_dir, 'index.md')

    logger.debug("resource path: '%s'; request path: '%s'; parent resource path: '%s'", path,
                 request_path, parent_resource_path)

    # for issues regarding parser reuse (see init_md) we reinitialize the parser here
    md = init_md()

    # read the resource
    try:
        with open(path, 'r') as entry_file:
            entry = entry_file.read()
        # the rendered output is discarded; converting is what populates md.Meta
        _ = Markup(md.convert(entry))  # nosec B704
        page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title')
                     else request_path_to_breadcrumb_display(request_path))
        return generate_parent_navs(parent_resource_path) + [(page_name, request_path)]
    except FileNotFoundError:
        # no file for this level (e.g. a directory without an index); link it by its path
        return generate_parent_navs(parent_resource_path) + [(request_path, request_path)]
def request_path_to_breadcrumb_display(path):
    """Given a request path, e.g. "/foo/bar/baz/", turn it into breadcrumby text "baz".

    Args:
        path: the request path to derive the display text from

    Returns:
        the last component of the path, or '' for the root path
    """
    # rpartition returns the whole string as the tail when there is no slash, whereas
    # slicing from rfind()'s -1 would keep only the final character
    return path.rstrip('/').rpartition('/')[2]

View File

@@ -1,5 +1,5 @@
"""Markdown extensions.
SPDX-FileCopyrightText: © 2021 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-only
"""

View File

@@ -1,10 +1,10 @@
"""Create generic figures with captions.
SPDX-FileCopyrightText: © 2022 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-only
"""
import re
from xml.etree.ElementTree import SubElement # nosec B405 - not parsing untrusted XML here
from xml.etree.ElementTree import SubElement # nosec B405
import markdown

View File

@@ -1,7 +1,7 @@
"""Serve dot diagrams inline.
SPDX-FileCopyrightText: © 2021 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
SPDX-License-Identifier: GPL-3.0-only
"""
import base64
import logging

View File

@@ -1,180 +0,0 @@
"""General page functionality.
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import logging
import os
from flask import Blueprint, abort
from flask import current_app as app
from flask import redirect, request, send_from_directory
from markupsafe import Markup
from werkzeug.security import safe_join
from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md, render
logger = logging.getLogger(__name__)
bp = Blueprint('pages', __name__, url_prefix='/')
@bp.route('/', defaults={'path': 'index'})
@bp.route('/<path:path>')
def display_page(path):
    """Get the file contents of the requested path and render the file.

    Depending on what the path resolves to, this sends the file as is, redirects to the
    canonical location, or renders Markdown. Requests that cannot be resolved are aborted
    with 400 (not permitted), 404 (not found), or 500 (unsupported resource type).
    """
    try:
        resolved_path, render_type = request_path_to_instance_resource_path(path)
        logger.debug("received request for path '%s', resolved to '%s', type '%s'",
                     path, resolved_path, render_type)
    except PermissionError:
        abort(400)
    except IsADirectoryError:
        return redirect(f'/{path}/', code=301)
    except FileNotFoundError:
        abort(404)

    if render_type == 'file':
        return send_from_directory(app.instance_path, resolved_path)

    if render_type == 'symlink':
        logger.debug("attempting to redirect path '%s' to reverse of resource '%s'", path, resolved_path)
        redirect_path = f'/{instance_resource_path_to_request_path(resolved_path)}'
        logger.debug("redirect path: '%s'", redirect_path)
        return redirect(redirect_path, code=301)

    if render_type == 'markdown':
        logger.debug("treating path '%s' as markdown '%s'", path, resolved_path)
        return handle_markdown_file_path(resolved_path)

    # no exception is being handled at this point, so log a plain error rather than
    # logger.exception(), which would append an empty traceback
    logger.error("unsupported render_type '%s'!?", render_type)
    abort(500)
def handle_markdown_file_path(resolved_path):
    """Parse the Markdown file at the given instance location and respond with the rendered page.

    Parsing failures are logged and abort the request with a 500; a ``redirect`` entry in the
    page metadata produces a 301 redirect instead of a rendered page.
    """
    try:
        content, md, page_name, page_title, mtime = parse_md(resolved_path)
    except OSError:
        logger.exception("resolved path '%s' could not be opened!", resolved_path)
        abort(500)
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        abort(500)
    except TypeError:
        logger.exception("error loading/rendering markdown!")
        abort(500)

    # abort() raises, so reaching this point means the page parsed successfully
    parent_navs = generate_parent_navs(resolved_path)

    if md.Meta.get('footer'):
        extra_footer = get_meta_str(md, 'footer')
    else:
        extra_footer = None

    if md.Meta.get('template'):
        template = get_meta_str(md, 'template')
    else:
        template = 'base.html'

    # check if this has a HTTP redirect
    if md.Meta.get('redirect'):
        redirect_url = get_meta_str(md, 'redirect')
    else:
        redirect_url = None
    if redirect_url:
        logger.debug("redirecting via meta tag to '%s'", redirect_url)
        return redirect(redirect_url, code=301)

    return render(
        template,
        title=page_title,
        description=get_meta_str(md, 'description'),
        image=get_meta_str(md, 'image'),
        base_url=request.base_url,
        content=content,
        navs=parent_navs,
        mtime=mtime.strftime('%Y-%m-%d %H:%M:%S %Z'),
        extra_footer=extra_footer,
    )
def request_path_to_instance_resource_path(path):
    """Turn a request URL path to the full page path.

    flask.Flask.open_instance_resource will open a file like /etc/hosts if you tell it to,
    which sucks, so we do a lot of work here to make sure we have a valid request to
    something inside the pages dir.

    Returns:
        a tuple of the resource path with the instance directory prefix removed, and the kind
        of resource it is: 'file', 'symlink', or 'markdown'

    Raises:
        PermissionError: if the path cannot be safely joined onto the pages directory
        IsADirectoryError: if the path is a directory but was requested without a trailing slash
        FileNotFoundError: if neither the path nor its '.md' counterpart exists
    """
    def instance_relative(full_path):
        # callers open resources relative to the instance dir, so drop that prefix
        return full_path.replace(f'{app.instance_path}{os.path.sep}', '')

    # check if the path is allowed
    base_dir = os.path.realpath(f'{app.instance_path}/pages/')
    safe_path = safe_join(base_dir, path)
    # bail if the requested real path isn't inside the base directory
    if not safe_path:
        logger.warning("client tried to request a path '%s' outside of the base_dir!", path)
        raise PermissionError

    verbatim_path = os.path.abspath(safe_path)
    resolved_path = os.path.realpath(verbatim_path)
    logger.debug("base_dir '%s', constructed resolved_path '%s' for path '%s'", base_dir, resolved_path, path)

    # see if we have a real file or if we should infer markdown rendering
    if os.path.exists(resolved_path):
        is_dir = os.path.isdir(resolved_path)

        # if this is a file-like request but actually a directory, redirect the user
        if is_dir and not path.endswith('/'):
            logger.info("client requested a path '%s' that is actually a directory", path)
            raise IsADirectoryError

        # if the requested path contains a symlink, redirect the user
        if verbatim_path != resolved_path:
            logger.info("client requested a path '%s' that is actually a symlink to file '%s'", path, resolved_path)
            return instance_relative(resolved_path), 'symlink'

        # derive the proper markdown or actual file depending on if this is a dir or file
        if is_dir:
            resolved_path = os.path.join(resolved_path, 'index.md')
            return instance_relative(resolved_path), 'markdown'

        logger.info("final DIRECT path = '%s' for request '%s'", resolved_path, path)
        return instance_relative(resolved_path), 'file'

    # if we're here, this isn't direct file access, so try markdown inference
    verbatim_path = f'{safe_path}.md'
    resolved_path = os.path.realpath(verbatim_path)

    # does the final file actually exist?
    if not os.path.exists(resolved_path):
        logger.warning("requested final path '%s' does not exist!", resolved_path)
        raise FileNotFoundError

    # check for symlinks
    if verbatim_path != resolved_path:
        logger.info("client requested a path '%s' that is actually a symlink to file '%s'", path, resolved_path)
        return instance_relative(resolved_path), 'symlink'

    logger.info("final path = '%s' for request '%s'", resolved_path, path)
    # we checked that the file exists via absolute path, but now we need to give the path relative to instance dir
    return instance_relative(resolved_path), 'markdown'
def generate_parent_navs(path):
    """Create a series of paths/links to navigate up from the given resource path.

    Args:
        path: the resource path, relative to the instance directory, to build breadcrumbs for

    Returns:
        a list of (display name, request path) tuples, starting at the site root and
        ending at the given resource
    """
    if path == 'pages/index.md':
        # bail and return the domain name as a terminal case
        return [(app.config['DOMAIN_NAME'], '/')]

    # compare the file name itself rather than the end of the string, so that a page such as
    # foo/reindex.md is not mistaken for a directory index (which would skip foo's breadcrumb)
    if os.path.basename(path) == 'index.md':
        # index case: one dirname for foo/bar/index.md -> foo/bar, one for foo/bar -> foo
        parent_resource_dir = os.path.dirname(os.path.dirname(path))
    else:
        # usual case: foo/buh.md -> foo
        parent_resource_dir = os.path.dirname(path)

    # generate the request path (i.e. what the link will be) for this path, and
    # also the resource path of this parent (which is always a dir, so always index.md)
    request_path = f'/{instance_resource_path_to_request_path(path)}'
    parent_resource_path = os.path.join(parent_resource_dir, 'index.md')

    logger.debug("resource path: '%s'; request path: '%s'; parent resource path: '%s'", path,
                 request_path, parent_resource_path)

    # for issues regarding parser reuse (see lib.init_md) we reinitialize the parser here
    md = init_md()

    # read the resource
    try:
        with app.open_instance_resource(path, 'r') as entry_file:
            entry = entry_file.read()
        # the rendered output is discarded; converting is what populates md.Meta
        _ = Markup(md.convert(entry))
        page_name = (" ".join(md.Meta.get('title')) if md.Meta.get('title')
                     else request_path_to_breadcrumb_display(request_path))
        return generate_parent_navs(parent_resource_path) + [(page_name, request_path)]
    except FileNotFoundError:
        # no file for this level (e.g. a directory without an index); link it by its path
        return generate_parent_navs(parent_resource_path) + [(request_path, request_path)]
def request_path_to_breadcrumb_display(path):
    """Given a request path, e.g. "/foo/bar/baz/", turn it into breadcrumby text "baz".

    Args:
        path: the request path to derive the display text from

    Returns:
        the last component of the path, or '' for the root path
    """
    # rpartition returns the whole string as the tail when there is no slash, whereas
    # slicing from rfind()'s -1 would keep only the final character
    return path.rstrip('/').rpartition('/')[2]

188
incorporealcms/ssg.py Normal file
View File

@@ -0,0 +1,188 @@
"""Build an instance as a static site suitable for serving via e.g. Nginx.
SPDX-FileCopyrightText: © 2022 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: GPL-3.0-only
"""
import argparse
import os
import shutil
import stat
import tempfile
from termcolor import cprint
from incorporealcms import __version__, init_instance
from incorporealcms.markdown import handle_markdown_file_path
class StaticSiteGenerator(object):
"""Generate static site output based on the instance's content."""
def __init__(self, instance_dir: str, output_dir: str, extra_config=None):
"""Create the object to run various operations to generate the static site.
Args:
instance_dir: the directory from which to read an instance format set of content
output_dir: the directory to write the generated static site to
"""
self.instance_dir = instance_dir
self.output_dir = output_dir
instance_dir = os.path.abspath(instance_dir)
output_dir = os.path.abspath(output_dir)
# initialize configuration with the path to the instance
init_instance(instance_dir, extra_config)
def build(self):
"""Build the whole static site."""
# putting the temporary directory next to the desired output so we can safely rename it later
tmp_output_dir = tempfile.mkdtemp(dir=os.path.dirname(self.output_dir))
cprint(f"creating temporary directory '{tmp_output_dir}' for writing", 'green')
# copy core content
pages_dir = os.path.join(self.instance_dir, 'pages')
self.build_in_destination(pages_dir, tmp_output_dir)
# copy the program's static dir
program_static_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static')
static_output_dir = os.path.join(tmp_output_dir, 'static')
try:
os.mkdir(static_output_dir)
except FileExistsError:
# already exists
pass
self.build_in_destination(program_static_dir, static_output_dir, convert_markdown=False)
# copy the instance's static dir --- should I deprecate this since it could just be stuff in pages/static/?
custom_static_dir = os.path.join(self.instance_dir, 'custom-static')
self.build_in_destination(custom_static_dir, static_output_dir, convert_markdown=False)
# move temporary dir to the destination
old_output_dir = f'{self.output_dir}-old-{os.path.basename(tmp_output_dir)}'
if os.path.exists(self.output_dir):
cprint(f"renaming '{self.output_dir}' to '{old_output_dir}'", 'green')
os.rename(self.output_dir, old_output_dir)
cprint(f"renaming '{tmp_output_dir}' to '{self.output_dir}'", 'green')
os.rename(tmp_output_dir, self.output_dir)
os.chmod(self.output_dir,
stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
# TODO: unlink old dir above? arg flag?
def build_in_destination(self, source_dir: str, dest_dir: str, convert_markdown: bool = True) -> None:
"""Walk the source directory and copy and/or convert its contents into the destination.
Args:
source_dir: the directory to copy into the destination
dest_dir: the directory to place copied/converted files into
convert_markdown: whether or not to convert Markdown files (or simply copy them)
"""
cprint(f"copying files from '{source_dir}' to '{dest_dir}'", 'green')
os.chdir(source_dir)
for base_dir, subdirs, files in os.walk(source_dir):
# remove the absolute path of the directory from the base_dir
base_dir = os.path.relpath(base_dir, source_dir)
# create subdirs seen here for subsequent depth
for subdir in subdirs:
self.build_subdir_in_destination(source_dir, base_dir, subdir, dest_dir)
# process and copy files
for file_ in files:
self.build_file_in_destination(source_dir, base_dir, file_, dest_dir, convert_markdown)
def build_subdir_in_destination(self, source_dir: str, base_dir: str, subdir: str, dest_dir: str) -> None:
"""Create a subdir (which might actually be a symlink) in the output dir.
Args:
source_dir: the absolute path of the location in the instance, contains subdir
base_dir: the relative path of the location in the instance, contains subdir
subdir: the subdir in the instance to replicate in the output
dest_dir: the output directory to place the subdir in
"""
dst = os.path.join(dest_dir, base_dir, subdir)
if os.path.islink(os.path.join(base_dir, subdir)):
# keep the link relative to the output directory
src = self.symlink_to_relative_dest(source_dir, os.path.join(base_dir, subdir))
print(f"creating directory symlink '{dst}' -> '{src}'")
os.symlink(src, dst, target_is_directory=True)
else:
print(f"creating directory '{dst}'")
try:
os.mkdir(dst)
except FileExistsError:
# already exists
pass
def build_file_in_destination(self, source_dir: str, base_dir: str, file_: str, dest_dir: str,
convert_markdown=False) -> None:
"""Create a file (which might actually be a symlink) in the output dir.
Args:
source_dir: the absolute path of the location in the instance, contains subdir
base_dir: the relative path of the location in the instance, contains subdir
file_: the file in the instance to replicate in the output
dest_dir: the output directory to place the subdir in
"""
dst = os.path.join(dest_dir, base_dir, file_)
if os.path.islink(os.path.join(base_dir, file_)):
# keep the link relative to the output directory
src = self.symlink_to_relative_dest(source_dir, os.path.join(base_dir, file_))
print(f"creating symlink '{dst}' -> '{src}'")
os.symlink(src, dst, target_is_directory=False)
else:
src = os.path.join(base_dir, file_)
print(f"copying file '{src}' -> '{dst}'")
shutil.copy2(src, dst)
# render markdown as HTML
if src.endswith('.md') and convert_markdown:
rendered_file = dst.removesuffix('.md') + '.html'
try:
content = handle_markdown_file_path(src)
except UnicodeDecodeError:
# perhaps this isn't a markdown file at all for some reason; we
# copied it above so stick with tha
cprint(f"{src} has invalid bytes! skipping", 'yellow')
else:
with open(rendered_file, 'w') as dst_file:
dst_file.write(content)
def symlink_to_relative_dest(self, base_dir: str, source: str) -> str:
    """Given a symlink, make sure it points to something inside the instance and provide its real destination.

    Args:
        base_dir: the full absolute path of the instance's pages dir, which the symlink destination must be in.
        source: the symlink to check

    Returns:
        what the symlink points at, expressed relative to base_dir

    Raises:
        ValueError: if the symlink resolves to a location outside base_dir.
    """
    real_dest = os.path.realpath(source)
    root = os.path.normpath(base_dir)
    # compare on a path-component boundary: a bare string prefix test would
    # accept a sibling such as "/x/pages-evil" as being inside "/x/pages"
    if real_dest != root and not real_dest.startswith(os.path.join(root, '')):
        raise ValueError(f"symlink destination {real_dest} is outside the instance!")

    # this symlink points to realpath inside base_dir, so relative to base_dir, the symlink dest is...
    # NOTE(review): the result is relative to base_dir, not to the directory
    # holding the symlink -- confirm that is what callers expect for nested links
    return os.path.relpath(real_dest, base_dir)
def build():
    """Command-line entry point: parse arguments and build the static site for an instance.

    Expects two positional arguments, the instance directory and the output
    directory, and hands them to StaticSiteGenerator.

    Raises:
        ValueError: if the output path exists but is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Build the static site generated against an instance directory.",
    )
    positionals = (
        ('instance_dir', "path to instance directory root (NOTE: the program will go into pages/)"),
        ('output_dir', "path to directory to output to (NOTE: the program must be able to write into its parent!)"),
    )
    for arg_name, arg_help in positionals:
        parser.add_argument(arg_name, help=arg_help)
    args = parser.parse_args()

    cprint(f"incorporealcms-build v{__version__} Copyright (C) 2025 Brian S. Stephan <bss@incorporeal.org>", 'green')

    # validate the output path before doing any work: a missing path is fine
    # (it gets created later) and so is an existing directory, but anything
    # else already sitting at that path is an error
    if os.path.exists(args.output_dir) and not os.path.isdir(args.output_dir):
        raise ValueError(f"specified output path '{args.output_dir}' exists as a file!")

    generator = StaticSiteGenerator(args.instance_dir, args.output_dir)
    generator.build()

View File

@@ -1,18 +0,0 @@
"""Serve static files from the instance directory.
SPDX-FileCopyrightText: © 2022 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import os
from flask import Blueprint
from flask import current_app as app
from flask import send_from_directory
bp = Blueprint('static', __name__, url_prefix='/custom-static')
@bp.route('/<path:name>')
def serve_instance_static_file(name):
    """Send the named file from the instance's custom-static directory, used for customization."""
    custom_static_root = os.path.join(app.instance_path, 'custom-static')
    return send_from_directory(custom_static_root, name)

View File

@@ -1,6 +1,6 @@
/*
* SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
* SPDX-License-Identifier: AGPL-3.0-or-later
* SPDX-License-Identifier: GPL-3.0-only
*/
html {

View File

@@ -1,6 +1,6 @@
/*
* SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
* SPDX-License-Identifier: AGPL-3.0-or-later
* SPDX-License-Identifier: GPL-3.0-only
*/
/* common styling via the base.css, used in light and dark */

View File

@@ -1,6 +1,6 @@
/*
* SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
* SPDX-License-Identifier: AGPL-3.0-or-later
* SPDX-License-Identifier: GPL-3.0-only
*/
/* common styling via the base.css, used in light and dark */

View File

@@ -1,6 +1,6 @@
/*
* SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
* SPDX-License-Identifier: AGPL-3.0-or-later
* SPDX-License-Identifier: GPL-3.0-only
*/
/* specify almost no styling, just fix some image and nav rendering */

View File

@@ -1,8 +1,7 @@
<!--
{#
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
-->
SPDX-License-Identifier: GPL-3.0-only
#}
{% extends "base.html" %}
{% block header %}

View File

@@ -1,8 +1,7 @@
<!--
{#
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
-->
SPDX-License-Identifier: GPL-3.0-only
#}
{% extends "base.html" %}
{% block header %}

View File

@@ -1,8 +1,7 @@
<!--
{#
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
-->
SPDX-License-Identifier: GPL-3.0-only
#}
{% extends "base.html" %}
{% block header %}

View File

@@ -1,7 +1,6 @@
<!--
{#
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
-->
SPDX-License-Identifier: GPL-3.0-only
#}
{% extends "base.html" %}
{% block site_class %}class="site-wrap site-wrap-double-width"{% endblock %}

View File

@@ -1,21 +1,67 @@
<!--
<!doctype html>{#
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
-->
<!doctype html>
SPDX-License-Identifier: GPL-3.0-only
#}
<html lang="en">
<title>{{ title }}</title>
<meta charset="utf-8">
{% if title %}<meta property="og:title" content="{{ title }}">{% endif %}
{% if description %}<meta property="og:description" content="{{ description }}">{% endif %}
{% if image %}<meta property="og:image" content="{{ image }}">{% endif %}
<meta property="og:url" content="{{ base_url }}">
{% if image %}<meta property="og:image" content="{{ image }}">{% endif %}
<meta name="twitter:card" content="summary_large_image">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="{{ user_style }}">
<link rel="icon" href="{% if config.FAVICON %}{{ config.FAVICON }}{% else %}{{ url_for('static', filename='img/favicon.png') }}{% endif %}">
<link rel="stylesheet" type="text/css" title="{{ config.DEFAULT_PAGE_STYLE }}" href="{{ config.PAGE_STYLES[config.DEFAULT_PAGE_STYLE] }}">
{% for style, stylesheet in config.PAGE_STYLES.items() %}
<link rel="alternate stylesheet" type="text/css" title="{{ style }}" href="{{ stylesheet }}">
{% endfor %}
<link rel="icon" href="{{ config.FAVICON }}">
<link rel="alternate" type="application/atom+xml" href="/feed/atom">
<link rel="alternate" type="application/rss+xml" href="/feed/rss">
<script type="text/javascript">
// loathe as I am to use JavaScript, this style selection is one of my favorite parts
// of my CMS, so I want to keep it around even in the static site
function applyStyle(styleName) {
    // Make styleName the active titled stylesheet: every <link> that is a
    // stylesheet and has a title is disabled, except the one whose title
    // matches. Untitled links (icon, feeds, ...) are left untouched.
    var links = document.getElementsByTagName("link");
    for (var i = 0; i < links.length; i++) {
        var link = links[i];
        // only stylesheets with titles can be disabled/enabled
        if (link.rel.indexOf("stylesheet") !== -1 && link.title) {
            // disable first, then re-enable only the requested style
            link.disabled = true;
            if (link.title == styleName) {
                link.disabled = false;
            }
        }
    }
}
function setStyle(styleName) {
    // Remember the chosen style in a cookie for a year, then apply it now.
    // path=/ makes the choice apply to every page of the site; without it the
    // cookie is scoped to the directory of the page where the link was clicked.
    document.cookie = "user-style=" + encodeURIComponent(styleName) + "; max-age=31536000; path=/";
    applyStyle(styleName);
}
function applyStyleFromCookie() {
    // get the user style cookie and set that specified style as the active one;
    // with no cookie set, the page's default stylesheet stays in effect
    var styleName = getCookie("user-style");
    if (styleName) {
        applyStyle(styleName);
    }
}
function getCookie(cookieName) {
    // find the desired cookie from the document's cookie(s) string;
    // returns the decoded value, or undefined when the cookie is not present
    // regex metacharacters in the name are escaped so it is matched literally
    const escapedName = cookieName.replace(/([\.$?*|{}\(\)\[\]\\\/\+^])/g, '\\$1');
    const matches = document.cookie.match(new RegExp(
        "(?:^|; )" + escapedName + "=([^;]*)"
    ));
    return matches ? decodeURIComponent(matches[1]) : undefined;
}
applyStyleFromCookie();
</script>
<div {% block site_class %}class="site-wrap site-wrap-normal-width"{% endblock %}>
{% block header %}
@@ -26,11 +72,13 @@ SPDX-License-Identifier: AGPL-3.0-or-later
{% if not loop.last %} &raquo; {% endif %}
{% endfor %}
</div>
{% if page_styles %}
<div class="styles">
{% for style in page_styles %}
<a href="?style={{ style }}">[{{ style }}]</a>
<a href="#" onclick="setStyle('{{ style }}'); return false;">[{{ style }}]</a>
{% endfor %}
</div>
{% endif %}
</div>
{% endblock %}
{% block body %}