implement a rudimentary Atom/RSS feed module

this provides a somewhat unconfigurable (at the moment) feed module
which provides Atom and RSS feeds. entries are determined by symlinks to
content pages, because my core CMS usage is still more general and not
blog-like. the symlinks allow for arbitrarily adding entries as I see
fit.

this also moves core Markdown parser stuff to the library module, since
that's used by the feed as well as normal pages

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
Brian S. Stephan 2023-12-30 13:23:13 -06:00
parent 5a9a36f463
commit 6dc443e59f
Signed by: bss
GPG Key ID: 3DE06D3180895FCB
10 changed files with 175 additions and 38 deletions

View File

@ -39,7 +39,8 @@ def create_app(instance_path=None, test_config=None):
logger.info("RESPONSE: %s %s: %s", request.method, request.path, response.status)
return response
from . import error_pages, pages, static
from . import error_pages, feed, pages, static
app.register_blueprint(feed.bp)
app.register_blueprint(pages.bp)
app.register_blueprint(static.bp)
app.register_error_handler(400, error_pages.bad_request)

71
incorporealcms/feed.py Normal file
View File

@ -0,0 +1,71 @@
"""Generate Atom and RSS feeds based on content in a blog-ish location.
This parses a special root directory, feed/, for feed/YYYY/MM/DD/file files,
and combines them into an Atom or RSS feed. These files *should* be symlinks
to the real pages, which may mirror the same YYYY/MM/DD/file naming scheme
under pages/ (which may make sense for a blog) if they want, but could just
as well be pages/foo content.
SPDX-FileCopyrightText: © 2023 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import logging
import os
import re
from feedgen.feed import FeedGenerator
from flask import Blueprint, abort
from flask import current_app as app
from incorporealcms.lib import instance_resource_path_to_request_path, parse_md
logger = logging.getLogger(__name__)
bp = Blueprint('feed', __name__, url_prefix='/feed')
@bp.route('/<feed_type>')
def serve_feed(feed_type):
    """Serve the Atom or RSS feed as requested.

    Every symlink found under the instance's feed/ directory becomes one feed entry. Each
    symlink is resolved to the real page it points at, and that page is rendered via parse_md.

    Args:
        feed_type: the trailing URL segment; only 'atom' and 'rss' are served

    Returns:
        a response holding the serialized feed, labelled with the matching XML content type

    Aborts with 404 for any other feed type, and with 500 if an entry can't be read or rendered.
    """
    if feed_type not in ('atom', 'rss'):
        abort(404)

    domain_name = app.config["DOMAIN_NAME"]
    fg = FeedGenerator()
    # Atom requires the feed ID to be a full IRI, which a bare domain name is not
    fg.id(f'https://{domain_name}/')
    fg.title(f'{app.config["TITLE_SUFFIX"]}')
    fg.link(href=f'https://{domain_name}/feed/{feed_type}', rel='self')
    fg.link(href=f'https://{domain_name}', rel='alternate')
    fg.subtitle(f"Blog posts and other dated materials from {app.config['TITLE_SUFFIX']}")

    # collect every symlink under feed/; regular files there are deliberately ignored
    feed_path = os.path.join(app.instance_path, 'feed')
    feed_entry_paths = [os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(feed_path)
                        for filename in filenames if os.path.islink(os.path.join(dirpath, filename))]

    for feed_entry_path in sorted(feed_entry_paths):
        # get the actual file to parse it, as a path relative to the instance directory
        resolved_path = os.path.realpath(feed_entry_path).replace(f'{app.instance_path}/', '')
        try:
            content, _, page_name, page_title, mtime = parse_md(resolved_path)
        except (OSError, ValueError, TypeError):
            logger.exception("error loading/rendering markdown!")
            abort(500)
        link = f'https://{domain_name}/{instance_resource_path_to_request_path(resolved_path)}'

        fe = fg.add_entry()
        # the ID is derived from the symlink's location, not from the page it points at
        fe.id(_generate_feed_id(feed_entry_path))
        fe.title(page_name if page_name else page_title)
        fe.link(href=link)
        # use the file's modification time so the entry timestamp is stable between requests
        fe.updated(mtime)
        fe.content(content, type='html')

    # label the payload explicitly; a bare bytes return would be served as text/html
    if feed_type == 'atom':
        return app.response_class(fg.atom_str(pretty=True), mimetype='application/atom+xml')
    return app.response_class(fg.rss_str(pretty=True), mimetype='application/rss+xml')
def _generate_feed_id(feed_entry_path):
    """Generate the Atom/RSS tag: URI identifying the feed entry at the given path.

    Args:
        feed_entry_path: path of the entry symlink, either absolute (under the instance path)
            or already relative to it, expected to look like .../feed/YYYY/MM/DD/file

    Returns:
        an ID of the form tag:DOMAIN_NAME,YYYY-MM-DD:/YYYY/MM/DD/file
    """
    # NOTE: re.sub leaves a path with no /digits/digits/digits run untouched, so such a path
    # would land in the date slot verbatim; entries are expected to follow feed/YYYY/MM/DD/
    date = re.sub(r'.*/(\d+)/(\d+)/(\d+).*', r'\1-\2-\3', feed_entry_path)

    # drop the instance path *before* removing the feed/ component: doing it the other way
    # around strips the wrong "feed/" when the instance path itself contains one, after which
    # the instance path no longer matches and the absolute disk path leaks into the ID
    relative_path = feed_entry_path
    if relative_path.startswith(app.instance_path):
        relative_path = relative_path[len(app.instance_path):]
    cleaned = relative_path.replace('#', '/').replace('feed/', '', 1)
    return f'tag:{app.config["DOMAIN_NAME"]},{date}:{cleaned}'

View File

@ -3,11 +3,15 @@
SPDX-FileCopyrightText: © 2021 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import datetime
import logging
import os
import re
import markdown
from flask import current_app as app
from flask import make_response, render_template, request
from markupsafe import Markup
logger = logging.getLogger(__name__)
@ -30,6 +34,45 @@ def init_md():
extension_configs=app.config['MARKDOWN_EXTENSION_CONFIGS'])
def instance_resource_path_to_request_path(path):
    """Reverse a (presumed to exist) RELATIVE disk path to the canonical path that would show up in a Flask route.

    This does not include the leading /, so aside from the root index case, this should be
    bidirectional.

    Args:
        path: a path relative to the instance directory, e.g. pages/foo/index.md

    Returns:
        the path with a trailing index.md file name, a trailing .md extension, and the leading
        pages/ directory removed
    """
    # an index page is served at its directory's URL; requiring a path boundary before it keeps
    # names that merely end in "index.md" (such as reindex.md) from being truncated
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    # the dot is escaped so that only a real ".md" extension is stripped
    without_extension = re.sub(r'\.md$', '', without_index)
    return re.sub(r'^pages/', '', without_extension)
def parse_md(resolved_path):
    """Read and render the Markdown file at an instance-relative path.

    Returns a 5-tuple of (rendered content, the Markdown object used, page name, page title,
    file modification time as an aware UTC datetime). OSError, ValueError, and TypeError raised
    while reading or converting are logged and then re-raised to the caller.
    """
    try:
        logger.debug("opening resolved path '%s'", resolved_path)
        with app.open_instance_resource(resolved_path, 'r') as source:
            modified_ts = os.path.getmtime(source.name)
            mtime = datetime.datetime.fromtimestamp(modified_ts, tz=datetime.timezone.utc)
            raw_markdown = source.read()
        logger.debug("resolved path '%s' read", resolved_path)
        md = init_md()
        rendered = md.convert(raw_markdown)
        content = Markup(rendered)
    except OSError:
        logger.exception("resolved path '%s' could not be opened!", resolved_path)
        raise
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        raise
    except TypeError:
        logger.exception("error loading/rendering markdown!")
        raise

    logger.debug("file metadata: %s", md.Meta)

    # prefer the title from the file's metadata, otherwise fall back to the request path
    if md.Meta.get('title'):
        page_name = get_meta_str(md, 'title')
    else:
        page_name = f'/{instance_resource_path_to_request_path(resolved_path)}'

    if page_name:
        page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}'
    else:
        page_title = app.config['TITLE_SUFFIX']
    logger.debug("title (potentially derived): %s", page_title)

    return content, md, page_name, page_title, mtime
def render(template_name_or_list, **context):
"""Wrap Flask's render_template.

View File

@ -3,10 +3,8 @@
SPDX-FileCopyrightText: © 2020 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import datetime
import logging
import os
import re
from flask import Blueprint, abort
from flask import current_app as app
@ -14,7 +12,7 @@ from flask import redirect, request, send_from_directory
from markupsafe import Markup
from werkzeug.security import safe_join
from incorporealcms.lib import get_meta_str, init_md, render
from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md, render
logger = logging.getLogger(__name__)
@ -54,36 +52,19 @@ def display_page(path):
def handle_markdown_file_path(resolved_path):
"""Given a location on disk, attempt to open it and render the markdown within."""
try:
logger.debug("opening resolved path '%s'", resolved_path)
with app.open_instance_resource(resolved_path, 'r') as entry_file:
mtime = datetime.datetime.fromtimestamp(os.path.getmtime(entry_file.name), tz=datetime.timezone.utc)
entry = entry_file.read()
logger.debug("resolved path '%s' read", resolved_path)
content, md, page_name, page_title, mtime = parse_md(resolved_path)
except OSError:
logger.exception("resolved path '%s' could not be opened!", resolved_path)
abort(500)
except ValueError:
logger.exception("error parsing/rendering markdown!")
abort(500)
except TypeError:
logger.exception("error loading/rendering markdown!")
abort(500)
else:
try:
md = init_md()
content = Markup(md.convert(entry))
except ValueError:
logger.exception("error parsing/rendering markdown!")
abort(500)
except TypeError:
logger.exception("error loading/rendering markdown!")
abort(500)
logger.debug("file metadata: %s", md.Meta)
parent_navs = generate_parent_navs(resolved_path)
page_name = (get_meta_str(md, 'title') if md.Meta.get('title') else
f'/{instance_resource_path_to_request_path(resolved_path)}')
page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}' if page_name else app.config['TITLE_SUFFIX']
logger.debug("title (potentially derived): %s", page_title)
extra_footer = get_meta_str(md, 'footer') if md.Meta.get('footer') else None
template = get_meta_str(md, 'template') if md.Meta.get('template') else 'base.html'
# check if this has a HTTP redirect
@ -156,15 +137,6 @@ def request_path_to_instance_resource_path(path):
return resolved_path.replace(f'{app.instance_path}{os.path.sep}', ''), 'markdown'
def instance_resource_path_to_request_path(path):
    """Reverse a (presumed to exist) disk path to the canonical path that would show up in a Flask route.

    This does not include the leading /, so aside from the root index case, this should be
    bidirectional.

    Args:
        path: a disk path such as pages/foo/index.md

    Returns:
        the path with a trailing index.md file name, a trailing .md extension, and the leading
        pages/ directory removed
    """
    # requiring a path boundary before index.md keeps names that merely end in "index.md"
    # (such as reindex.md) from being truncated
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    # the dot is escaped so that only a real ".md" extension is stripped
    without_extension = re.sub(r'\.md$', '', without_index)
    return re.sub(r'^pages/', '', without_extension)
def generate_parent_navs(path):
"""Create a series of paths/links to navigate up from the given resource path."""
if path == 'pages/index.md':

View File

@ -11,7 +11,7 @@ authors = [
{name = "Brian S. Stephan", email = "bss@incorporeal.org"},
]
requires-python = ">=3.8"
dependencies = ["Flask", "Markdown"]
dependencies = ["feedgen", "Flask", "Markdown"]
dynamic = ["version"]
classifiers = [
"Framework :: Flask",

View File

@ -35,6 +35,8 @@ dlint==0.14.1
# via incorporeal-cms (pyproject.toml)
dparse==0.6.3
# via safety
feedgen==1.0.0
# via incorporeal-cms (pyproject.toml)
filelock==3.13.1
# via
# tox
@ -83,6 +85,8 @@ itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
lxml==5.0.0
# via feedgen
markdown==3.5.1
# via incorporeal-cms (pyproject.toml)
markdown-it-py==3.0.0
@ -144,6 +148,8 @@ pytest==7.4.3
# pytest-cov
pytest-cov==4.1.0
# via incorporeal-cms (pyproject.toml)
python-dateutil==2.8.2
# via feedgen
pyyaml==6.0.1
# via bandit
requests==2.31.0
@ -158,6 +164,8 @@ safety==2.3.5
# via incorporeal-cms (pyproject.toml)
setuptools-scm==8.0.4
# via incorporeal-cms (pyproject.toml)
six==1.16.0
# via python-dateutil
smmap==5.0.1
# via gitdb
snowballstemmer==2.2.0

View File

@ -8,17 +8,25 @@ blinker==1.7.0
# via flask
click==8.1.7
# via flask
feedgen==1.0.0
# via incorporeal-cms (pyproject.toml)
flask==3.0.0
# via incorporeal-cms (pyproject.toml)
itsdangerous==2.1.2
# via flask
jinja2==3.1.2
# via flask
lxml==5.0.0
# via feedgen
markdown==3.5.1
# via incorporeal-cms (pyproject.toml)
markupsafe==2.1.3
# via
# jinja2
# werkzeug
python-dateutil==2.8.2
# via feedgen
six==1.16.0
# via python-dateutil
werkzeug==3.0.1
# via flask

View File

@ -0,0 +1 @@
../../../../pages/forced-no-title.md

View File

@ -0,0 +1 @@
../../../../pages/subdir-with-title/page.md

32
tests/test_feed.py Normal file
View File

@ -0,0 +1,32 @@
"""Test the feed methods.
SPDX-FileCopyrightText: © 2023 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
from incorporealcms.feed import serve_feed
def test_unknown_type_is_404(client):
    """Check that asking for a feed type that isn't served yields a 404."""
    status = client.get('/feed/wat').status_code
    assert status == 404
def test_atom_type_is_200(client):
    """Test that requesting the Atom feed succeeds and returns an Atom document."""
    response = client.get('/feed/atom')
    assert response.status_code == 200
    # check the payload rather than printing it: an Atom document's root element is <feed>
    assert b'<feed' in response.data
def test_rss_type_is_200(client):
    """Test that requesting the RSS feed succeeds and returns an RSS document."""
    response = client.get('/feed/rss')
    assert response.status_code == 200
    # check the payload rather than printing it: an RSS document's root element is <rss>
    assert b'<rss' in response.data
def test_feed_generator(app):
    """Test calling the feed view function directly, outside of URL routing."""
    with app.test_request_context():
        result = serve_feed('atom')
    # accept either the raw serialized feed or a response object wrapping it
    payload = getattr(result, 'data', result)
    assert b'<feed' in payload