implement a rudimentary Atom/RSS feed module

this adds a feed module (not really configurable at the moment) that
serves Atom and RSS feeds. entries are determined by symlinks to
content pages, because my core CMS usage is still more general and not
blog-like. the symlinks allow me to arbitrarily add entries as I see
fit.

this also moves core Markdown parser stuff to the library module, since
that's used by the feed as well as normal pages

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
This commit is contained in:
2023-12-30 13:23:13 -06:00
parent 5a9a36f463
commit 6dc443e59f
10 changed files with 175 additions and 38 deletions

71
incorporealcms/feed.py Normal file
View File

@@ -0,0 +1,71 @@
"""Generate Atom and RSS feeds based on content in a blog-ish location.
This parses a special root directory, feed/, for feed/YYYY/MM/DD/file entries,
and combines them into an Atom or RSS feed. Each entry must be a symlink to
the real page; files under feed/ that are not symlinks are ignored. The link
targets may mirror the same YYYY/MM/DD/file naming scheme under pages/ (which
may make sense for a blog), but could just as well be any pages/foo content.
SPDX-FileCopyrightText: © 2023 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: AGPL-3.0-or-later
"""
import logging
import os
import re
from feedgen.feed import FeedGenerator
from flask import Blueprint, abort
from flask import current_app as app
from incorporealcms.lib import instance_resource_path_to_request_path, parse_md
# module-level logger, named after this module
logger = logging.getLogger(__name__)
# blueprint that holds the feed routes; all of them are registered under /feed
bp = Blueprint('feed', __name__, url_prefix='/feed')
@bp.route('/<feed_type>')
def serve_feed(feed_type):
    """Serve the Atom or RSS feed as requested.

    Every symlink found anywhere under the instance's feed/ directory becomes
    one feed entry, rendered from the Markdown file the symlink resolves to.

    Args:
        feed_type: the requested format, either 'atom' or 'rss'

    Returns:
        a response carrying the serialized feed, with the XML mimetype that
        matches the requested format

    Aborts with a 404 for any other feed type, and with a 500 if any entry
    fails to load or render.
    """
    if feed_type not in ('atom', 'rss'):
        abort(404)

    domain_name = app.config["DOMAIN_NAME"]
    title_suffix = app.config["TITLE_SUFFIX"]

    fg = FeedGenerator()
    fg.id(f'{domain_name}')
    fg.title(f'{title_suffix}')
    fg.link(href=f'https://{domain_name}/feed/{feed_type}', rel='self')
    fg.link(href=f'https://{domain_name}', rel='alternate')
    fg.subtitle(f"Blog posts and other dated materials from {title_suffix}")

    # collect the feed entries; only symlinks count, regular files are skipped
    feed_path = os.path.join(app.instance_path, 'feed')
    feed_entry_paths = [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(feed_path)
        for filename in filenames
        if os.path.islink(os.path.join(dirpath, filename))
    ]

    for feed_entry_path in sorted(feed_entry_paths):
        # follow the symlink to the actual file, expressed relative to the instance dir
        resolved_path = os.path.realpath(feed_entry_path).replace(f'{app.instance_path}/', '')
        try:
            # NOTE(review): the parser object and mtime are not used here; confirm whether
            # mtime would be suitable as the entry's updated timestamp
            content, _md, page_name, page_title, _mtime = parse_md(resolved_path)
            link = f'https://{domain_name}/{instance_resource_path_to_request_path(resolved_path)}'
        except (OSError, ValueError, TypeError):
            # one unreadable entry fails the whole feed rather than silently dropping it
            logger.exception("error loading/rendering markdown!")
            abort(500)

        fe = fg.add_entry()
        # the ID comes from the symlink's location under feed/, not from the resolved target
        fe.id(_generate_feed_id(feed_entry_path))
        fe.title(page_name or page_title)
        fe.link(href=link)
        fe.content(content, type='html')

    # set the mimetype explicitly; a bare bytes return would be served as text/html
    if feed_type == 'atom':
        return app.response_class(fg.atom_str(pretty=True), mimetype='application/atom+xml')
    return app.response_class(fg.rss_str(pretty=True), mimetype='application/rss+xml')
def _generate_feed_id(feed_entry_path):
    """For a feed entry's file path, generate the Atom/RSS feed ID (a tag: URI) for it.

    Args:
        feed_entry_path: path to the entry under the instance's feed/ directory,
                         of the form <instance path>/feed/YYYY/MM/DD/file

    Returns:
        a string of the form tag:<DOMAIN_NAME>,<YYYY-MM-DD>:<path below feed/>
    """
    # the date comes from the numeric /YYYY/MM/DD directory components in the path
    date = re.sub(r'.*/(\d+)/(\d+)/(\d+).*', r'\1-\2-\3', feed_entry_path)

    # strip the instance path first, so that a 'feed/' or '#' occurring inside the
    # instance path itself can't interfere with the steps below
    instance_path = app.instance_path
    cleaned = feed_entry_path
    if instance_path and cleaned.startswith(instance_path):
        cleaned = cleaned[len(instance_path):]

    # '#' is swapped for '/' in the ID, and the leading feed/ directory is dropped
    cleaned = cleaned.replace('#', '/').replace('feed/', '', 1)
    return f'tag:{app.config["DOMAIN_NAME"]},{date}:{cleaned}'