implement a rudimentary Atom/RSS feed module

this adds a somewhat unconfigurable (at the moment) feed module which provides Atom and RSS feeds. entries are determined by symlinks to content pages, because my core CMS usage is still more general and not blog-like. the symlinks allow for arbitrarily adding entries as I see fit.

this also moves core Markdown parser stuff to the library module, since that's used by the feed as well as normal pages.

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
2023-12-30 13:23:13 -06:00
parent 5a9a36f463
commit 6dc443e59f
10 changed files with 175 additions and 38 deletions
--- a/incorporealcms/feed.py
+++ b/incorporealcms/feed.py
@@ -0,0 +1,71 @@
+"""Generate Atom and RSS feeds based on content in a blog-ish location.
+
+This parses a special root directory, feed/, for feed/YYYY/MM/DD/file files,
+and combines them into an Atom or RSS feed. These files *should* be symlinks
+to the real pages, which may mirror the same YYYY/MM/DD/file naming scheme
+under pages/ (which may make sense for a blog) if they want, but could just
+as well be pages/foo content.
+
+SPDX-FileCopyrightText: © 2023 Brian S. Stephan <bss@incorporeal.org>
+SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+import logging
+import os
+import re
+
+from feedgen.feed import FeedGenerator
+from flask import Blueprint, abort
+from flask import current_app as app
+
+from incorporealcms.lib import instance_resource_path_to_request_path, parse_md
+
+# module-level logger, named after this module
+logger = logging.getLogger(__name__)
+
+# blueprint holding the feed routes; everything registered on it is served under /feed
+bp = Blueprint('feed', __name__, url_prefix='/feed')
+
+
+@bp.route('/<feed_type>')
+def serve_feed(feed_type):
+    """Serve the Atom or RSS feed as requested."""
+    logger.warning("wat")
+    if feed_type not in ('atom', 'rss'):
+        abort(404)
+
+    fg = FeedGenerator()
+    fg.id(f'{app.config["DOMAIN_NAME"]}')
+    fg.title(f'{app.config["TITLE_SUFFIX"]}')
+    fg.link(href=f'https://{app.config["DOMAIN_NAME"]}/feed/{feed_type}', rel='self')
+    fg.link(href=f'https://{app.config["DOMAIN_NAME"]}', rel='alternate')
+    fg.subtitle(f"Blog posts and other dated materials from {app.config['TITLE_SUFFIX']}")
+
+    # get recent feeds
+    feed_path = os.path.join(app.instance_path, 'feed')
+    feed_entry_paths = [os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(feed_path)
+                        for filename in filenames if os.path.islink(os.path.join(dirpath, filename))]
+    for feed_entry_path in sorted(feed_entry_paths):
+        # get the actual file to parse it
+        resolved_path = os.path.realpath(feed_entry_path).replace(f'{app.instance_path}/', '')
+        try:
+            content, md, page_name, page_title, mtime = parse_md(resolved_path)
+            link = f'https://{app.config["DOMAIN_NAME"]}/{instance_resource_path_to_request_path(resolved_path)}'
+        except (OSError, ValueError, TypeError):
+            logger.exception("error loading/rendering markdown!")
+            abort(500)
+
+        fe = fg.add_entry()
+        fe.id(_generate_feed_id(feed_entry_path))
+        fe.title(page_name if page_name else page_title)
+        fe.link(href=link)
+        fe.content(content, type='html')
+
+    if feed_type == 'atom':
+        return fg.atom_str(pretty=True)
+    else:
+        return fg.rss_str(pretty=True)
+
+
+def _generate_feed_id(feed_entry_path):
+    """For a relative file path, generate the Atom/RSS feed ID for it."""
+    date = re.sub(r'.*/(\d+)/(\d+)/(\d+).*', r'\1-\2-\3', feed_entry_path)
+    cleaned = feed_entry_path.replace('#', '/').replace('feed/', '', 1).replace(app.instance_path, '')
+    return f'tag:{app.config["DOMAIN_NAME"]},{date}:{cleaned}'