From 6dc443e59fe54119fa542af8b1cc249c0c7e68fa Mon Sep 17 00:00:00 2001 From: "Brian S. Stephan" Date: Sat, 30 Dec 2023 13:23:13 -0600 Subject: [PATCH] implement a rudimentary Atom/RSS feed module this provides a somewhat unconfigurable (at the moment) feed module which provides Atom and RSS feeds. entries are determined by symlinks to content pages, because my core CMS usage is still more general and not blog-like. the symlinks allow for arbitrarily adding entries as I see fit. this also moves core Markdown parser stuff to the library module, since that's used by the feed as well as normal pages Signed-off-by: Brian S. Stephan --- incorporealcms/__init__.py | 3 +- incorporealcms/feed.py | 71 +++++++++++++++++++ incorporealcms/lib.py | 43 +++++++++++ incorporealcms/pages.py | 44 +++--------- pyproject.toml | 2 +- requirements/requirements-dev.txt | 8 +++ requirements/requirements.txt | 8 +++ .../feed/2023/12/01/forced-no-title.md | 1 + tests/instance/feed/2023/12/30/page.md | 1 + tests/test_feed.py | 32 +++++++++ 10 files changed, 175 insertions(+), 38 deletions(-) create mode 100644 incorporealcms/feed.py create mode 120000 tests/instance/feed/2023/12/01/forced-no-title.md create mode 120000 tests/instance/feed/2023/12/30/page.md create mode 100644 tests/test_feed.py diff --git a/incorporealcms/__init__.py b/incorporealcms/__init__.py index f215772..c111cff 100644 --- a/incorporealcms/__init__.py +++ b/incorporealcms/__init__.py @@ -39,7 +39,8 @@ def create_app(instance_path=None, test_config=None): logger.info("RESPONSE: %s %s: %s", request.method, request.path, response.status) return response - from . import error_pages, pages, static + from . 
import error_pages, feed, pages, static
+    app.register_blueprint(feed.bp)
     app.register_blueprint(pages.bp)
     app.register_blueprint(static.bp)
     app.register_error_handler(400, error_pages.bad_request)
diff --git a/incorporealcms/feed.py b/incorporealcms/feed.py
new file mode 100644
index 0000000..923b8f1
--- /dev/null
+++ b/incorporealcms/feed.py
@@ -0,0 +1,71 @@
+"""Generate Atom and RSS feeds based on content in a blog-ish location.
+
+This parses a special root directory, feed/, for feed/YYYY/MM/DD/file files,
+and combines them into an Atom or RSS feed. These files *should* be symlinks
+to the real pages, which may mirror the same YYYY/MM/DD/file naming scheme
+under pages/ (which may make sense for a blog) if they want, but could just
+as well be pages/foo content.
+
+SPDX-FileCopyrightText: © 2023 Brian S. Stephan
+SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+import logging
+import os
+import re
+
+from feedgen.feed import FeedGenerator
+from flask import Blueprint, abort
+from flask import current_app as app
+
+from incorporealcms.lib import instance_resource_path_to_request_path, parse_md
+
+logger = logging.getLogger(__name__)
+
+bp = Blueprint('feed', __name__, url_prefix='/feed')
+
+
+@bp.route('/<feed_type>')
+def serve_feed(feed_type):
+    """Serve the Atom or RSS feed as requested; any other feed type is a 404, an unreadable entry a 500."""
+    if feed_type not in ('atom', 'rss'):
+        abort(404)
+
+    fg = FeedGenerator()
+    fg.id(f'{app.config["DOMAIN_NAME"]}')
+    fg.title(f'{app.config["TITLE_SUFFIX"]}')
+    fg.link(href=f'https://{app.config["DOMAIN_NAME"]}/feed/{feed_type}', rel='self')
+    fg.link(href=f'https://{app.config["DOMAIN_NAME"]}', rel='alternate')
+    fg.subtitle(f"Blog posts and other dated materials from {app.config['TITLE_SUFFIX']}")
+
+    # every symlink found anywhere under the instance's feed/ directory is one feed entry
+    feed_path = os.path.join(app.instance_path, 'feed')
+    feed_entry_paths = [os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(feed_path)
+                        for filename in filenames if os.path.islink(os.path.join(dirpath, filename))]
+    for feed_entry_path in sorted(feed_entry_paths):
+        # get the actual file to parse it
+        resolved_path = os.path.realpath(feed_entry_path).replace(f'{app.instance_path}{os.path.sep}', '')
+        try:
+            content, md, page_name, page_title, mtime = parse_md(resolved_path)
+            link = f'https://{app.config["DOMAIN_NAME"]}/{instance_resource_path_to_request_path(resolved_path)}'
+        except (OSError, ValueError, TypeError):
+            logger.exception("error loading/rendering markdown!")
+            abort(500)
+
+        fe = fg.add_entry()
+        fe.id(_generate_feed_id(feed_entry_path))
+        fe.title(page_name if page_name else page_title)
+        fe.link(href=link)
+        fe.updated(mtime)
+        fe.content(content, type='html')
+
+    # serve with the feed's own media type, rather than Flask's default of text/html
+    feed = fg.atom_str(pretty=True) if feed_type == 'atom' else fg.rss_str(pretty=True)
+    return app.response_class(feed, mimetype=f'application/{feed_type}+xml')
+
+
+def _generate_feed_id(feed_entry_path):
+    """For a feed entry path under the instance directory, generate the Atom/RSS feed ID (a tag: URI) for it."""
+    date = re.sub(r'.*/(\d+)/(\d+)/(\d+).*', r'\1-\2-\3', feed_entry_path)
+    # strip the instance path first, so a "feed/" in the instance path itself isn't what gets removed
+    cleaned = feed_entry_path.replace(app.instance_path, '', 1).replace('feed/', '', 1).replace('#', '/')
+    return f'tag:{app.config["DOMAIN_NAME"]},{date}:{cleaned}'
diff --git a/incorporealcms/lib.py b/incorporealcms/lib.py
index b25a773..c4e323a 100644
--- a/incorporealcms/lib.py
+++ b/incorporealcms/lib.py
@@ -3,11 +3,15 @@
 SPDX-FileCopyrightText: © 2021 Brian S. Stephan
 SPDX-License-Identifier: AGPL-3.0-or-later
 """
+import datetime
 import logging
+import os
+import re
 
 import markdown
 from flask import current_app as app
 from flask import make_response, render_template, request
+from markupsafe import Markup
 
 logger = logging.getLogger(__name__)
 
@@ -30,6 +34,45 @@ def init_md():
                              extension_configs=app.config['MARKDOWN_EXTENSION_CONFIGS'])
 
 
+def instance_resource_path_to_request_path(path):
+    """Reverse a (presumed to exist) RELATIVE disk path to the canonical path that would show up in a Flask route.
+
+    This does not include the leading /, so aside from the root index case, this should be bidirectional.
+    """
+    # dots are escaped and index.md is anchored to a path boundary, so only genuine suffixes are stripped
+    return re.sub(r'^pages/', '', re.sub(r'\.md$', '', re.sub(r'(^|/)index\.md$', r'\1', path)))
+
+
+def parse_md(resolved_path):
+    """Given a file to parse, return file content and other derived data along with the md object."""
+    try:
+        logger.debug("opening resolved path '%s'", resolved_path)
+        with app.open_instance_resource(resolved_path, 'r') as entry_file:
+            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(entry_file.name), tz=datetime.timezone.utc)
+            entry = entry_file.read()
+        logger.debug("resolved path '%s' read", resolved_path)
+        md = init_md()
+        content = Markup(md.convert(entry))
+    except OSError:
+        logger.exception("resolved path '%s' could not be opened!", resolved_path)
+        raise
+    except ValueError:
+        logger.exception("error parsing/rendering markdown!")
+        raise
+    except TypeError:
+        logger.exception("error loading/rendering markdown!")
+        raise
+
+    logger.debug("file metadata: %s", md.Meta)
+
+    page_name = (get_meta_str(md, 'title') if md.Meta.get('title') else
+                 f'/{instance_resource_path_to_request_path(resolved_path)}')
+    page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}' if page_name else app.config['TITLE_SUFFIX']
+    logger.debug("title (potentially derived): %s", page_title)
+
+    return content, md, page_name, page_title, mtime
+
+
 def render(template_name_or_list, **context):
     """Wrap Flask's render_template.
 
diff --git a/incorporealcms/pages.py b/incorporealcms/pages.py
index ceea938..abdfa38 100644
--- a/incorporealcms/pages.py
+++ b/incorporealcms/pages.py
@@ -3,10 +3,8 @@
 SPDX-FileCopyrightText: © 2020 Brian S.
Stephan SPDX-License-Identifier: AGPL-3.0-or-later """ -import datetime import logging import os -import re from flask import Blueprint, abort from flask import current_app as app @@ -14,7 +12,7 @@ from flask import redirect, request, send_from_directory from markupsafe import Markup from werkzeug.security import safe_join -from incorporealcms.lib import get_meta_str, init_md, render +from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md, render logger = logging.getLogger(__name__) @@ -54,36 +52,19 @@ def display_page(path): def handle_markdown_file_path(resolved_path): """Given a location on disk, attempt to open it and render the markdown within.""" try: - logger.debug("opening resolved path '%s'", resolved_path) - with app.open_instance_resource(resolved_path, 'r') as entry_file: - mtime = datetime.datetime.fromtimestamp(os.path.getmtime(entry_file.name), tz=datetime.timezone.utc) - entry = entry_file.read() - logger.debug("resolved path '%s' read", resolved_path) + content, md, page_name, page_title, mtime = parse_md(resolved_path) except OSError: logger.exception("resolved path '%s' could not be opened!", resolved_path) abort(500) + except ValueError: + logger.exception("error parsing/rendering markdown!") + abort(500) + except TypeError: + logger.exception("error loading/rendering markdown!") + abort(500) else: - try: - md = init_md() - content = Markup(md.convert(entry)) - except ValueError: - logger.exception("error parsing/rendering markdown!") - abort(500) - except TypeError: - logger.exception("error loading/rendering markdown!") - abort(500) - - logger.debug("file metadata: %s", md.Meta) - parent_navs = generate_parent_navs(resolved_path) - - page_name = (get_meta_str(md, 'title') if md.Meta.get('title') else - f'/{instance_resource_path_to_request_path(resolved_path)}') - page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}' if page_name else app.config['TITLE_SUFFIX'] - logger.debug("title 
(potentially derived): %s", page_title) - extra_footer = get_meta_str(md, 'footer') if md.Meta.get('footer') else None - template = get_meta_str(md, 'template') if md.Meta.get('template') else 'base.html' # check if this has a HTTP redirect @@ -156,15 +137,6 @@ def request_path_to_instance_resource_path(path): return resolved_path.replace(f'{app.instance_path}{os.path.sep}', ''), 'markdown' -def instance_resource_path_to_request_path(path): - """Reverse a (presumed to exist) disk path to the canonical path that would show up in a Flask route. - - This does not include the leading /, so aside from the root index case, this should be - bidirectional. - """ - return re.sub(r'^pages/', '', re.sub(r'.md$', '', re.sub(r'index.md$', '', path))) - - def generate_parent_navs(path): """Create a series of paths/links to navigate up from the given resource path.""" if path == 'pages/index.md': diff --git a/pyproject.toml b/pyproject.toml index 9deaf29..bc2366b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ {name = "Brian S. 
Stephan", email = "bss@incorporeal.org"}, ] requires-python = ">=3.8" -dependencies = ["Flask", "Markdown"] +dependencies = ["feedgen", "Flask", "Markdown"] dynamic = ["version"] classifiers = [ "Framework :: Flask", diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 3357f16..f8ef791 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -35,6 +35,8 @@ dlint==0.14.1 # via incorporeal-cms (pyproject.toml) dparse==0.6.3 # via safety +feedgen==1.0.0 + # via incorporeal-cms (pyproject.toml) filelock==3.13.1 # via # tox @@ -83,6 +85,8 @@ itsdangerous==2.1.2 # via flask jinja2==3.1.2 # via flask +lxml==5.0.0 + # via feedgen markdown==3.5.1 # via incorporeal-cms (pyproject.toml) markdown-it-py==3.0.0 @@ -144,6 +148,8 @@ pytest==7.4.3 # pytest-cov pytest-cov==4.1.0 # via incorporeal-cms (pyproject.toml) +python-dateutil==2.8.2 + # via feedgen pyyaml==6.0.1 # via bandit requests==2.31.0 @@ -158,6 +164,8 @@ safety==2.3.5 # via incorporeal-cms (pyproject.toml) setuptools-scm==8.0.4 # via incorporeal-cms (pyproject.toml) +six==1.16.0 + # via python-dateutil smmap==5.0.1 # via gitdb snowballstemmer==2.2.0 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index abe3742..327b89a 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -8,17 +8,25 @@ blinker==1.7.0 # via flask click==8.1.7 # via flask +feedgen==1.0.0 + # via incorporeal-cms (pyproject.toml) flask==3.0.0 # via incorporeal-cms (pyproject.toml) itsdangerous==2.1.2 # via flask jinja2==3.1.2 # via flask +lxml==5.0.0 + # via feedgen markdown==3.5.1 # via incorporeal-cms (pyproject.toml) markupsafe==2.1.3 # via # jinja2 # werkzeug +python-dateutil==2.8.2 + # via feedgen +six==1.16.0 + # via python-dateutil werkzeug==3.0.1 # via flask diff --git a/tests/instance/feed/2023/12/01/forced-no-title.md b/tests/instance/feed/2023/12/01/forced-no-title.md new file mode 120000 index 0000000..cf293c3 --- 
/dev/null
+++ b/tests/instance/feed/2023/12/01/forced-no-title.md
@@ -0,0 +1 @@
+../../../../pages/forced-no-title.md
\ No newline at end of file
diff --git a/tests/instance/feed/2023/12/30/page.md b/tests/instance/feed/2023/12/30/page.md
new file mode 120000
index 0000000..b0871a3
--- /dev/null
+++ b/tests/instance/feed/2023/12/30/page.md
@@ -0,0 +1 @@
+../../../../pages/subdir-with-title/page.md
\ No newline at end of file
diff --git a/tests/test_feed.py b/tests/test_feed.py
new file mode 100644
index 0000000..c98db6e
--- /dev/null
+++ b/tests/test_feed.py
@@ -0,0 +1,32 @@
+"""Test the feed methods.
+
+SPDX-FileCopyrightText: © 2023 Brian S. Stephan
+SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+from incorporealcms.feed import serve_feed
+
+
+def test_unknown_type_is_404(client):
+    """Test that requesting a feed type that doesn't exist is a 404."""
+    response = client.get('/feed/wat')
+    assert response.status_code == 404
+
+
+def test_atom_type_is_200(client):
+    """Test that requesting an ATOM feed is found and is an Atom document."""
+    response = client.get('/feed/atom')
+    assert response.status_code == 200
+    assert 'http://www.w3.org/2005/Atom' in response.text
+
+
+def test_rss_type_is_200(client):
+    """Test that requesting an RSS feed is found and is an RSS 2.0 document."""
+    response = client.get('/feed/rss')
+    assert response.status_code == 200
+    assert 'version="2.0"' in response.text
+
+
+def test_feed_generator(app):
+    """Test that the feed view can be called directly and produces a non-empty result."""
+    with app.test_request_context():
+        assert serve_feed('atom')