diff --git a/incorporealcms/__init__.py b/incorporealcms/__init__.py index f215772..c111cff 100644 --- a/incorporealcms/__init__.py +++ b/incorporealcms/__init__.py @@ -39,7 +39,8 @@ def create_app(instance_path=None, test_config=None): logger.info("RESPONSE: %s %s: %s", request.method, request.path, response.status) return response - from . import error_pages, pages, static + from . import error_pages, feed, pages, static + app.register_blueprint(feed.bp) app.register_blueprint(pages.bp) app.register_blueprint(static.bp) app.register_error_handler(400, error_pages.bad_request) diff --git a/incorporealcms/feed.py b/incorporealcms/feed.py new file mode 100644 index 0000000..923b8f1 --- /dev/null +++ b/incorporealcms/feed.py @@ -0,0 +1,71 @@ +"""Generate Atom and RSS feeds based on content in a blog-ish location. + +This parses a special root directory, feed/, for feed/YYYY/MM/DD/file files, +and combines them into an Atom or RSS feed. These files *should* be symlinks +to the real pages, which may mirror the same YYYY/MM/DD/file naming scheme +under pages/ (which may make sense for a blog) if they want, but could just +as well be pages/foo content. + +SPDX-FileCopyrightText: © 2023 Brian S. 
@bp.route('/<feed_type>')
def serve_feed(feed_type):
    """Serve the Atom or RSS feed as requested.

    Every symlink found (recursively) under the instance's feed/ directory is resolved to the
    file it points at, rendered through parse_md, and added to the feed in sorted path order.
    Regular (non-symlink) files under feed/ are ignored.

    feed_type comes from the URL and must be 'atom' or 'rss'; anything else aborts with a 404.
    If any entry cannot be loaded or rendered, the whole request aborts with a 500.

    Returns the serialized feed as produced by feedgen's atom_str()/rss_str().
    """
    if feed_type not in ('atom', 'rss'):
        abort(404)
    logger.debug("generating '%s' feed", feed_type)

    domain_name = app.config['DOMAIN_NAME']
    title_suffix = app.config['TITLE_SUFFIX']

    fg = FeedGenerator()
    fg.id(domain_name)
    fg.title(title_suffix)
    fg.link(href=f'https://{domain_name}/feed/{feed_type}', rel='self')
    fg.link(href=f'https://{domain_name}', rel='alternate')
    fg.subtitle(f"Blog posts and other dated materials from {title_suffix}")

    # collect the feed entries; only symlinks count, since they point at the real page to render
    feed_path = os.path.join(app.instance_path, 'feed')
    feed_entry_paths = []
    for dirpath, _, filenames in os.walk(feed_path):
        for filename in filenames:
            candidate = os.path.join(dirpath, filename)
            if os.path.islink(candidate):
                feed_entry_paths.append(candidate)

    # compare real paths to real paths, so the instance prefix is still stripped when the
    # instance directory itself is reached through a symlink, and without assuming '/'
    instance_root = os.path.realpath(app.instance_path)
    for feed_entry_path in sorted(feed_entry_paths):
        # get the actual file, relative to the instance directory, to parse it
        resolved_path = os.path.relpath(os.path.realpath(feed_entry_path), instance_root)
        try:
            content, _md, page_name, page_title, mtime = parse_md(resolved_path)
        except (OSError, ValueError, TypeError):
            logger.exception("error loading/rendering markdown!")
            abort(500)
        link = f'https://{domain_name}/{instance_resource_path_to_request_path(resolved_path)}'

        fe = fg.add_entry()
        fe.id(_generate_feed_id(feed_entry_path))
        # an explicitly empty page title falls back to the derived full page title
        fe.title(page_name if page_name else page_title)
        fe.link(href=link)
        fe.content(content, type='html')
        # use the page's (timezone-aware) modification time rather than letting the entry
        # default to the time the feed happened to be generated
        fe.updated(mtime)

    # NOTE(review): the raw serialized feed is returned as-is, so the response presumably gets
    # Flask's default mimetype rather than a feed-specific one -- confirm whether that matters
    if feed_type == 'atom':
        return fg.atom_str(pretty=True)
    return fg.rss_str(pretty=True)


def _generate_feed_id(feed_entry_path):
    """For a feed entry path (feed/YYYY/MM/DD/file, optionally under the instance path), generate its feed ID.

    The ID has the form tag:DOMAIN_NAME,YYYY-MM-DD:/YYYY/MM/DD/file, where the date comes from
    the last three consecutive all-digit directory components of the path.
    """
    date = re.sub(r'.*/(\d+)/(\d+)/(\d+).*', r'\1-\2-\3', feed_entry_path)

    # strip the instance path as a prefix *first*, so that a 'feed/' or '#' occurring inside the
    # instance path itself can't be mistaken for part of the entry's own path
    specific = feed_entry_path
    if specific.startswith(app.instance_path):
        specific = specific[len(app.instance_path):]
    cleaned = specific.replace('#', '/').replace('feed/', '', 1)
    return f'tag:{app.config["DOMAIN_NAME"]},{date}:{cleaned}'
def instance_resource_path_to_request_path(path):
    """Reverse a (presumed to exist) RELATIVE disk path to the canonical path that would show up in a Flask route.

    A trailing index.md path component is dropped (keeping the directory's trailing slash), any
    other trailing .md extension is removed, and the leading pages/ directory is stripped.

    This does not include the leading /, so aside from the root index case, this should be
    bidirectional.
    """
    # only a whole 'index.md' component is an index page; 'reindex.md' is a regular page
    without_index = re.sub(r'(^|/)index\.md$', r'\1', path)
    # the dot is escaped so that only a literal '.md' extension is removed (not e.g. 'cmd')
    without_extension = re.sub(r'\.md$', '', without_index)
    return re.sub(r'^pages/', '', without_extension)


def parse_md(resolved_path):
    """Given a file to parse, return file content and other derived data along with the md object.

    resolved_path is opened as an instance resource, so it is relative to the app's instance path.

    Returns a (content, md, page_name, page_title, mtime) tuple:
        content: the rendered markdown, wrapped in Markup
        md: the markdown object used for the conversion (its Meta holds the file's metadata)
        page_name: the 'title' metadata if the file sets one, otherwise the derived request path
        page_title: page_name joined with the configured TITLE_SUFFIX, or just the suffix
                    if page_name is empty
        mtime: the file's modification time as a timezone-aware (UTC) datetime

    OSError, ValueError, and TypeError are logged here and then re-raised for the caller to handle.
    """
    try:
        logger.debug("opening resolved path '%s'", resolved_path)
        with app.open_instance_resource(resolved_path, 'r') as entry_file:
            mtime = datetime.datetime.fromtimestamp(os.path.getmtime(entry_file.name), tz=datetime.timezone.utc)
            entry = entry_file.read()
        logger.debug("resolved path '%s' read", resolved_path)
        md = init_md()
        content = Markup(md.convert(entry))
    except OSError:
        logger.exception("resolved path '%s' could not be opened!", resolved_path)
        raise
    except ValueError:
        logger.exception("error parsing/rendering markdown!")
        raise
    except TypeError:
        logger.exception("error loading/rendering markdown!")
        raise

    logger.debug("file metadata: %s", md.Meta)

    # prefer the title from the file's metadata; without one, show the request path instead
    if md.Meta.get('title'):
        page_name = get_meta_str(md, 'title')
    else:
        page_name = f'/{instance_resource_path_to_request_path(resolved_path)}'
    page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}' if page_name else app.config['TITLE_SUFFIX']
    logger.debug("title (potentially derived): %s", page_title)

    return content, md, page_name, page_title, mtime
Stephan SPDX-License-Identifier: AGPL-3.0-or-later """ -import datetime import logging import os -import re from flask import Blueprint, abort from flask import current_app as app @@ -14,7 +12,7 @@ from flask import redirect, request, send_from_directory from markupsafe import Markup from werkzeug.security import safe_join -from incorporealcms.lib import get_meta_str, init_md, render +from incorporealcms.lib import get_meta_str, init_md, instance_resource_path_to_request_path, parse_md, render logger = logging.getLogger(__name__) @@ -54,36 +52,19 @@ def display_page(path): def handle_markdown_file_path(resolved_path): """Given a location on disk, attempt to open it and render the markdown within.""" try: - logger.debug("opening resolved path '%s'", resolved_path) - with app.open_instance_resource(resolved_path, 'r') as entry_file: - mtime = datetime.datetime.fromtimestamp(os.path.getmtime(entry_file.name), tz=datetime.timezone.utc) - entry = entry_file.read() - logger.debug("resolved path '%s' read", resolved_path) + content, md, page_name, page_title, mtime = parse_md(resolved_path) except OSError: logger.exception("resolved path '%s' could not be opened!", resolved_path) abort(500) + except ValueError: + logger.exception("error parsing/rendering markdown!") + abort(500) + except TypeError: + logger.exception("error loading/rendering markdown!") + abort(500) else: - try: - md = init_md() - content = Markup(md.convert(entry)) - except ValueError: - logger.exception("error parsing/rendering markdown!") - abort(500) - except TypeError: - logger.exception("error loading/rendering markdown!") - abort(500) - - logger.debug("file metadata: %s", md.Meta) - parent_navs = generate_parent_navs(resolved_path) - - page_name = (get_meta_str(md, 'title') if md.Meta.get('title') else - f'/{instance_resource_path_to_request_path(resolved_path)}') - page_title = f'{page_name} - {app.config["TITLE_SUFFIX"]}' if page_name else app.config['TITLE_SUFFIX'] - logger.debug("title 
(potentially derived): %s", page_title) - extra_footer = get_meta_str(md, 'footer') if md.Meta.get('footer') else None - template = get_meta_str(md, 'template') if md.Meta.get('template') else 'base.html' # check if this has a HTTP redirect @@ -156,15 +137,6 @@ def request_path_to_instance_resource_path(path): return resolved_path.replace(f'{app.instance_path}{os.path.sep}', ''), 'markdown' -def instance_resource_path_to_request_path(path): - """Reverse a (presumed to exist) disk path to the canonical path that would show up in a Flask route. - - This does not include the leading /, so aside from the root index case, this should be - bidirectional. - """ - return re.sub(r'^pages/', '', re.sub(r'.md$', '', re.sub(r'index.md$', '', path))) - - def generate_parent_navs(path): """Create a series of paths/links to navigate up from the given resource path.""" if path == 'pages/index.md': diff --git a/pyproject.toml b/pyproject.toml index 9deaf29..bc2366b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ {name = "Brian S. 
Stephan", email = "bss@incorporeal.org"}, ] requires-python = ">=3.8" -dependencies = ["Flask", "Markdown"] +dependencies = ["feedgen", "Flask", "Markdown"] dynamic = ["version"] classifiers = [ "Framework :: Flask", diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index 3357f16..f8ef791 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -35,6 +35,8 @@ dlint==0.14.1 # via incorporeal-cms (pyproject.toml) dparse==0.6.3 # via safety +feedgen==1.0.0 + # via incorporeal-cms (pyproject.toml) filelock==3.13.1 # via # tox @@ -83,6 +85,8 @@ itsdangerous==2.1.2 # via flask jinja2==3.1.2 # via flask +lxml==5.0.0 + # via feedgen markdown==3.5.1 # via incorporeal-cms (pyproject.toml) markdown-it-py==3.0.0 @@ -144,6 +148,8 @@ pytest==7.4.3 # pytest-cov pytest-cov==4.1.0 # via incorporeal-cms (pyproject.toml) +python-dateutil==2.8.2 + # via feedgen pyyaml==6.0.1 # via bandit requests==2.31.0 @@ -158,6 +164,8 @@ safety==2.3.5 # via incorporeal-cms (pyproject.toml) setuptools-scm==8.0.4 # via incorporeal-cms (pyproject.toml) +six==1.16.0 + # via python-dateutil smmap==5.0.1 # via gitdb snowballstemmer==2.2.0 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index abe3742..327b89a 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -8,17 +8,25 @@ blinker==1.7.0 # via flask click==8.1.7 # via flask +feedgen==1.0.0 + # via incorporeal-cms (pyproject.toml) flask==3.0.0 # via incorporeal-cms (pyproject.toml) itsdangerous==2.1.2 # via flask jinja2==3.1.2 # via flask +lxml==5.0.0 + # via feedgen markdown==3.5.1 # via incorporeal-cms (pyproject.toml) markupsafe==2.1.3 # via # jinja2 # werkzeug +python-dateutil==2.8.2 + # via feedgen +six==1.16.0 + # via python-dateutil werkzeug==3.0.1 # via flask diff --git a/tests/instance/feed/2023/12/01/forced-no-title.md b/tests/instance/feed/2023/12/01/forced-no-title.md new file mode 120000 index 0000000..cf293c3 --- 
def test_unknown_type_is_404(client):
    """Test that requesting a feed type that doesn't exist is a 404."""
    response = client.get('/feed/wat')
    assert response.status_code == 404


def test_atom_type_is_200(client):
    """Test that requesting an ATOM feed is found and returns an Atom document."""
    response = client.get('/feed/atom')
    assert response.status_code == 200
    # check the payload itself rather than printing it for manual inspection
    assert '<feed' in response.text


def test_rss_type_is_200(client):
    """Test that requesting an RSS feed is found and returns an RSS document."""
    response = client.get('/feed/rss')
    assert response.status_code == 200
    # check the payload itself rather than printing it for manual inspection
    assert '<rss' in response.text


def test_feed_generator(app):
    """Test that the root feed generator, called directly, produces a non-empty feed."""
    with app.test_request_context():
        assert serve_feed('atom')