incorporeal-cms/incorporealcms/feed.py
Brian S. Stephan 7eb485c6ae
rewrite the project as a static site generator
this removes Flask, reworks a number of library methods accordingly, and
adds generators and build commands to process the instance directory
(largely unchanged, except config.py is now config.json) and spit out
files suitable to be served by a web server such as Nginx.

there are probably some rough edges here, but overall this works.

also note, as this is no longer server software on a network, the
license has changed from AGPLv3 to GPLv3, and the "or any later version"
allowance has been removed

Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
2025-03-16 23:56:37 -05:00

83 lines
3.4 KiB
Python

"""Generate Atom and RSS feeds based on content in a blog-ish location.
This parses a special root directory, feed/, for YYYYMMDD-foo.md files,
and combines them into an Atom or RSS feed. These files *should* be symlinks
to the real pages, which may mirror the same YYYYMMDD-foo.md file naming scheme
under pages/ (which may make sense for a blog) if they want, but could just
as well be pages/foo content.
SPDX-FileCopyrightText: © 2023 Brian S. Stephan <bss@incorporeal.org>
SPDX-License-Identifier: GPL-3.0-only
"""
import logging
import os
import re
from feedgen.feed import FeedGenerator
from incorporealcms.config import Config
from incorporealcms.markdown import instance_resource_path_to_request_path, parse_md
logger = logging.getLogger(__name__)
def generate_feed(feed_type: str, instance_dir: str, dest_dir: str) -> None:
    """Generate the Atom or RSS feed as requested.

    Every symlink found under ``<instance_dir>/feed`` is resolved to the file it points at,
    rendered with ``parse_md``, and added to the feed as one entry (in sorted path order).
    The finished feed is written to ``<dest_dir>/feed/rss`` or ``<dest_dir>/feed/atom``.

    Side effect: when there is at least one entry, the process working directory is changed
    to ``<instance_dir>/pages`` and is not restored afterwards. The path handed to
    ``parse_md`` is relative to that directory.

    Args:
        feed_type: 'atom' or 'rss' feed; any value other than 'rss' writes the Atom feed
        instance_dir: the directory for the instance, containing both the feed dir and pages
        dest_dir: the directory to place the feed subdir and requested feed

    Raises:
        OSError, ValueError, TypeError: re-raised after logging when an entry can't be loaded/rendered
    """
    # Pin the caller's directories to absolute paths before any os.chdir() happens; otherwise
    # relative arguments would silently be re-anchored to the pages directory further down.
    instance_dir = os.path.abspath(instance_dir)
    dest_dir = os.path.abspath(dest_dir)
    # realpath() so that this is comparable with the realpath()-resolved symlink targets below
    pages_dir = os.path.realpath(os.path.join(instance_dir, 'pages'))

    fg = FeedGenerator()
    fg.id(f'https://{Config.DOMAIN_NAME}/')
    fg.title(f'{Config.TITLE_SUFFIX}')
    fg.author(Config.AUTHOR)
    fg.link(href=f'https://{Config.DOMAIN_NAME}/feed/{feed_type}', rel='self')
    fg.link(href=f'https://{Config.DOMAIN_NAME}', rel='alternate')
    fg.subtitle(f"Blog posts and other interesting materials from {Config.TITLE_SUFFIX}")

    # collect the feed entries: only symlinks under feed/ count, regular files are ignored
    feed_path = os.path.join(instance_dir, 'feed')
    feed_entry_paths = sorted(
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(feed_path)
        for filename in filenames
        if os.path.islink(os.path.join(dirpath, filename))
    )

    if feed_entry_paths:
        # entries are parsed via paths relative to pages/, so work from there
        # NOTE(review): presumably parse_md resolves its argument against the cwd -- confirm
        os.chdir(pages_dir)

    for feed_entry_path in feed_entry_paths:
        # get the actual file (the symlink target) to parse it
        resolved_path = os.path.relpath(os.path.realpath(feed_entry_path), pages_dir)
        try:
            content, _, _, page_title, _ = parse_md(resolved_path)
            request_path = instance_resource_path_to_request_path(resolved_path)
        except (OSError, ValueError, TypeError):
            logger.exception("error loading/rendering markdown!")
            raise

        fe = fg.add_entry()
        # the ID is derived from the symlink's own path (which carries the date), not the target's
        fe.id(_generate_feed_id(feed_entry_path, request_path))
        fe.title(page_title)
        fe.author(Config.AUTHOR)
        fe.link(href=f'https://{Config.DOMAIN_NAME}{request_path}')
        fe.content(content, type='html')

    feed_dir = os.path.join(dest_dir, 'feed')
    try:
        os.mkdir(feed_dir)
    except FileExistsError:
        # already created, e.g. by generating the other feed type first
        pass

    # render before opening the output file so a rendering failure can't truncate an existing feed
    if feed_type == 'rss':
        out_name, payload = 'rss', fg.rss_str(pretty=True)
    else:
        out_name, payload = 'atom', fg.atom_str(pretty=True)
    with open(os.path.join(feed_dir, out_name), 'wb') as feed_file:
        feed_file.write(payload)
def _generate_feed_id(feed_entry_path, request_path):
    """Build the Atom/RSS entry ID (a ``tag:`` string) for one feed entry.

    The date portion comes from a YYYYMMDD run of digits in feed_entry_path, rewritten as
    YYYY-MM-DD; if the path has no such run, the path is used unchanged in its place.
    The trailing portion is request_path with every '#' replaced by '/'.
    """
    date_pattern = re.compile(r'.*(\d{4})(\d{2})(\d{2}).*')
    tag_date = date_pattern.sub(r'\1-\2-\3', feed_entry_path)
    specific = request_path.replace('#', '/')
    return f'tag:{Config.DOMAIN_NAME},{tag_date}:{specific}'