os.chdir was getting confusing and hurting the log output, and potentially the cause of a couple bugs left to fix, so this removes it, but it means we need to pass around the pages/ absolute path into the markdown parser, because it relies on knowing both the absolute path now (to open files), and also the path relative to the pages dir in order to know where to stop reading parent files/how to generate proper URL-like references to other files. probably this should be refactored at some point to inherit the pages/ path from the SSG somehow, rather than passing it through a bunch of methods, but this seems to work for now fixes #22 Signed-off-by: Brian S. Stephan <bss@incorporeal.org>
88 lines
3.6 KiB
Python
88 lines
3.6 KiB
Python
"""Generate Atom and RSS feeds based on content in a blog-ish location.
|
|
|
|
This parses a special root directory, feed/, for YYYYMMDD-foo.md files,
|
|
and combines them into an Atom or RSS feed. These files *should* be symlinks
|
|
to the real pages, which may mirror the same YYYYMMDD-foo.md file naming scheme
|
|
under pages/ (which may make sense for a blog) if they want, but could just
|
|
as well be pages/foo content.
|
|
|
|
SPDX-FileCopyrightText: © 2023 Brian S. Stephan <bss@incorporeal.org>
|
|
SPDX-License-Identifier: GPL-3.0-or-later
|
|
"""
|
|
import logging
|
|
import os
|
|
import re
|
|
|
|
from feedgen.feed import FeedGenerator
|
|
|
|
from incorporealcms.config import Config
|
|
from incorporealcms.markdown import instance_resource_path_to_request_path, parse_md
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def generate_feed(feed_type: str, instance_dir: str, dest_dir: str) -> None:
|
|
"""Generate the Atom or RSS feed as requested.
|
|
|
|
Feed entries should be symlinks to .md files in the pages/ directory, so that they
|
|
are also linkable and can be browsed outside of the feed.
|
|
|
|
Args:
|
|
feed_type: 'atom' or 'rss' feed
|
|
instance_dir: the directory for the instance, containing both the feed dir and pages
|
|
dest_dir: the directory to place the feed subdir and requested feed
|
|
"""
|
|
fg = FeedGenerator()
|
|
fg.id(f'https://{Config.DOMAIN_NAME}/')
|
|
fg.title(f'{Config.TITLE_SUFFIX}')
|
|
fg.author(Config.AUTHOR)
|
|
fg.link(href=f'https://{Config.DOMAIN_NAME}/feed/{feed_type}', rel='self')
|
|
fg.link(href=f'https://{Config.DOMAIN_NAME}', rel='alternate')
|
|
fg.subtitle(f"Blog posts and other interesting materials from {Config.TITLE_SUFFIX}")
|
|
|
|
# feed symlinks should all be within the core content subdirectory
|
|
pages_dir = os.path.join(instance_dir, 'pages')
|
|
|
|
# get recent feeds
|
|
feed_path = os.path.join(instance_dir, 'feed')
|
|
feed_entry_paths = [os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(feed_path)
|
|
for filename in filenames if os.path.islink(os.path.join(dirpath, filename))]
|
|
for feed_entry_path in sorted(feed_entry_paths):
|
|
# get the actual file to parse it
|
|
resolved_path = os.path.relpath(os.path.realpath(feed_entry_path), pages_dir)
|
|
try:
|
|
content, md, page_name, page_title, mtime = parse_md(os.path.join(pages_dir, resolved_path), pages_dir)
|
|
link = f'https://{Config.DOMAIN_NAME}{instance_resource_path_to_request_path(resolved_path)}'
|
|
except (OSError, ValueError, TypeError):
|
|
logger.exception("error loading/rendering markdown!")
|
|
raise
|
|
|
|
fe = fg.add_entry()
|
|
fe.id(_generate_feed_id(feed_entry_path, instance_resource_path_to_request_path(resolved_path)))
|
|
fe.title(page_title)
|
|
fe.author(Config.AUTHOR)
|
|
fe.link(href=link)
|
|
fe.content(content, type='html')
|
|
|
|
if feed_type == 'rss':
|
|
try:
|
|
os.mkdir(os.path.join(dest_dir, 'feed'))
|
|
except FileExistsError:
|
|
pass
|
|
with open(os.path.join(dest_dir, 'feed', 'rss'), 'wb') as feed_file:
|
|
feed_file.write(fg.rss_str(pretty=True))
|
|
else:
|
|
try:
|
|
os.mkdir(os.path.join(dest_dir, 'feed'))
|
|
except FileExistsError:
|
|
pass
|
|
with open(os.path.join(dest_dir, 'feed', 'atom'), 'wb') as feed_file:
|
|
feed_file.write(fg.atom_str(pretty=True))
|
|
|
|
|
|
def _generate_feed_id(feed_entry_path, request_path):
|
|
"""For a relative file path, generate the Atom/RSS feed ID for it."""
|
|
date = re.sub(r'.*(\d{4})(\d{2})(\d{2}).*', r'\1-\2-\3', feed_entry_path)
|
|
cleaned = request_path.replace('#', '/')
|
|
return f'tag:{Config.DOMAIN_NAME},{date}:{cleaned}'
|