From 5c5cd14030ef9d0c70d5c0d8d9e7b4250b86477c Mon Sep 17 00:00:00 2001 From: Christopher Arndt Date: Sun, 23 Jul 2023 23:09:55 +0200 Subject: [PATCH] feat: support markdown extension and bleach generated HTML Add config settings for enabled markdown extensions and allowed tags and attributes in HTML output with sensible defaults Signed-off-by: Christopher Arndt --- Dockerfile | 2 +- README.md | 27 ++++++++++++++++++ matrixchat-notify.py | 66 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 89 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index deac353..c9b6788 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.11-alpine -RUN python3 -m pip --no-cache-dir install markdown matrix-nio +RUN python3 -m pip --no-cache-dir install bleach markdown matrix-nio ADD matrixchat-notify.py /bin/ ADD matrixchat-notify-config.json /etc/ RUN chmod +x /bin/matrixchat-notify.py diff --git a/README.md b/README.md index 7801411..54db743 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,23 @@ steps: ## Configuration settings +* `allowed_tags` *(default:* [`DEFAULT_ALLOWED_TAGS`]*)* + + List, set, or string with a comma-separated list of HTML tag names. HTML + tags not in this list will be stripped from the HTML output generated by + rendering a Markdown message template. + + Note that the default list does not include any tags that allow loading + external resources when the generated HTML is displayed; notably, `img` + is not included. + +* `allowed_attrs` *(default:* [`DEFAULT_ALLOWED_ATTRS`]*)* + + List or string with a comma-separated list of HTML attribute names, or a + dict mapping tag names to lists of attribute names. + + See the bleach documentation on [allowed attributes] for more information. + * `accesstoken` Access token to use for authentication instead of `password`.
Either an @@ -52,6 +69,12 @@ steps: substtution is considered to be in Markdown format and will be rendered to HTML and sent as a formatted message with `org.matrix.custom.html` format. +* `markdown_extensions` *(default:* `admonition, extra, sane_lists, smarty`*)* + + Comma-separated list of enabled Markdown extensions. See this + [list of extensions] for valid extension names. Including an invalid + extension name in this list will disable Markdown rendering. + * `pass_environment` *(default:* `DRONE_*`*)* Comma-separated white-list of environment variable names or name patterns. @@ -83,6 +106,10 @@ steps: ID of user on homeserver to send message as (ID, not username). +[`DEFAULT_ALLOWED_ATTRS`]: ./matrixchat-notify.py#L29 +[`DEFAULT_ALLOWED_TAGS`]: ./matrixchat-notify.py#L35 +[allowed attributes]: https://bleach.readthedocs.io/en/latest/clean.html#allowed-attributes-attributes [drone.io]: https://drone.io/ +[list of extensions]: https://python-markdown.github.io/extensions/ [plugin]: https://docs.drone.io/plugins/overview/ [reference]: https://docs.drone.io/pipeline/environment/reference/ diff --git a/matrixchat-notify.py b/matrixchat-notify.py index 0033d89..7e24740 100755 --- a/matrixchat-notify.py +++ b/matrixchat-notify.py @@ -3,7 +3,8 @@ Requires: -* +* +* Optional: * Optional: """ @@ -19,19 +20,50 @@ from distutils.util import strtobool from os.path import exists from string import Template +import bleach from nio import AsyncClient, LoginResponse PROG = "matrixchat-notify" CONFIG_FILENAME = f"{PROG}-config.json" +DEFAULT_ALLOWED_ATTRS = bleach.ALLOWED_ATTRIBUTES.copy() +DEFAULT_ALLOWED_ATTRS.update( + { + "*": ["class"], + "img": ["alt", "src"], + } +) +DEFAULT_ALLOWED_TAGS = bleach.ALLOWED_TAGS | { + "dd", + "div", + "dl", + "dt", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "p", + "span", + "table", + "td", + "th", + "thead", + "tr", +} DEFAULT_HOMESERVER = "https://matrix.org" +DEFAULT_MARKDOWN_EXTENSIONS = "admonition, extra, sane_lists, smarty"
DEFAULT_PASS_ENVIRONMENT = ["DRONE_*"] DEFAULT_TEMPLATE = "${DRONE_BUILD_STATUS}" SETTINGS_KEYS = ( + "allowed_tags", + "allowed_attrs", "accesstoken", "deviceid", "devicename", "homeserver", "markdown", + "markdown_extensions", "pass_environment", "password", "roomid", @@ -138,11 +170,35 @@ def render_message(config): return Template(template).safe_substitute(context) -def render_markdown(message): +def render_markdown(message, config): import markdown - formatted = markdown.markdown(message) - return {"formatted_body": formatted, "body": message, "format": "org.matrix.custom.html"} + allowed_attrs = config.get("allowed_attrs", DEFAULT_ALLOWED_ATTRS) + allowed_tags = config.get("allowed_tags", DEFAULT_ALLOWED_TAGS) + extensions = config.get("markdown_extensions", DEFAULT_MARKDOWN_EXTENSIONS) + + if isinstance(allowed_attrs, str): + allowed_attrs = [attr.strip() for attr in allowed_attrs.split(",") if attr.strip()] + + if isinstance(allowed_tags, str): + allowed_tags = [tag.strip() for tag in allowed_tags.split(",") if tag.strip()] + + if isinstance(extensions, str): + extensions = [ext.strip() for ext in extensions.split(",") if ext.strip()] + + try: + md = markdown.Markdown(extensions=extensions) + except (AttributeError, ImportError, TypeError) as exc: + log.error("Could not instantiate Markdown formatter: %s", exc) + return message + + return { + "formatted_body": bleach.clean( + md.convert(message), tags=allowed_tags, attributes=allowed_attrs, strip=True + ), + "body": message, + "format": "org.matrix.custom.html", + } def main(args=None): @@ -209,7 +265,7 @@ def main(args=None): if tobool(config.get("markdown")) or args.render_markdown: log.debug("Rendering markdown message to HTML.") try: - message = render_markdown(message) + message = render_markdown(message, config) except: ## noqa log.exception("Failed to render message with markdown.")