From 3623504e5d7b4dd6dd250151685343109de1e889 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Mon, 18 Jun 2018 12:45:15 +0200 Subject: [PATCH 1/4] [Pleroma.Formatter]: Add support for non-HTTP schemes in URIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to the regex in add_links is there just to be sure it’s a legal URI; it can be removed if you want better performance. The URI Schemes list is sorted, but with http(s) at the start (in case it might make it faster for common links). Closes: https://git.pleroma.social/pleroma/pleroma/issues/127 --- lib/pleroma/formatter.ex | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/lib/pleroma/formatter.ex b/lib/pleroma/formatter.ex index 0aaf21538..fe3da09ac 100644 --- a/lib/pleroma/formatter.ex +++ b/lib/pleroma/formatter.ex @@ -165,8 +165,29 @@ def get_custom_emoji() do @emoji end - @link_regex ~r/https?:\/\/[\w\.\/?=\-#\+%&@~'\(\):]+[\w\/]/u + @link_regex ~r/[0-9a-z+\-\.]+:[0-9a-z$-_.+!*'(),]+/ui + # IANA got a list https://www.iana.org/assignments/uri-schemes/ but + # Stuff like ipfs isn’t in it + # There is very niche stuff + @uri_schemes [ + "https://", + "http://", + "dat://", + "dweb://", + "gopher://", + "ipfs://", + "ipns://", + "irc:", + "ircs:", + "magnet:", + "mailto:", + "mumble:", + "ssb://", + "xmpp:" + ] + + # TODO: make it use something other than @link_regex def html_escape(text) do Regex.split(@link_regex, text, include_captures: true) |> Enum.map_every(2, fn chunk -> @@ -176,11 +197,14 @@ def html_escape(text) do |> Enum.join("") end - @doc "changes http:... links to html links" + @doc "changes scheme:... urls to html links" def add_links({subs, text}) do links = - Regex.scan(@link_regex, text) - |> Enum.map(fn [url] -> {Ecto.UUID.generate(), url} end) + text + |> String.split([" ", "\t", "
"]) + |> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes) end) + |> Enum.filter(fn word -> Regex.match?(@link_regex, word) end) + |> Enum.map(fn url -> {Ecto.UUID.generate(), url} end) |> Enum.sort_by(fn {_, url} -> -String.length(url) end) uuid_text = From 1a2255ef7ed6978b10b0dabbcee1036fe06c87a7 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Sat, 23 Jun 2018 13:28:14 +0200 Subject: [PATCH 2/4] [Pleroma.FormatterTest]: .add_links: Add a space before the dot A dot is legal in the url, even at the end, so I moved it for the test --- test/formatter_test.exs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/formatter_test.exs b/test/formatter_test.exs index c2b3d4ac0..acf0adb1c 100644 --- a/test/formatter_test.exs +++ b/test/formatter_test.exs @@ -20,10 +20,10 @@ test "turns hashtags into links" do describe ".add_links" do test "turning urls into links" do - text = "Hey, check out https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla." + text = "Hey, check out https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ." expected = - "Hey, check out https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla." + "Hey, check out https://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ." 
assert Formatter.add_links({[], text}) |> Formatter.finalize() == expected From 32a55e96958e949b69578af49cf41e720916988c Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Sat, 23 Jun 2018 13:32:03 +0200 Subject: [PATCH 3/4] [Pleroma.FormatterTest] Add test for XMPP link --- test/formatter_test.exs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/formatter_test.exs b/test/formatter_test.exs index acf0adb1c..abbd7ac93 100644 --- a/test/formatter_test.exs +++ b/test/formatter_test.exs @@ -85,6 +85,12 @@ test "turning urls into links" do "https://pleroma.com https://pleroma.com/sucks" assert Formatter.add_links({[], text}) |> Formatter.finalize() == expected + + text = "xmpp:contact@hacktivis.me" + + expected = "xmpp:contact@hacktivis.me" + + assert Formatter.add_links({[], text}) |> Formatter.finalize() == expected end end From d5091c3175786e5bcb0449f26cafe1795fd5f5d9 Mon Sep 17 00:00:00 2001 From: "Haelwenn (lanodan) Monnier" Date: Mon, 30 Jul 2018 21:59:04 +0200 Subject: [PATCH 4/4] Allow additional schemes in the config --- config/config.exs | 2 ++ lib/pleroma/formatter.ex | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/config/config.exs b/config/config.exs index 3a7301348..ef5bcd65c 100644 --- a/config/config.exs +++ b/config/config.exs @@ -16,6 +16,8 @@ config :pleroma, :emoji, shortcode_globs: ["/emoji/custom/**/*.png"] +config :pleroma, :uri_schemes, additionnal_schemes: [] + # Configures the endpoint config :pleroma, Pleroma.Web.Endpoint, url: [host: "localhost"], diff --git a/lib/pleroma/formatter.ex b/lib/pleroma/formatter.ex index fe3da09ac..e15c08fd6 100644 --- a/lib/pleroma/formatter.ex +++ b/lib/pleroma/formatter.ex @@ -199,10 +199,14 @@ def html_escape(text) do @doc "changes scheme:... urls to html links" def add_links({subs, text}) do + additionnal_schemes = + Application.get_env(:pleroma, :uri_schemes, []) + |> Keyword.get(:additionnal_schemes, []) + links = text |> String.split([" ", "\t", "
"]) - |> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes) end) + |> Enum.filter(fn word -> String.starts_with?(word, @uri_schemes ++ additionnal_schemes) end) |> Enum.filter(fn word -> Regex.match?(@link_regex, word) end) |> Enum.map(fn url -> {Ecto.UUID.generate(), url} end) |> Enum.sort_by(fn {_, url} -> -String.length(url) end)