Rich Media: Skip Microformats hashtags

When fixing this problem I incorrectly assumed a.hashtag is
the proper way for detecting hashtags, but it is just something Pleroma and
Mastodon add. Per microformats it should be detected by the presense of rel=tag.

This MR adds a check for rel=tag, but I still left a.hashtag just in case
This commit is contained in:
rinpatch 2019-06-19 00:31:30 +03:00
parent 3d76420512
commit 035368d363
2 changed files with 17 additions and 1 deletions

View File

@ -89,7 +89,7 @@ def extract_first_external_url(object, content) do
Cachex.fetch!(:scrubber_cache, key, fn _key -> Cachex.fetch!(:scrubber_cache, key, fn _key ->
result = result =
content content
|> Floki.filter_out("a.mention,a.hashtag") |> Floki.filter_out("a.mention,a.hashtag,a[rel~=\"tag\"]")
|> Floki.attribute("a", "href") |> Floki.attribute("a", "href")
|> Enum.at(0) |> Enum.at(0)

View File

@ -212,5 +212,21 @@ test "skips hashtags" do
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140" assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end end
test "skips microformats hashtags" do
user = insert(:user)
{:ok, activity} =
CommonAPI.post(user, %{
"status" =>
"<a href=\"https://pleroma.gov/tags/cofe\" rel=\"tag\">#cofe</a> https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140",
"content_type" => "text/html"
})
object = Object.normalize(activity)
{:ok, url} = HTML.extract_first_external_url(object, object.data["content"])
assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
end
end end
end end