From f1e67bdc312ba16a37916024244d6cb9d4417c9e Mon Sep 17 00:00:00 2001 From: lain Date: Wed, 15 May 2019 15:28:01 +0200 Subject: [PATCH] Search: Add optional rum indexing / searching. --- config/config.exs | 2 + docs/config.md | 15 +++++++ .../mastodon_api/mastodon_api_controller.ex | 43 ++++++++++++++----- mix.exs | 5 ++- mix.lock | 10 ++--- ...510135645_add_fts_index_to_objects_two.exs | 3 ++ 6 files changed, 62 insertions(+), 16 deletions(-) rename priv/repo/{migrations => optional_migrations/rum_indexing}/20190510135645_add_fts_index_to_objects_two.exs (91%) diff --git a/config/config.exs b/config/config.exs index 1e64b79a7..42e4cb4ce 100644 --- a/config/config.exs +++ b/config/config.exs @@ -476,6 +476,8 @@ token_expires_in: 600, issue_new_refresh_token: true +config :pleroma, :database, rum_enabled: false + # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. import_config "#{Mix.env()}.exs" diff --git a/docs/config.md b/docs/config.md index 43ea24d80..99cee25cd 100644 --- a/docs/config.md +++ b/docs/config.md @@ -537,3 +537,18 @@ Configure OAuth 2 provider capabilities: * `shortcode_globs`: Location of custom emoji files. `*` can be used as a wildcard. Example `["/emoji/custom/**/*.png"]` * `groups`: Emojis are ordered in groups (tags). This is an array of key-value pairs where the key is the groupname and the value the location or array of locations. `*` can be used as a wildcard. Example `[Custom: ["/emoji/*.png", "/emoji/custom/*.png"]]` * `default_manifest`: Location of the JSON-manifest. This manifest contains information about the emoji-packs you can download. Currently only one manifest can be added (no arrays). + +## Database options + +### RUM indexing for full text search +* `rum_enabled`: If RUM indexes should be used. Defaults to `false`. + +RUM indexes are an alternative indexing scheme that is not included in PostgreSQL by default. While they may eventually be mainlined, for now they have to be installed as a PostgreSQL extension from https://github.com/postgrespro/rum. + +Their advantage over the standard GIN indexes is that they allow efficient ordering of search results by timestamp, which makes search queries a lot faster on larger servers, by one or two orders of magnitude. They take up around 3 times as much space as GIN indexes. + +To enable them, both the `rum_enabled` flag has to be set and the following special migration has to be run: + +`mix ecto.migrate --migrations-path priv/repo/optional_migrations/rum_indexing/` + +This will probably take a long time. diff --git a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex index 32677df95..fc0100b1e 100644 --- a/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex +++ b/lib/pleroma/web/mastodon_api/mastodon_api_controller.ex @@ -1000,6 +1000,30 @@ def unsubscribe(%{assigns: %{user: user}} = conn, %{"id" => id}) do end end + def status_search_query_with_gin(q, query) do + from([a, o] in q, + where: + fragment( + "to_tsvector('english', ?->>'content') @@ plainto_tsquery('english', ?)", + o.data, + ^query + ), + order_by: [desc: :id] + ) + end + + def status_search_query_with_rum(q, query) do + from([a, o] in q, + where: + fragment( + "? @@ plainto_tsquery('english', ?)", + o.fts_content, + ^query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end + def status_search(user, query) do fetched = if Regex.match?(~r/https?:/, query) do @@ -1013,20 +1037,19 @@ def status_search(user, query) do end || [] q = - from( - [a, o] in Activity.with_preloaded_object(Activity), + from([a, o] in Activity.with_preloaded_object(Activity), where: fragment("?->>'type' = 'Create'", a.data), where: "https://www.w3.org/ns/activitystreams#Public" in a.recipients, - where: - fragment( - "? @@ plainto_tsquery('english', ?)", - o.fts_content, - ^query - ), - limit: 20, - order_by: [fragment("? <=> now()::date", o.inserted_at)] + limit: 20 ) + q = + if Pleroma.Config.get([:database, :rum_enabled]) do + status_search_query_with_rum(q, query) + else + status_search_query_with_gin(q, query) + end + Repo.all(q) ++ fetched end diff --git a/mix.exs b/mix.exs index fae21f18d..f7955c6ca 100644 --- a/mix.exs +++ b/mix.exs @@ -65,7 +65,10 @@ defp deps do {:plug_cowboy, "~> 2.0"}, {:phoenix_pubsub, "~> 1.1"}, {:phoenix_ecto, "~> 4.0"}, - {:ecto_sql, "~>3.0.5"}, + {:ecto_sql, + git: "https://github.com/elixir-ecto/ecto_sql", + ref: "e839a9a327b632d73533ac8105ba360bc831cf83", + override: true}, {:postgrex, ">= 0.13.5"}, {:gettext, "~> 0.15"}, {:comeonin, "~> 4.1.1"}, diff --git a/mix.lock b/mix.lock index 624c0fb35..d223803ab 100644 --- a/mix.lock +++ b/mix.lock @@ -16,12 +16,12 @@ "cowlib": {:hex, :cowlib, "2.7.0", "3ef16e77562f9855a2605900cedb15c1462d76fb1be6a32fc3ae91973ee543d2", [:rebar3], [], "hexpm"}, "credo": {:hex, :credo, "0.9.3", "76fa3e9e497ab282e0cf64b98a624aa11da702854c52c82db1bf24e54ab7c97a", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:poison, ">= 0.0.0", [hex: :poison, repo: "hexpm", optional: false]}], "hexpm"}, "crypt": {:git, "https://github.com/msantos/crypt", "1f2b58927ab57e72910191a7ebaeff984382a1d3", [ref: "1f2b58927ab57e72910191a7ebaeff984382a1d3"]}, - "db_connection": {:hex, :db_connection, "2.0.5", "ddb2ba6761a08b2bb9ca0e7d260e8f4dd39067426d835c24491a321b7f92a4da", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm"}, + "db_connection": {:hex, :db_connection, "2.0.6", "bde2f85d047969c5b5800cb8f4b3ed6316c8cb11487afedac4aa5f93fd39abfa", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm"}, "decimal": {:hex, :decimal, "1.7.0", "30d6b52c88541f9a66637359ddf85016df9eb266170d53105f02e4a67e00c5aa", [:mix], [], "hexpm"}, "deep_merge": {:hex, :deep_merge, "1.0.0", "b4aa1a0d1acac393bdf38b2291af38cb1d4a52806cf7a4906f718e1feb5ee961", [:mix], [], "hexpm"}, "earmark": {:hex, :earmark, "1.3.2", "b840562ea3d67795ffbb5bd88940b1bed0ed9fa32834915125ea7d02e35888a5", [:mix], [], "hexpm"}, - "ecto": {:hex, :ecto, "3.0.7", "44dda84ac6b17bbbdeb8ac5dfef08b7da253b37a453c34ab1a98de7f7e5fec7f", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:poison, "~> 2.2 or ~> 3.0", [hex: :poison, repo: "hexpm", optional: true]}], "hexpm"}, - "ecto_sql": {:hex, :ecto_sql, "3.0.5", "7e44172b4f7aca4469f38d7f6a3da394dbf43a1bcf0ca975e958cb957becd74e", [:mix], [{:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.0.6", [hex: :ecto, repo: "hexpm", optional: false]}, {:mariaex, "~> 0.9.1", [hex: :mariaex, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.14.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.3.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm"}, + "ecto": {:hex, :ecto, "3.1.4", "69d852da7a9f04ede725855a35ede48d158ca11a404fe94f8b2fb3b2162cd3c9", [:mix], [{:decimal, "~> 1.6", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"}, + "ecto_sql": {:git, "https://github.com/elixir-ecto/ecto_sql", "e839a9a327b632d73533ac8105ba360bc831cf83", [ref: "e839a9a327b632d73533ac8105ba360bc831cf83"]}, "esshd": {:hex, :esshd, "0.1.0", "6f93a2062adb43637edad0ea7357db2702a4b80dd9683482fe00f5134e97f4c1", [:mix], [], "hexpm"}, "eternal": {:hex, :eternal, "1.2.0", "e2a6b6ce3b8c248f7dc31451aefca57e3bdf0e48d73ae5043229380a67614c41", [:mix], [], "hexpm"}, "ex_aws": {:hex, :ex_aws, "2.1.0", "b92651527d6c09c479f9013caa9c7331f19cba38a650590d82ebf2c6c16a1d8a", [:mix], [{:configparser_ex, "~> 2.0", [hex: :configparser_ex, repo: "hexpm", optional: true]}, {:hackney, "1.6.3 or 1.6.5 or 1.7.1 or 1.8.6 or ~> 1.9", [hex: :hackney, repo: "hexpm", optional: true]}, {:jsx, "~> 2.8", [hex: :jsx, repo: "hexpm", optional: true]}, {:poison, ">= 1.2.0", [hex: :poison, repo: "hexpm", optional: true]}, {:sweet_xml, "~> 0.6", [hex: :sweet_xml, repo: "hexpm", optional: true]}, {:xml_builder, "~> 0.1.0", [hex: :xml_builder, repo: "hexpm", optional: true]}], "hexpm"}, @@ -61,7 +61,7 @@ "plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"}, "poison": {:hex, :poison, "3.1.0", "d9eb636610e096f86f25d9a46f35a9facac35609a7591b3be3326e99a0484665", [:mix], [], "hexpm"}, "poolboy": {:hex, :poolboy, "1.5.2", "392b007a1693a64540cead79830443abf5762f5d30cf50bc95cb2c1aaafa006b", [:rebar3], [], "hexpm"}, - "postgrex": {:hex, :postgrex, "0.14.1", "63247d4a5ad6b9de57a0bac5d807e1c32d41e39c04b8a4156a26c63bcd8a2e49", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"}, + "postgrex": {:hex, :postgrex, "0.14.3", "5754dee2fdf6e9e508cbf49ab138df964278700b764177e8f3871e658b345a1e", [:mix], [{:connection, "~> 1.0", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.0", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm"}, "prometheus": {:hex, :prometheus, "4.2.2", "a830e77b79dc6d28183f4db050a7cac926a6c58f1872f9ef94a35cd989aceef8", [:mix, :rebar3], [], "hexpm"}, "prometheus_ecto": {:hex, :prometheus_ecto, "1.4.1", "6c768ea9654de871e5b32fab2eac348467b3021604ebebbcbd8bcbe806a65ed5", [:mix], [{:ecto, "~> 2.0 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:prometheus_ex, "~> 1.1 or ~> 2.0 or ~> 3.0", [hex: :prometheus_ex, repo: "hexpm", optional: false]}], "hexpm"}, "prometheus_ex": {:hex, :prometheus_ex, "3.0.5", "fa58cfd983487fc5ead331e9a3e0aa622c67232b3ec71710ced122c4c453a02f", [:mix], [{:prometheus, "~> 4.0", [hex: :prometheus, repo: "hexpm", optional: false]}], "hexpm"}, @@ -74,7 +74,7 @@ "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.4", "f0eafff810d2041e93f915ef59899c923f4568f4585904d010387ed74988e77b", [:make, :mix, :rebar3], [], "hexpm"}, "swoosh": {:hex, :swoosh, "0.20.0", "9a6c13822c9815993c03b6f8fccc370fcffb3c158d9754f67b1fdee6b3a5d928", [:mix], [{:cowboy, "~> 1.0.1 or ~> 1.1 or ~> 2.4", [hex: :cowboy, repo: "hexpm", optional: true]}, {:gen_smtp, "~> 0.12", [hex: :gen_smtp, repo: "hexpm", optional: true]}, {:hackney, "~> 1.9", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.1", [hex: :mime, repo: "hexpm", optional: false]}, {:plug, "~> 1.4", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm"}, "syslog": {:git, "https://github.com/Vagabond/erlang-syslog.git", "4a6c6f2c996483e86c1320e9553f91d337bcb6aa", [tag: "1.0.5"]}, - "telemetry": {:hex, :telemetry, "0.3.0", "099a7f3ce31e4780f971b4630a3c22ec66d22208bc090fe33a2a3a6a67754a73", [:rebar3], [], "hexpm"}, + "telemetry": {:hex, :telemetry, "0.4.0", "8339bee3fa8b91cb84d14c2935f8ecf399ccd87301ad6da6b71c09553834b2ab", [:rebar3], [], "hexpm"}, "tesla": {:hex, :tesla, "1.2.1", "864783cc27f71dd8c8969163704752476cec0f3a51eb3b06393b3971dc9733ff", [:mix], [{:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "~> 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}], "hexpm"}, "timex": {:hex, :timex, "3.5.0", "b0a23167da02d0fe4f1a4e104d1f929a00d348502b52432c05de875d0b9cffa5", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.10", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 0.1.8 or ~> 0.5", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm"}, "trailing_format_plug": {:hex, :trailing_format_plug, "0.0.7", "64b877f912cf7273bed03379936df39894149e35137ac9509117e59866e10e45", [:mix], [{:plug, "> 0.12.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm"}, diff --git a/priv/repo/migrations/20190510135645_add_fts_index_to_objects_two.exs b/priv/repo/optional_migrations/rum_indexing/20190510135645_add_fts_index_to_objects_two.exs similarity index 91% rename from priv/repo/migrations/20190510135645_add_fts_index_to_objects_two.exs rename to priv/repo/optional_migrations/rum_indexing/20190510135645_add_fts_index_to_objects_two.exs index 14b964847..09e6cbfb1 100644 --- a/priv/repo/migrations/20190510135645_add_fts_index_to_objects_two.exs +++ b/priv/repo/optional_migrations/rum_indexing/20190510135645_add_fts_index_to_objects_two.exs @@ -2,6 +2,7 @@ defmodule Pleroma.Repo.Migrations.AddFtsIndexToObjectsTwo do use Ecto.Migration def up do + execute("create extension if not exists rum") drop_if_exists index(:objects, ["(to_tsvector('english', data->>'content'))"], using: :gin, name: :objects_fts) alter table(:objects) do add(:fts_content, :tsvector) @@ -19,6 +20,7 @@ def up do FOR EACH ROW EXECUTE PROCEDURE objects_fts_update()") execute("UPDATE objects SET updated_at = NOW()") + execute("vacuum analyze") end def down do @@ -29,5 +31,6 @@ def down do remove(:fts_content, :tsvector) end create index(:objects, ["(to_tsvector('english', data->>'content'))"], using: :gin, name: :objects_fts) + execute("vacuum analyze") end end