Modify the search-packages-in-revision query to make it faster

Signed-off-by: Christopher Baines <mail@cbaines.net>
This commit is contained in:
Danjela Lura 2020-08-08 18:26:46 +02:00 committed by Christopher Baines
parent bef826cf2e
commit 086cb9c9d0

View file

@ -144,13 +144,14 @@ WHERE data.name IN (SELECT name FROM package_names);"))
(define query
(string-append
"
SELECT packages.name,
packages.version,
translated_package_synopsis.synopsis,
translated_package_synopsis.locale,
translated_package_descriptions.description,
translated_package_descriptions.locale,
WITH search_results AS (
SELECT DISTINCT ON (packages.name) packages.name,
packages.version, package_synopsis.synopsis,
package_synopsis.locale AS synopsis_locale,
package_descriptions.description,
package_descriptions.locale AS description_locale,
package_metadata.home_page,
package_metadata_tsvectors.synopsis_and_description,
locations.file, locations.line, locations.column_number,
(SELECT JSON_AGG((license_data.*))
FROM (
@ -166,32 +167,12 @@ INNER JOIN package_metadata
ON packages.package_metadata_id = package_metadata.id
LEFT OUTER JOIN locations
ON package_metadata.location_id = locations.id
INNER JOIN (
SELECT DISTINCT ON (package_synopsis_sets.id) package_synopsis_sets.id,
package_synopsis.synopsis, package_synopsis.locale
FROM package_synopsis_sets
INNER JOIN package_metadata_tsvectors
ON package_metadata_tsvectors.package_metadata_id = package_metadata.id
INNER JOIN package_synopsis
ON package_synopsis.id = ANY (package_synopsis_sets.synopsis_ids)
ORDER BY package_synopsis_sets.id,
CASE WHEN package_synopsis.locale = $3 THEN 2
WHEN package_synopsis.locale = 'en_US.utf8' THEN 1
ELSE 0
END DESC
) AS translated_package_synopsis
ON package_metadata.package_synopsis_set_id = translated_package_synopsis.id
INNER JOIN (
SELECT DISTINCT ON (package_description_sets.id) package_description_sets.id,
package_descriptions.description, package_descriptions.locale
FROM package_description_sets
ON package_metadata_tsvectors.package_synopsis_id = package_synopsis.id
INNER JOIN package_descriptions
ON package_descriptions.id = ANY (package_description_sets.description_ids)
ORDER BY package_description_sets.id,
CASE WHEN package_descriptions.locale = $3 THEN 2
WHEN package_descriptions.locale = 'en_US.utf8' THEN 1
ELSE 0
END DESC
) AS translated_package_descriptions
ON package_metadata.package_description_set_id = translated_package_descriptions.id
ON package_metadata_tsvectors.package_description_id = package_descriptions.id
WHERE packages.id IN (
SELECT package_derivations.package_id
FROM package_derivations
@ -201,21 +182,29 @@ WHERE packages.id IN (
ON guix_revision_package_derivations.revision_id = guix_revisions.id
WHERE guix_revisions.commit = $1
)
AND to_tsvector(name || ' ' || synopsis) @@ plainto_tsquery($2)
AND package_metadata_tsvectors.synopsis_and_description @@ plainto_tsquery($2)
ORDER BY name,
CASE WHEN package_metadata_tsvectors.locale = 'en_US.utf8' THEN 2
WHEN package_metadata_tsvectors.locale = $3 THEN 1
ELSE 0
END DESC
)
SELECT name, version, synopsis, synopsis_locale,
description, description_locale,
home_page, file, line, column_number, licenses
FROM search_results
ORDER BY (
ts_rank_cd(
to_tsvector(name),
setweight(to_tsvector(name), 'A'),
plainto_tsquery($2),
2 -- divide rank by the document length
)
* 4 -- as the name is more important
) +
ts_rank_cd(
to_tsvector(synopsis),
synopsis_and_description,
plainto_tsquery($2),
32 -- divide the rank by itself + 1
)
) DESC,
-- to make the order stable
name,
version
"