Modify the search-packages-in-revision query to make it faster

Signed-off-by: Christopher Baines <mail@cbaines.net>
This commit is contained in:
Danjela Lura 2020-08-08 18:26:46 +02:00 committed by Christopher Baines
parent bef826cf2e
commit 086cb9c9d0

View file

@ -143,79 +143,68 @@ WHERE data.name IN (SELECT name FROM package_names);"))
locale) locale)
(define query (define query
(string-append (string-append
" "
SELECT packages.name, WITH search_results AS (
packages.version, SELECT DISTINCT ON (packages.name) packages.name,
translated_package_synopsis.synopsis, packages.version, package_synopsis.synopsis,
translated_package_synopsis.locale, package_synopsis.locale AS synopsis_locale,
translated_package_descriptions.description, package_descriptions.description,
translated_package_descriptions.locale, package_descriptions.locale AS description_locale,
package_metadata.home_page, package_metadata.home_page,
locations.file, locations.line, locations.column_number, package_metadata_tsvectors.synopsis_and_description,
(SELECT JSON_AGG((license_data.*)) locations.file, locations.line, locations.column_number,
FROM ( (SELECT JSON_AGG((license_data.*))
SELECT licenses.name, licenses.uri, licenses.comment FROM (
FROM licenses SELECT licenses.name, licenses.uri, licenses.comment
INNER JOIN license_sets ON licenses.id = ANY(license_sets.license_ids) FROM licenses
WHERE license_sets.id = package_metadata.license_set_id INNER JOIN license_sets ON licenses.id = ANY(license_sets.license_ids)
ORDER BY licenses.name WHERE license_sets.id = package_metadata.license_set_id
) AS license_data ORDER BY licenses.name
) AS licenses ) AS license_data
FROM packages ) AS licenses
INNER JOIN package_metadata FROM packages
ON packages.package_metadata_id = package_metadata.id INNER JOIN package_metadata
LEFT OUTER JOIN locations ON packages.package_metadata_id = package_metadata.id
ON package_metadata.location_id = locations.id LEFT OUTER JOIN locations
INNER JOIN ( ON package_metadata.location_id = locations.id
SELECT DISTINCT ON (package_synopsis_sets.id) package_synopsis_sets.id, INNER JOIN package_metadata_tsvectors
package_synopsis.synopsis, package_synopsis.locale ON package_metadata_tsvectors.package_metadata_id = package_metadata.id
FROM package_synopsis_sets
INNER JOIN package_synopsis INNER JOIN package_synopsis
ON package_synopsis.id = ANY (package_synopsis_sets.synopsis_ids) ON package_metadata_tsvectors.package_synopsis_id = package_synopsis.id
ORDER BY package_synopsis_sets.id,
CASE WHEN package_synopsis.locale = $3 THEN 2
WHEN package_synopsis.locale = 'en_US.utf8' THEN 1
ELSE 0
END DESC
) AS translated_package_synopsis
ON package_metadata.package_synopsis_set_id = translated_package_synopsis.id
INNER JOIN (
SELECT DISTINCT ON (package_description_sets.id) package_description_sets.id,
package_descriptions.description, package_descriptions.locale
FROM package_description_sets
INNER JOIN package_descriptions INNER JOIN package_descriptions
ON package_descriptions.id = ANY (package_description_sets.description_ids) ON package_metadata_tsvectors.package_description_id = package_descriptions.id
ORDER BY package_description_sets.id, WHERE packages.id IN (
CASE WHEN package_descriptions.locale = $3 THEN 2 SELECT package_derivations.package_id
WHEN package_descriptions.locale = 'en_US.utf8' THEN 1 FROM package_derivations
ELSE 0 INNER JOIN guix_revision_package_derivations
END DESC ON package_derivations.id = guix_revision_package_derivations.package_derivation_id
) AS translated_package_descriptions INNER JOIN guix_revisions
ON package_metadata.package_description_set_id = translated_package_descriptions.id ON guix_revision_package_derivations.revision_id = guix_revisions.id
WHERE packages.id IN ( WHERE guix_revisions.commit = $1
SELECT package_derivations.package_id )
FROM package_derivations AND package_metadata_tsvectors.synopsis_and_description @@ plainto_tsquery($2)
INNER JOIN guix_revision_package_derivations ORDER BY name,
ON package_derivations.id = guix_revision_package_derivations.package_derivation_id CASE WHEN package_metadata_tsvectors.locale = 'en_US.utf8' THEN 2
INNER JOIN guix_revisions WHEN package_metadata_tsvectors.locale = $3 THEN 1
ON guix_revision_package_derivations.revision_id = guix_revisions.id ELSE 0
WHERE guix_revisions.commit = $1 END DESC
) )
AND to_tsvector(name || ' ' || synopsis) @@ plainto_tsquery($2) SELECT name, version, synopsis, synopsis_locale,
description, description_locale,
home_page, file, line, column_number, licenses
FROM search_results
ORDER BY ( ORDER BY (
ts_rank_cd( ts_rank_cd(
to_tsvector(name), setweight(to_tsvector(name), 'A'),
plainto_tsquery($2), plainto_tsquery($2),
2 -- divide rank by the document length 2 -- divide rank by the document length
) +
ts_rank_cd(
synopsis_and_description,
plainto_tsquery($2),
32 -- divide the rank by itself + 1
) )
* 4 -- as the name is more important
) +
ts_rank_cd(
to_tsvector(synopsis),
plainto_tsquery($2),
32 -- divide the rank by itself + 1
) DESC, ) DESC,
-- to make the order stable
name, name,
version version
" "