Speed up inserting missing derivation sources

Split the recursive part of the query from the non-recursive part. PostgreSQL
does a poor job of estimating how many rows the recursive part will return,
and so generates a bad plan when both parts are run as a single query.
This commit is contained in:
Christopher Baines 2021-09-24 12:55:45 +01:00
parent 8b34126d22
commit 261552bd5e

View file

@ -31,6 +31,7 @@
#:use-module (guix inferior) #:use-module (guix inferior)
#:use-module (guix memoization) #:use-module (guix memoization)
#:use-module (guix derivations) #:use-module (guix derivations)
#:use-module (guix-data-service utils)
#:use-module (guix-data-service database) #:use-module (guix-data-service database)
#:use-module (guix-data-service model utils) #:use-module (guix-data-service model utils)
#:use-module (guix-data-service model system) #:use-module (guix-data-service model system)
@ -1599,15 +1600,17 @@ LIMIT $1"
derivation-inputs derivation-inputs
derivations)))) derivations))))
(simple-format (with-time-logging
#t "debug: insert-missing-derivations: inserting inputs\n") (simple-format
(for-each (lambda (derivation-id derivation) #f "insert-missing-derivations: inserting inputs for ~A derivations"
(insert-derivation-inputs conn (length derivations))
derivation-id (for-each (lambda (derivation-id derivation)
(derivation-inputs derivation))) (insert-derivation-inputs conn
derivation-id
(derivation-inputs derivation)))
derivation-ids derivation-ids
derivations) derivations))
derivation-ids)) derivation-ids))
@ -1738,19 +1741,20 @@ WHERE " criteria ";"))
(define (derivation-file-names->derivation-ids conn derivation-file-names) (define (derivation-file-names->derivation-ids conn derivation-file-names)
(define (select-source-files-missing-nars derivation-ids) (define (select-source-files-missing-nars derivation-ids)
(define (split ids) (define (split ids max-length)
(if (> (length ids) (if (> (length ids)
1000) max-length)
(call-with-values (lambda () (call-with-values (lambda ()
(split-at ids 1000)) (split-at ids max-length))
(lambda (ids-lst rest) (lambda (ids-lst rest)
(cons ids-lst (cons ids-lst
(split rest)))) (split rest max-length))))
(list ids))) (list ids)))
(define (query ids) (define (derivation-ids->all-related-derivation-ids ids)
(string-append (define query
" (string-append
"
WITH RECURSIVE all_derivations AS ( WITH RECURSIVE all_derivations AS (
SELECT column1 AS derivation_id SELECT column1 AS derivation_id
FROM (VALUES " FROM (VALUES "
@ -1768,23 +1772,35 @@ WITH RECURSIVE all_derivations AS (
INNER JOIN derivation_outputs INNER JOIN derivation_outputs
ON derivation_outputs.id = derivation_inputs.derivation_output_id ON derivation_outputs.id = derivation_inputs.derivation_output_id
) )
SELECT all_derivations.derivation_id
FROM all_derivations"))
(map car (exec-query conn query)))
(define (derivation-ids->missing-sources ids)
(define query
(string-append
"
SELECT derivation_sources.derivation_source_file_id, derivation_source_files.store_path SELECT derivation_sources.derivation_source_file_id, derivation_source_files.store_path
FROM all_derivations FROM derivation_sources
INNER JOIN derivation_sources
ON derivation_sources.derivation_id = all_derivations.derivation_id
LEFT JOIN derivation_source_file_nars LEFT JOIN derivation_source_file_nars
ON derivation_sources.derivation_source_file_id = ON derivation_sources.derivation_source_file_id =
derivation_source_file_nars.derivation_source_file_id derivation_source_file_nars.derivation_source_file_id
INNER JOIN derivation_source_files INNER JOIN derivation_source_files
ON derivation_sources.derivation_source_file_id = ON derivation_sources.derivation_source_file_id =
derivation_source_files.id derivation_source_files.id
WHERE derivation_source_file_nars.derivation_source_file_id IS NULL")) WHERE derivation_sources.derivation_id IN ("
(string-join ids ", ")
")
AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
(delete-duplicates (exec-query conn query))
(append-map
(lambda (ids) (let ((all-derivation-ids
(exec-query conn (query ids))) (append-map
(split derivation-ids)))) derivation-ids->all-related-derivation-ids
(split derivation-ids 250))))
(derivation-ids->missing-sources all-derivation-ids)))
(if (null? derivation-file-names) (if (null? derivation-file-names)
'() '()
@ -1827,12 +1843,13 @@ WHERE derivation_source_file_nars.derivation_source_file_id IS NULL"))
(error "missing derivation id")))) (error "missing derivation id"))))
derivation-file-names))) derivation-file-names)))
(for-each (match-lambda (with-time-logging "inserting missing source files"
((derivation-source-file-id store-path) (for-each (match-lambda
(insert-derivation-source-file-nar ((derivation-source-file-id store-path)
conn (insert-derivation-source-file-nar
(string->number derivation-source-file-id) conn
store-path))) (string->number derivation-source-file-id)
(select-source-files-missing-nars all-ids)) store-path)))
(select-source-files-missing-nars all-ids)))
all-ids)))) all-ids))))