Speed up the finding of missing sources

Use larger batches and more efficient duplicate deletion.
This commit is contained in:
Christopher Baines 2022-03-01 20:36:22 +00:00
parent 6cd3541d1a
commit c5b504e94a

View file

@ -1738,17 +1738,7 @@ WHERE " criteria ";"))
(chunk! missing-file-names 2000))))) (chunk! missing-file-names 2000)))))
(define (derivation-file-names->derivation-ids conn derivation-file-names) (define (derivation-file-names->derivation-ids conn derivation-file-names)
(define (select-source-files-missing-nars derivation-ids) (define (select-source-files-missing-nars! derivation-ids)
(define (split ids max-length)
(if (> (length ids)
max-length)
(call-with-values (lambda ()
(split-at ids max-length))
(lambda (ids-lst rest)
(cons ids-lst
(split rest max-length))))
(list ids)))
(define (derivation-ids->all-related-derivation-ids ids) (define (derivation-ids->all-related-derivation-ids ids)
(define query (define query
(string-append (string-append
@ -1773,7 +1763,12 @@ WITH RECURSIVE all_derivations AS (
SELECT all_derivations.derivation_id SELECT all_derivations.derivation_id
FROM all_derivations")) FROM all_derivations"))
(map car (exec-query conn query))) (map (lambda (row)
(string->number
(car row)))
(with-time-logging
"querying for batch of all related derivation ids"
(exec-query conn query))))
(define (derivation-ids->missing-sources ids) (define (derivation-ids->missing-sources ids)
(define query (define query
@ -1788,17 +1783,25 @@ INNER JOIN derivation_source_files
ON derivation_sources.derivation_source_file_id = ON derivation_sources.derivation_source_file_id =
derivation_source_files.id derivation_source_files.id
WHERE derivation_sources.derivation_id IN (" WHERE derivation_sources.derivation_id IN ("
(string-join ids ", ") (string-join (map number->string ids) ", ")
") ")
AND derivation_source_file_nars.derivation_source_file_id IS NULL")) AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
(exec-query conn query)) (with-time-logging "finding batch of missing sources"
(exec-query conn query)))
(let ((all-derivation-ids (let ((all-derivation-ids
(append-map (with-time-logging "querying for all related dervation ids"
(delete-duplicates/sort!
(append-map!
derivation-ids->all-related-derivation-ids derivation-ids->all-related-derivation-ids
(split derivation-ids 250)))) (chunk! derivation-ids 5000))
(derivation-ids->missing-sources all-derivation-ids))) <))))
(with-time-logging "querying for missing sources"
(append-map! derivation-ids->missing-sources
(chunk! all-derivation-ids
10000)))))
(if (null? derivation-file-names) (if (null? derivation-file-names)
'() '()
@ -1858,6 +1861,6 @@ INNER JOIN derivation_source_files
conn conn
(string->number derivation-source-file-id) (string->number derivation-source-file-id)
store-path))) store-path)))
(select-source-files-missing-nars all-ids))) (select-source-files-missing-nars! all-ids)))
all-ids))))) all-ids)))))