Speed up the finding of missing sources

Use larger batches and more efficient duplicate deletion.
This commit is contained in:
Christopher Baines 2022-03-01 20:36:22 +00:00
parent 6cd3541d1a
commit c5b504e94a

View file

@ -1738,17 +1738,7 @@ WHERE " criteria ";"))
(chunk! missing-file-names 2000)))))
(define (derivation-file-names->derivation-ids conn derivation-file-names)
(define (select-source-files-missing-nars derivation-ids)
;; Cut IDS into consecutive pieces of MAX-LENGTH elements and return
;; them as a list of lists.  When IDS has MAX-LENGTH elements or fewer
;; the result is a one-element list holding IDS itself, so an empty IDS
;; yields (()) rather than ().
(define (split ids max-length)
;; `length' is re-evaluated on the remaining list at every recursive step.
(if (> (length ids)
max-length)
;; split-at hands back two values: the leading MAX-LENGTH elements
;; and the remainder, which is split again recursively.
(call-with-values (lambda ()
(split-at ids max-length))
(lambda (ids-lst rest)
(cons ids-lst
(split rest max-length))))
(list ids)))
(define (select-source-files-missing-nars! derivation-ids)
(define (derivation-ids->all-related-derivation-ids ids)
(define query
(string-append
@ -1773,7 +1763,12 @@ WITH RECURSIVE all_derivations AS (
SELECT all_derivations.derivation_id
FROM all_derivations"))
(map car (exec-query conn query)))
(map (lambda (row)
(string->number
(car row)))
(with-time-logging
"querying for batch of all related derivation ids"
(exec-query conn query))))
(define (derivation-ids->missing-sources ids)
(define query
@ -1788,17 +1783,25 @@ INNER JOIN derivation_source_files
ON derivation_sources.derivation_source_file_id =
derivation_source_files.id
WHERE derivation_sources.derivation_id IN ("
(string-join ids ", ")
(string-join (map number->string ids) ", ")
")
AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
(exec-query conn query))
(with-time-logging "finding batch of missing sources"
(exec-query conn query)))
(let ((all-derivation-ids
(append-map
derivation-ids->all-related-derivation-ids
(split derivation-ids 250))))
(derivation-ids->missing-sources all-derivation-ids)))
(with-time-logging "querying for all related dervation ids"
(delete-duplicates/sort!
(append-map!
derivation-ids->all-related-derivation-ids
(chunk! derivation-ids 5000))
<))))
(with-time-logging "querying for missing sources"
(append-map! derivation-ids->missing-sources
(chunk! all-derivation-ids
10000)))))
(if (null? derivation-file-names)
'()
@ -1858,6 +1861,6 @@ INNER JOIN derivation_source_files
conn
(string->number derivation-source-file-id)
store-path)))
(select-source-files-missing-nars all-ids)))
(select-source-files-missing-nars! all-ids)))
all-ids)))))