From c5b504e94a08aab8d19d752542874a588fe9a765 Mon Sep 17 00:00:00 2001 From: Christopher Baines Date: Tue, 1 Mar 2022 20:36:22 +0000 Subject: [PATCH] Speed up the finding of missing sources Use larger batches and more efficient duplicate deletion. --- guix-data-service/model/derivation.scm | 41 ++++++++++++++------------ 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm index 47b5adc..c1d1c69 100644 --- a/guix-data-service/model/derivation.scm +++ b/guix-data-service/model/derivation.scm @@ -1738,17 +1738,7 @@ WHERE " criteria ";")) (chunk! missing-file-names 2000))))) (define (derivation-file-names->derivation-ids conn derivation-file-names) - (define (select-source-files-missing-nars derivation-ids) - (define (split ids max-length) - (if (> (length ids) - max-length) - (call-with-values (lambda () - (split-at ids max-length)) - (lambda (ids-lst rest) - (cons ids-lst - (split rest max-length)))) - (list ids))) - + (define (select-source-files-missing-nars! 
derivation-ids) (define (derivation-ids->all-related-derivation-ids ids) (define query (string-append @@ -1773,7 +1763,12 @@ WITH RECURSIVE all_derivations AS ( SELECT all_derivations.derivation_id FROM all_derivations")) - (map car (exec-query conn query))) + (map (lambda (row) + (string->number + (car row))) + (with-time-logging + "querying for batch of all related derivation ids" + (exec-query conn query)))) (define (derivation-ids->missing-sources ids) (define query @@ -1788,17 +1783,25 @@ INNER JOIN derivation_source_files ON derivation_sources.derivation_source_file_id = derivation_source_files.id WHERE derivation_sources.derivation_id IN (" - (string-join ids ", ") + (string-join (map number->string ids) ", ") ") AND derivation_source_file_nars.derivation_source_file_id IS NULL")) - (exec-query conn query)) + (with-time-logging "finding batch of missing sources" + (exec-query conn query))) (let ((all-derivation-ids - (append-map - derivation-ids->all-related-derivation-ids - (split derivation-ids 250)))) - (derivation-ids->missing-sources all-derivation-ids))) + (with-time-logging "querying for all related derivation ids" + (delete-duplicates/sort! + (append-map! + derivation-ids->all-related-derivation-ids + (chunk! derivation-ids 5000)) + <)))) + + (with-time-logging "querying for missing sources" + (append-map! derivation-ids->missing-sources + (chunk! all-derivation-ids + 10000))))) (if (null? derivation-file-names) '() @@ -1858,6 +1861,6 @@ INNER JOIN derivation_source_files conn (string->number derivation-source-file-id) store-path))) - (select-source-files-missing-nars all-ids))) + (select-source-files-missing-nars! all-ids))) all-ids)))))