diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm index c1d1c69..3c5d0fb 100644 --- a/guix-data-service/model/derivation.scm +++ b/guix-data-service/model/derivation.scm @@ -1738,37 +1738,38 @@ WHERE " criteria ";")) (chunk! missing-file-names 2000))))) (define (derivation-file-names->derivation-ids conn derivation-file-names) - (define (select-source-files-missing-nars! derivation-ids) - (define (derivation-ids->all-related-derivation-ids ids) - (define query - (string-append - " -WITH RECURSIVE all_derivations AS ( - SELECT column1 AS derivation_id - FROM (VALUES " - (string-join (map - (lambda (id) - (string-append "(" id ")")) - (map number->string ids)) - ", ") - ") AS data - UNION - SELECT derivation_outputs.derivation_id - FROM all_derivations - INNER JOIN derivation_inputs - ON derivation_inputs.derivation_id = all_derivations.derivation_id - INNER JOIN derivation_outputs - ON derivation_outputs.id = derivation_inputs.derivation_output_id -) -SELECT all_derivations.derivation_id -FROM all_derivations")) + (define (insert-source-files-missing-nars derivation-ids) + (define (derivation-ids->next-related-derivation-ids! ids seen-ids) + (delete-duplicates/sort! + (append-map! + (lambda (ids-chunk) + (let ((query + (string-append + " +SELECT derivation_outputs.derivation_id +FROM derivation_inputs +INNER JOIN derivation_outputs + ON derivation_outputs.id = derivation_inputs.derivation_output_id +WHERE derivation_inputs.derivation_id IN (" + (string-join (map number->string ids) ",") + ")"))) - (map (lambda (row) - (string->number - (car row))) - (with-time-logging - "querying for batch of all related derivation ids" - (exec-query conn query)))) + (filter-map + (lambda (row) + (let ((number + (string->number + (car row)))) + (if (hash-ref seen-ids number) + #f + (begin + (hash-set! seen-ids number #t) + + number)))) + (with-time-logging + "querying for batch of all related derivation ids" + (exec-query conn query))))) + (chunk! ids 2000)) + <)) (define (derivation-ids->missing-sources ids) (define query @@ -1787,21 +1788,42 @@ INNER JOIN derivation_source_files ") AND derivation_source_file_nars.derivation_source_file_id IS NULL")) - (with-time-logging "finding batch of missing sources" - (exec-query conn query))) + (map (lambda (row) + (list (string->number (first row)) + (second row))) + (with-time-logging "finding batch of missing sources" + (exec-query conn query)))) - (let ((all-derivation-ids - (with-time-logging "querying for all related dervation ids" - (delete-duplicates/sort! - (append-map! - derivation-ids->all-related-derivation-ids - (chunk! derivation-ids 5000)) - <)))) + (let ((seen-ids (make-hash-table))) + (let loop ((next-related-derivation-ids + (with-time-logging "querying for next related dervation ids" + (derivation-ids->next-related-derivation-ids! + (list-copy derivation-ids) + seen-ids)))) + (unless (null? next-related-derivation-ids) + (let ((missing-sources + (with-time-logging "querying for missing sources" + (append-map! derivation-ids->missing-sources + (chunk next-related-derivation-ids + 10000))))) - (with-time-logging "querying for missing sources" - (append-map! derivation-ids->missing-sources - (chunk! all-derivation-ids - 10000))))) + (unless (null? missing-sources) + (with-time-logging + (simple-format #f "inserting ~A missing source files" + (length missing-sources)) + (for-each (match-lambda + ((derivation-source-file-id store-path) + (insert-derivation-source-file-nar + conn + derivation-source-file-id + store-path))) + missing-sources)))) + + (loop + (with-time-logging "querying for next related dervation ids" + (derivation-ids->next-related-derivation-ids! + next-related-derivation-ids + seen-ids))))))) (if (null? derivation-file-names) '() @@ -1854,13 +1876,7 @@ INNER JOIN derivation_source_files (error "missing derivation id"))) derivation-file-names))) - (with-time-logging "inserting missing source files" - (for-each (match-lambda - ((derivation-source-file-id store-path) - (insert-derivation-source-file-nar - conn - (string->number derivation-source-file-id) - store-path))) - (select-source-files-missing-nars! all-ids))) + (with-time-logging "insert-source-files-missing-nars" + (insert-source-files-missing-nars all-ids)) all-ids)))))