Try to further speed up inserting missing derivation source files
Switch from using a recursive query to doing a breadth-first search through the graph of derivations, as I think PostgreSQL wasn't doing a great job of planning the recursive queries (it would overestimate the rows involved, and prefer sequential scans for the derivation_outputs table).
This commit is contained in:
parent
c5b504e94a
commit
f86657915e
1 changed files with 67 additions and 51 deletions
|
|
@ -1738,37 +1738,38 @@ WHERE " criteria ";"))
|
|||
(chunk! missing-file-names 2000)))))
|
||||
|
||||
(define (derivation-file-names->derivation-ids conn derivation-file-names)
|
||||
(define (select-source-files-missing-nars! derivation-ids)
|
||||
(define (derivation-ids->all-related-derivation-ids ids)
|
||||
(define query
|
||||
(define (insert-source-files-missing-nars derivation-ids)
|
||||
(define (derivation-ids->next-related-derivation-ids! ids seen-ids)
|
||||
(delete-duplicates/sort!
|
||||
(append-map!
|
||||
(lambda (ids-chunk)
|
||||
(let ((query
|
||||
(string-append
|
||||
"
|
||||
WITH RECURSIVE all_derivations AS (
|
||||
SELECT column1 AS derivation_id
|
||||
FROM (VALUES "
|
||||
(string-join (map
|
||||
(lambda (id)
|
||||
(string-append "(" id ")"))
|
||||
(map number->string ids))
|
||||
", ")
|
||||
") AS data
|
||||
UNION
|
||||
SELECT derivation_outputs.derivation_id
|
||||
FROM all_derivations
|
||||
INNER JOIN derivation_inputs
|
||||
ON derivation_inputs.derivation_id = all_derivations.derivation_id
|
||||
FROM derivation_inputs
|
||||
INNER JOIN derivation_outputs
|
||||
ON derivation_outputs.id = derivation_inputs.derivation_output_id
|
||||
)
|
||||
SELECT all_derivations.derivation_id
|
||||
FROM all_derivations"))
|
||||
WHERE derivation_inputs.derivation_id IN ("
|
||||
(string-join (map number->string ids) ",")
|
||||
")")))
|
||||
|
||||
(map (lambda (row)
|
||||
(filter-map
|
||||
(lambda (row)
|
||||
(let ((number
|
||||
(string->number
|
||||
(car row)))
|
||||
(car row))))
|
||||
(if (hash-ref seen-ids number)
|
||||
#f
|
||||
(begin
|
||||
(hash-set! seen-ids number #t)
|
||||
|
||||
number))))
|
||||
(with-time-logging
|
||||
"querying for batch of all related derivation ids"
|
||||
(exec-query conn query))))
|
||||
(exec-query conn query)))))
|
||||
(chunk! ids 2000))
|
||||
<))
|
||||
|
||||
(define (derivation-ids->missing-sources ids)
|
||||
(define query
|
||||
|
|
@ -1787,22 +1788,43 @@ INNER JOIN derivation_source_files
|
|||
")
|
||||
AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
|
||||
|
||||
(map (lambda (row)
|
||||
(list (string->number (first row))
|
||||
(second row)))
|
||||
(with-time-logging "finding batch of missing sources"
|
||||
(exec-query conn query)))
|
||||
|
||||
(let ((all-derivation-ids
|
||||
(with-time-logging "querying for all related dervation ids"
|
||||
(delete-duplicates/sort!
|
||||
(append-map!
|
||||
derivation-ids->all-related-derivation-ids
|
||||
(chunk! derivation-ids 5000))
|
||||
<))))
|
||||
(exec-query conn query))))
|
||||
|
||||
(let ((seen-ids (make-hash-table)))
|
||||
(let loop ((next-related-derivation-ids
|
||||
(with-time-logging "querying for next related dervation ids"
|
||||
(derivation-ids->next-related-derivation-ids!
|
||||
(list-copy derivation-ids)
|
||||
seen-ids))))
|
||||
(unless (null? next-related-derivation-ids)
|
||||
(let ((missing-sources
|
||||
(with-time-logging "querying for missing sources"
|
||||
(append-map! derivation-ids->missing-sources
|
||||
(chunk! all-derivation-ids
|
||||
(chunk next-related-derivation-ids
|
||||
10000)))))
|
||||
|
||||
(unless (null? missing-sources)
|
||||
(with-time-logging
|
||||
(simple-format #f "inserting ~A missing source files"
|
||||
(length missing-sources))
|
||||
(for-each (match-lambda
|
||||
((derivation-source-file-id store-path)
|
||||
(insert-derivation-source-file-nar
|
||||
conn
|
||||
derivation-source-file-id
|
||||
store-path)))
|
||||
missing-sources))))
|
||||
|
||||
(loop
|
||||
(with-time-logging "querying for next related dervation ids"
|
||||
(derivation-ids->next-related-derivation-ids!
|
||||
next-related-derivation-ids
|
||||
seen-ids)))))))
|
||||
|
||||
(if (null? derivation-file-names)
|
||||
'()
|
||||
(let* ((derivations-count (length derivation-file-names))
|
||||
|
|
@ -1854,13 +1876,7 @@ INNER JOIN derivation_source_files
|
|||
(error "missing derivation id")))
|
||||
derivation-file-names)))
|
||||
|
||||
(with-time-logging "inserting missing source files"
|
||||
(for-each (match-lambda
|
||||
((derivation-source-file-id store-path)
|
||||
(insert-derivation-source-file-nar
|
||||
conn
|
||||
(string->number derivation-source-file-id)
|
||||
store-path)))
|
||||
(select-source-files-missing-nars! all-ids)))
|
||||
(with-time-logging "insert-source-files-missing-nars"
|
||||
(insert-source-files-missing-nars all-ids))
|
||||
|
||||
all-ids)))))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue