Try to further speed up inserting missing derivation source files
Switch from using a recursive query to doing a breadth-first search through the graph of derivations, as I think PostgreSQL wasn't doing a great job of planning the recursive queries (it would overestimate the rows involved, and prefer sequential scans for the derivation_outputs table).
This commit is contained in:
parent
c5b504e94a
commit
f86657915e
1 changed file with 67 additions and 51 deletions
|
|
@ -1738,37 +1738,38 @@ WHERE " criteria ";"))
|
||||||
(chunk! missing-file-names 2000)))))
|
(chunk! missing-file-names 2000)))))
|
||||||
|
|
||||||
(define (derivation-file-names->derivation-ids conn derivation-file-names)
|
(define (derivation-file-names->derivation-ids conn derivation-file-names)
|
||||||
(define (select-source-files-missing-nars! derivation-ids)
|
(define (insert-source-files-missing-nars derivation-ids)
|
||||||
(define (derivation-ids->all-related-derivation-ids ids)
|
(define (derivation-ids->next-related-derivation-ids! ids seen-ids)
|
||||||
(define query
|
(delete-duplicates/sort!
|
||||||
(string-append
|
(append-map!
|
||||||
"
|
(lambda (ids-chunk)
|
||||||
WITH RECURSIVE all_derivations AS (
|
(let ((query
|
||||||
SELECT column1 AS derivation_id
|
(string-append
|
||||||
FROM (VALUES "
|
"
|
||||||
(string-join (map
|
SELECT derivation_outputs.derivation_id
|
||||||
(lambda (id)
|
FROM derivation_inputs
|
||||||
(string-append "(" id ")"))
|
INNER JOIN derivation_outputs
|
||||||
(map number->string ids))
|
ON derivation_outputs.id = derivation_inputs.derivation_output_id
|
||||||
", ")
|
WHERE derivation_inputs.derivation_id IN ("
|
||||||
") AS data
|
(string-join (map number->string ids) ",")
|
||||||
UNION
|
")")))
|
||||||
SELECT derivation_outputs.derivation_id
|
|
||||||
FROM all_derivations
|
|
||||||
INNER JOIN derivation_inputs
|
|
||||||
ON derivation_inputs.derivation_id = all_derivations.derivation_id
|
|
||||||
INNER JOIN derivation_outputs
|
|
||||||
ON derivation_outputs.id = derivation_inputs.derivation_output_id
|
|
||||||
)
|
|
||||||
SELECT all_derivations.derivation_id
|
|
||||||
FROM all_derivations"))
|
|
||||||
|
|
||||||
(map (lambda (row)
|
(filter-map
|
||||||
(string->number
|
(lambda (row)
|
||||||
(car row)))
|
(let ((number
|
||||||
(with-time-logging
|
(string->number
|
||||||
"querying for batch of all related derivation ids"
|
(car row))))
|
||||||
(exec-query conn query))))
|
(if (hash-ref seen-ids number)
|
||||||
|
#f
|
||||||
|
(begin
|
||||||
|
(hash-set! seen-ids number #t)
|
||||||
|
|
||||||
|
number))))
|
||||||
|
(with-time-logging
|
||||||
|
"querying for batch of all related derivation ids"
|
||||||
|
(exec-query conn query)))))
|
||||||
|
(chunk! ids 2000))
|
||||||
|
<))
|
||||||
|
|
||||||
(define (derivation-ids->missing-sources ids)
|
(define (derivation-ids->missing-sources ids)
|
||||||
(define query
|
(define query
|
||||||
|
|
@ -1787,21 +1788,42 @@ INNER JOIN derivation_source_files
|
||||||
")
|
")
|
||||||
AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
|
AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
|
||||||
|
|
||||||
(with-time-logging "finding batch of missing sources"
|
(map (lambda (row)
|
||||||
(exec-query conn query)))
|
(list (string->number (first row))
|
||||||
|
(second row)))
|
||||||
|
(with-time-logging "finding batch of missing sources"
|
||||||
|
(exec-query conn query))))
|
||||||
|
|
||||||
(let ((all-derivation-ids
|
(let ((seen-ids (make-hash-table)))
|
||||||
(with-time-logging "querying for all related dervation ids"
|
(let loop ((next-related-derivation-ids
|
||||||
(delete-duplicates/sort!
|
(with-time-logging "querying for next related dervation ids"
|
||||||
(append-map!
|
(derivation-ids->next-related-derivation-ids!
|
||||||
derivation-ids->all-related-derivation-ids
|
(list-copy derivation-ids)
|
||||||
(chunk! derivation-ids 5000))
|
seen-ids))))
|
||||||
<))))
|
(unless (null? next-related-derivation-ids)
|
||||||
|
(let ((missing-sources
|
||||||
|
(with-time-logging "querying for missing sources"
|
||||||
|
(append-map! derivation-ids->missing-sources
|
||||||
|
(chunk next-related-derivation-ids
|
||||||
|
10000)))))
|
||||||
|
|
||||||
(with-time-logging "querying for missing sources"
|
(unless (null? missing-sources)
|
||||||
(append-map! derivation-ids->missing-sources
|
(with-time-logging
|
||||||
(chunk! all-derivation-ids
|
(simple-format #f "inserting ~A missing source files"
|
||||||
10000)))))
|
(length missing-sources))
|
||||||
|
(for-each (match-lambda
|
||||||
|
((derivation-source-file-id store-path)
|
||||||
|
(insert-derivation-source-file-nar
|
||||||
|
conn
|
||||||
|
derivation-source-file-id
|
||||||
|
store-path)))
|
||||||
|
missing-sources))))
|
||||||
|
|
||||||
|
(loop
|
||||||
|
(with-time-logging "querying for next related dervation ids"
|
||||||
|
(derivation-ids->next-related-derivation-ids!
|
||||||
|
next-related-derivation-ids
|
||||||
|
seen-ids)))))))
|
||||||
|
|
||||||
(if (null? derivation-file-names)
|
(if (null? derivation-file-names)
|
||||||
'()
|
'()
|
||||||
|
|
@ -1854,13 +1876,7 @@ INNER JOIN derivation_source_files
|
||||||
(error "missing derivation id")))
|
(error "missing derivation id")))
|
||||||
derivation-file-names)))
|
derivation-file-names)))
|
||||||
|
|
||||||
(with-time-logging "inserting missing source files"
|
(with-time-logging "insert-source-files-missing-nars"
|
||||||
(for-each (match-lambda
|
(insert-source-files-missing-nars all-ids))
|
||||||
((derivation-source-file-id store-path)
|
|
||||||
(insert-derivation-source-file-nar
|
|
||||||
conn
|
|
||||||
(string->number derivation-source-file-id)
|
|
||||||
store-path)))
|
|
||||||
(select-source-files-missing-nars! all-ids)))
|
|
||||||
|
|
||||||
all-ids)))))
|
all-ids)))))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue