Re-write insert-derivation-inputs in a more memory efficient manor

Previously it would compute a long list of strings, potentially more than
100,000 elements long, then split this string up and insert it in chunks. Only
then could memory be freed.

This new approach builds the strings in batches for the insertion query, then
moves on to the next batch. This should mean that more memory can be freed and
reused along the way.
This commit is contained in:
Christopher Baines 2022-01-12 18:18:15 +00:00
parent 6102553d94
commit 21cb33a859
2 changed files with 64 additions and 35 deletions

View file

@ -1329,48 +1329,51 @@ WHERE derivation_source_files.store_path = $1"
#f))) #f)))
(define (insert-derivation-inputs conn derivation-ids derivations) (define (insert-derivation-inputs conn derivation-ids derivations)
(let ((data (define (process-chunk derivation-ids derivations)
(append-map (let ((query-parts
(lambda (derivation-id derivation) (append-map!
(append-map (lambda (derivation-id derivation)
(match-lambda (append-map!
(($ <derivation-input> derivation-or-path sub-derivations) (match-lambda
(let ((path (($ <derivation-input> derivation-or-path sub-derivations)
(match derivation-or-path (let ((path
((? derivation? d) (match derivation-or-path
;; The first field changed to a derivation (from the file ((? derivation? d)
;; name) in 5cf4b26d52bcea382d98fb4becce89be9ee37b55 ;; The first field changed to a derivation (from the file
(derivation-file-name d)) ;; name) in 5cf4b26d52bcea382d98fb4becce89be9ee37b55
((? string? s) (derivation-file-name d))
s)))) ((? string? s)
(map (lambda (sub-derivation) s))))
(string-append "(" (map (lambda (sub-derivation)
(number->string derivation-id) (string-append "("
", '" path (number->string derivation-id)
"', '" sub-derivation "')")) ", '" path
sub-derivations)))) "', '" sub-derivation "')"))
(derivation-inputs derivation))) sub-derivations))))
derivation-ids (derivation-inputs derivation)))
derivations))) derivation-ids
derivations)))
(unless (null? data) (unless (null? query-parts)
(for-each (exec-query
(lambda (chunk) conn
(exec-query (string-append
conn "
(string-append
"
INSERT INTO derivation_inputs (derivation_id, derivation_output_id) INSERT INTO derivation_inputs (derivation_id, derivation_output_id)
SELECT vals.derivation_id, derivation_outputs.id SELECT vals.derivation_id, derivation_outputs.id
FROM (VALUES " FROM (VALUES "
(string-join chunk ", ") (string-join query-parts ", ")
") AS vals (derivation_id, file_name, output_name) ") AS vals (derivation_id, file_name, output_name)
INNER JOIN derivations INNER JOIN derivations
ON derivations.file_name = vals.file_name ON derivations.file_name = vals.file_name
INNER JOIN derivation_outputs INNER JOIN derivation_outputs
ON derivation_outputs.derivation_id = derivations.id ON derivation_outputs.derivation_id = derivations.id
AND vals.output_name = derivation_outputs.name"))) AND vals.output_name = derivation_outputs.name")))))
(chunk! data 1000)))))
(chunk-map! process-chunk
1000
(list-copy derivation-ids)
(list-copy derivations)))
(define (select-from-derivation-source-files store-paths) (define (select-from-derivation-source-files store-paths)
(string-append (string-append

View file

@ -32,7 +32,8 @@
letpar& letpar&
chunk chunk
chunk!)) chunk!
chunk-map!))
(define (call-with-time-logging action thunk) (define (call-with-time-logging action thunk)
(simple-format #t "debug: Starting ~A\n" action) (simple-format #t "debug: Starting ~A\n" action)
@ -175,3 +176,28 @@
(cons first-lst (cons first-lst
(chunk! rest max-length)))) (chunk! rest max-length))))
(list lst))) (list lst)))
(define* (chunk-map! proc chunk-size #:rest lsts)
(define (do-one-iteration lsts)
(if (> (length (car lsts))
chunk-size)
(let ((chunks-and-rest
(map (lambda (lst)
(call-with-values (lambda ()
(split-at! lst chunk-size))
(lambda (first-lst rest)
(cons first-lst
rest))))
lsts)))
(apply proc
(map car chunks-and-rest))
(do-one-iteration
(map cdr chunks-and-rest)))
(apply proc lsts)))
(unless (eq? 1
(length (delete-duplicates
(map length lsts))))
(error "lists not equal length"))
(do-one-iteration lsts))