Replace derivation-file-names->vhash

Rather than creating vhashes, just update the hash table that is used as a
cache, and query that. This should speed things up and reduce memory usage
when loading derivations.
This commit is contained in:
Christopher Baines 2021-09-24 17:14:40 +01:00
parent 261552bd5e
commit 947cabb685

View file

@ -1498,29 +1498,29 @@ LIMIT $1"
(simple-format (simple-format
#t "debug: ensure-input-derivations-exist: processing ~A derivations\n" #t "debug: ensure-input-derivations-exist: processing ~A derivations\n"
(length input-derivation-file-names)) (length input-derivation-file-names))
(let ((existing-derivation-entries
(derivation-file-names->vhash conn
derivation-ids-hash-table
input-derivation-file-names)))
(simple-format
#t
"debug: ensure-input-derivations-exist: checking for missing input derivations\n")
(let ((missing-derivations-filenames
(filter (lambda (derivation-file-name)
(not (vhash-assoc derivation-file-name
existing-derivation-entries)))
input-derivation-file-names)))
(unless (null? missing-derivations-filenames) (update-derivation-ids-hash-table! conn
(simple-format derivation-ids-hash-table
#f input-derivation-file-names)
"debug: ensure-input-derivations-exist: inserting missing input derivations\n") (simple-format
;; Ensure all the input derivations exist #t
(insert-missing-derivations "debug: ensure-input-derivations-exist: checking for missing input derivations\n")
conn (let ((missing-derivations-filenames
derivation-ids-hash-table (filter (lambda (derivation-file-name)
(map read-derivation-from-file (not (hash-ref derivation-ids-hash-table
missing-derivations-filenames))))))) derivation-file-name)))
input-derivation-file-names)))
(unless (null? missing-derivations-filenames)
(simple-format
#f
"debug: ensure-input-derivations-exist: inserting missing input derivations\n")
;; Ensure all the input derivations exist
(insert-missing-derivations
conn
derivation-ids-hash-table
(map read-derivation-from-file
missing-derivations-filenames))))))
(define (insert-into-derivations) (define (insert-into-derivations)
(string-append (string-append
@ -1698,46 +1698,33 @@ WHERE " criteria ";"))
'() '()
sorted-derivations)) sorted-derivations))
(define (derivation-file-names->vhash conn derivation-ids-hash-table file-names) (define (update-derivation-ids-hash-table! conn
(simple-format #t "debug: derivation-file-names->vhash: ~A file-names\n" derivation-ids-hash-table
(length file-names)) file-names)
(match (fold (match-lambda* (define file-names-count (length file-names))
((file-name (result . missing-file-names))
(let ((cached-id (hash-ref derivation-ids-hash-table (simple-format #t "debug: update-derivation-ids-hash-table!: ~A file-names\n"
file-name))) file-names-count)
(if cached-id (let ((missing-file-names
(cons (vhash-cons file-name cached-id result) (fold (lambda (file-name result)
missing-file-names) (if (hash-ref derivation-ids-hash-table
(cons result file-name)
(cons file-name missing-file-names)))))) result
(cons vlist-null '()) (cons file-name result)))
file-names) '()
((result) file-names)))
(simple-format
#t "debug: derivation-file-names->vhash: lookup ~A file-names, all found\n" (simple-format
(length file-names)) #t "debug: update-derivation-ids-hash-table!: lookup ~A file-names, ~A not cached\n"
result) file-names-count (length missing-file-names))
((result . missing-file-names)
(simple-format (for-each
#t "debug: derivation-file-names->vhash: lookup ~A file-names, ~A not cached\n" (match-lambda
(length file-names) (length missing-file-names)) ((id file-name)
(let ((result-for-missing-file-names (hash-set! derivation-ids-hash-table
(exec-query->vhash file-name
conn (string->number id))))
(select-existing-derivations missing-file-names) (exec-query conn (select-existing-derivations missing-file-names)))))
second ;; file_name
(lambda (result)
(string->number (first result)))))) ;; id
(simple-format
#t "debug: derivation-file-names->vhash: adding ~A entries to the cache\n"
(vlist-length result-for-missing-file-names))
(vhash-fold
(lambda (key value combined)
;; Update the derivation-ids-hash-table as we go through the vhash
(hash-set! derivation-ids-hash-table key value)
(vhash-cons key value combined))
result
result-for-missing-file-names)))))
(define (derivation-file-names->derivation-ids conn derivation-file-names) (define (derivation-file-names->derivation-ids conn derivation-file-names)
(define (select-source-files-missing-nars derivation-ids) (define (select-source-files-missing-nars derivation-ids)
@ -1809,47 +1796,39 @@ INNER JOIN derivation_source_files
(simple-format (simple-format
#t "debug: derivation-file-names->derivation-ids: processing ~A derivations\n" #t "debug: derivation-file-names->derivation-ids: processing ~A derivations\n"
derivations-count) derivations-count)
(let* ((existing-derivation-entries
(derivation-file-names->vhash conn
derivation-ids-hash-table
derivation-file-names))
(missing-derivations (update-derivation-ids-hash-table! conn
(map read-derivation-from-file derivation-ids-hash-table
(deduplicate-strings derivation-file-names)
(filter (lambda (derivation-file-name)
(not (vhash-assoc derivation-file-name
existing-derivation-entries)))
derivation-file-names))))
(new-derivation-entries (let ((missing-derivations
(if (null? missing-derivations) (map read-derivation-from-file
'() (deduplicate-strings
(insert-missing-derivations conn (filter (lambda (derivation-file-name)
derivation-ids-hash-table (not (hash-ref derivation-ids-hash-table
missing-derivations))) derivation-file-name)))
derivation-file-names)))))
(new-entries-id-lookup-vhash (unless (null? missing-derivations)
(two-lists->vhash (map derivation-file-name missing-derivations) (insert-missing-derivations conn
new-derivation-entries)) derivation-ids-hash-table
missing-derivations))
(all-ids
(map (lambda (derivation-file-name)
(cdr
(or (vhash-assoc derivation-file-name
existing-derivation-entries)
(vhash-assoc derivation-file-name
new-entries-id-lookup-vhash)
(error "missing derivation id"))))
derivation-file-names)))
(with-time-logging "inserting missing source files" (let ((all-ids
(for-each (match-lambda (map (lambda (derivation-file-name)
((derivation-source-file-id store-path) (or (hash-ref derivation-ids-hash-table
(insert-derivation-source-file-nar derivation-file-name)
conn (error "missing derivation id")))
(string->number derivation-source-file-id) derivation-file-names)))
store-path)))
(select-source-files-missing-nars all-ids)))
all-ids)))) (with-time-logging "inserting missing source files"
(for-each (match-lambda
((derivation-source-file-id store-path)
(insert-derivation-source-file-nar
conn
(string->number derivation-source-file-id)
store-path)))
(select-source-files-missing-nars all-ids)))
all-ids)))))