Move inserting derivations in to the load-new-guix-revision module

And start to more closely integrate it. This makes it possible to start making
it faster by doing more in parallel.
This commit is contained in:
Christopher Baines 2024-08-07 16:51:57 +01:00
parent 7f746b358b
commit 77962f7c2c
4 changed files with 502 additions and 422 deletions

View file

@ -23,13 +23,10 @@
#:use-module (ice-9 format)
#:use-module (ice-9 binary-ports)
#:use-module (rnrs bytevectors)
#:use-module (gcrypt hash)
#:use-module (squee)
#:use-module (json)
#:use-module (guix base16)
#:use-module (guix base32)
#:use-module (guix serialization)
#:use-module (lzlib)
#:use-module (guix inferior)
#:use-module (guix memoization)
#:use-module (guix derivations)
@ -58,6 +55,10 @@
select-fixed-output-package-derivations-in-revision
select-derivation-outputs-in-revision
fix-derivation-output-details-hash-encoding
insert-derivation-sources
insert-derivation-source-file-nar
insert-derivation-outputs
insert-derivation-inputs
derivation-output-details->derivation-output-details-ids
derivation-output-details-ids->derivation-output-details-set-id
select-derivations-by-revision-name-and-version
@ -66,7 +67,6 @@
select-existing-derivations
select-derivations-by-id
select-derivations-and-build-status
derivation-file-names->derivation-ids
update-derivation-inputs-statistics
vacuum-derivation-inputs-table
update-derivation-outputs-statistics
@ -1487,38 +1487,11 @@ INNER JOIN derivation_outputs
sources-ids))
(define (insert-derivation-source-file-nar conn id source-file)
(define missing?
(match (exec-query
conn
"SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
(list (number->string id)))
(() #t)
(_ #f)))
(when missing?
(let* ((nar-bytevector (call-with-values
(lambda ()
(open-bytevector-output-port))
(lambda (port get-bytevector)
(write-file source-file port)
(get-bytevector))))
(data-string (bytevector->base16-string
(call-with-values
(lambda ()
(open-bytevector-output-port))
(lambda (port get-bytevector)
(call-with-lzip-output-port port
(lambda (port)
(put-bytevector port nar-bytevector))
#:level 9)
(get-bytevector)))))
(hash (bytevector->nix-base32-string
(sha256 nar-bytevector)))
(uncompressed-size (bytevector-length nar-bytevector)))
(exec-query
conn
"
(define (insert-derivation-source-file-nar conn id
hash bytevector uncompressed-size)
(exec-query
conn
"
INSERT INTO derivation_source_file_nars (
derivation_source_file_id,
compression,
@ -1527,12 +1500,12 @@ INSERT INTO derivation_source_file_nars (
uncompressed_size,
data
) VALUES ($1, $2, $3, $4, $5, $6)"
(list (number->string id)
"lzip"
"sha256"
hash
(number->string uncompressed-size)
(string-append "\\x" data-string))))))
(list (number->string id)
"lzip"
"sha256"
hash
(number->string uncompressed-size)
(string-append "\\x" (bytevector->base16-string bytevector)))))
(define* (backfill-derivation-source-file-nars conn #:key
(batch-size 10000)
@ -1564,130 +1537,6 @@ LIMIT $1"
batch)
(when loop? (loop (missing-batch))))))
(define (insert-missing-derivations conn
derivation-ids-hash-table
derivations)
(define (ensure-input-derivations-exist input-derivation-file-names)
(unless (null? input-derivation-file-names)
(simple-format
#t "debug: ensure-input-derivations-exist: processing ~A derivations\n"
(length input-derivation-file-names))
(update-derivation-ids-hash-table! conn
derivation-ids-hash-table
(list->vector
input-derivation-file-names))
(simple-format
#t
"debug: ensure-input-derivations-exist: checking for missing input derivations\n")
(let ((missing-derivations-filenames
(filter (lambda (derivation-file-name)
(not (hash-ref derivation-ids-hash-table
derivation-file-name)))
input-derivation-file-names)))
(unless (null? missing-derivations-filenames)
(simple-format
#f
"debug: ensure-input-derivations-exist: inserting missing input derivations\n")
;; Ensure all the input derivations exist
(insert-missing-derivations
conn
derivation-ids-hash-table
(map read-derivation-from-file
missing-derivations-filenames))))))
(define (insert-into-derivations dervs)
(string-append
"INSERT INTO derivations "
"(file_name, builder, args, env_vars, system_id) VALUES "
(string-join
(map (match-lambda
(($ <derivation> outputs inputs sources
system builder args env-vars file-name)
(simple-format
#f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')"
file-name
builder
(string-join (map quote-string args) ",")
(string-join (map (match-lambda
((key . value)
(string-append
"['" key '"', $$"
value "$$ ]")))
env-vars)
",")
(system->system-id conn system))))
dervs)
",")
" RETURNING id"
";"))
(with-time-logging
(simple-format
#f "insert-missing-derivations: inserting ~A derivations"
(length derivations))
(let ((derivation-ids
(append-map
(lambda (chunk)
(map (lambda (result)
(string->number (car result)))
(exec-query conn (insert-into-derivations chunk))))
(chunk derivations 500))))
(with-time-logging
"insert-missing-derivations: updating hash table"
(for-each (lambda (derivation derivation-id)
(hash-set! derivation-ids-hash-table
(derivation-file-name derivation)
derivation-id))
derivations
derivation-ids))
(with-time-logging
"insert-missing-derivations: inserting outputs"
(for-each (lambda (derivation-id derivation)
(insert-derivation-outputs conn
derivation-id
(derivation-outputs derivation)))
derivation-ids
derivations))
(with-time-logging
"insert-missing-derivations: inserting sources"
(for-each (lambda (derivation-id derivation)
(let ((sources (derivation-sources derivation)))
(unless (null? sources)
(let ((sources-ids
(insert-derivation-sources conn
derivation-id
sources)))
(map (lambda (id source-file)
(insert-derivation-source-file-nar conn
id
source-file))
sources-ids
sources)))))
derivation-ids
derivations))
(with-time-logging
"insert-missing-derivations: ensure-input-derivations-exist"
(ensure-input-derivations-exist (deduplicate-strings
(map derivation-input-path
(append-map derivation-inputs
derivations)))))
(with-time-logging
(simple-format
#f "insert-missing-derivations: inserting inputs for ~A derivations"
(length derivations))
(insert-derivation-inputs conn
derivation-ids
derivations))
derivation-ids)))
(define (select-derivations-by-id conn ids)
(define query
(string-append "SELECT id, file_name "
@ -1772,40 +1621,6 @@ WHERE " criteria ";"))
'()
sorted-derivations))
(define (update-derivation-ids-hash-table! conn
derivation-ids-hash-table
file-names)
(define file-names-count (vector-length file-names))
(simple-format #t "debug: update-derivation-ids-hash-table!: ~A file-names\n"
file-names-count)
(let ((missing-file-names
(vector-fold
(lambda (_ result file-name)
(if (and file-name
(hash-ref derivation-ids-hash-table
file-name))
result
(cons file-name result)))
'()
file-names)))
(simple-format
#t "debug: update-derivation-ids-hash-table!: lookup ~A file-names, ~A not cached\n"
file-names-count (length missing-file-names))
(unless (null? missing-file-names)
(for-each
(lambda (chunk)
(for-each
(match-lambda
((id file-name)
(hash-set! derivation-ids-hash-table
file-name
(string->number id))))
(exec-query conn (select-existing-derivations chunk))))
(chunk! missing-file-names 1000)))))
(define (insert-source-files-missing-nars conn derivation-ids)
(define (derivation-ids->next-related-derivation-ids! ids seen-ids)
(delete-duplicates/sort!
@ -1888,71 +1703,6 @@ INNER JOIN derivation_source_files
next-related-derivation-ids
seen-ids))))))
(define (derivation-file-names->derivation-ids conn derivation-file-names)
(define derivations-count
(vector-length derivation-file-names))
(if (= 0 derivations-count)
#()
(let* ((derivation-ids-hash-table (make-hash-table
;; Account for more derivations in
;; the graph
(* 2 derivations-count))))
(simple-format
#t "debug: derivation-file-names->derivation-ids: processing ~A derivations\n"
derivations-count)
(update-derivation-ids-hash-table! conn
derivation-ids-hash-table
derivation-file-names)
(let ((missing-derivation-filenames
(deduplicate-strings
(vector-fold
(lambda (_ result derivation-file-name)
(if (not derivation-file-name)
result
(if (hash-ref derivation-ids-hash-table
derivation-file-name)
result
(cons derivation-file-name result))))
'()
derivation-file-names))))
(chunk-for-each!
(lambda (missing-derivation-filenames-chunk)
(let ((missing-derivations-chunk
(with-time-logging
(simple-format #f "reading ~A missing derivations"
(length missing-derivation-filenames-chunk))
(map read-derivation-from-file
;; Do the filter again, since processing the last
;; chunk might have inserted some of the
;; derivations in this chunk
(filter (lambda (derivation-file-name)
(not (hash-ref derivation-ids-hash-table
derivation-file-name)))
missing-derivation-filenames-chunk)))))
(unless (null? missing-derivations-chunk)
(insert-missing-derivations conn
derivation-ids-hash-table
missing-derivations-chunk))))
1000
missing-derivation-filenames)
(let ((all-ids
(vector-map
(lambda (_ derivation-file-name)
(if derivation-file-name
(or (hash-ref derivation-ids-hash-table
derivation-file-name)
(error "missing derivation id"))
#f))
derivation-file-names)))
all-ids)))))
(define (update-derivation-inputs-statistics conn)
(let ((query
"