Move inserting derivations in to the load-new-guix-revision module

And start to more closely integrate it. This makes it possible to start making
it faster by doing more in parallel.
This commit is contained in:
Christopher Baines 2024-08-07 16:51:57 +01:00
parent 7f746b358b
commit 77962f7c2c
4 changed files with 502 additions and 422 deletions

View file

@ -25,13 +25,18 @@
#:use-module (ice-9 textual-ports)
#:use-module (ice-9 hash-table)
#:use-module (ice-9 suspendable-ports)
#:use-module (ice-9 binary-ports)
#:use-module ((ice-9 ports internal) #:select (port-poll))
#:use-module (rnrs bytevectors)
#:use-module (rnrs exceptions)
#:use-module (lzlib)
#:use-module (json)
#:use-module (squee)
#:use-module (gcrypt hash)
#:use-module (fibers)
#:use-module (fibers channels)
#:use-module (guix monads)
#:use-module (guix base32)
#:use-module (guix store)
#:use-module (guix channels)
#:use-module (guix inferior)
@ -41,6 +46,7 @@
#:use-module (guix progress)
#:use-module (guix packages)
#:use-module (guix derivations)
#:use-module (guix serialization)
#:use-module (guix build utils)
#:use-module ((guix build syscalls)
#:select (set-thread-name))
@ -49,6 +55,7 @@
#:use-module (guix-data-service utils)
#:use-module (guix-data-service model utils)
#:use-module (guix-data-service model build)
#:use-module (guix-data-service model system)
#:use-module (guix-data-service model channel-instance)
#:use-module (guix-data-service model channel-news)
#:use-module (guix-data-service model package)
@ -477,10 +484,7 @@
(package-derivation store package system))))
;; You don't always get what you ask for, so check
(if (string=? system (derivation-system derivation))
(let ((file-name
(derivation-file-name derivation)))
(add-temp-root store file-name)
file-name)
(derivation-file-name derivation)
(begin
(simple-format
(current-error-port)
@ -907,6 +911,294 @@
lint-checker-ids
lint-warnings-data)))
(define (update-derivation-ids-hash-table! conn
derivation-ids-hash-table
file-names)
(define file-names-count (vector-length file-names))
(simple-format #t "debug: update-derivation-ids-hash-table!: ~A file-names\n"
file-names-count)
(let ((missing-file-names
(vector-fold
(lambda (_ result file-name)
(if (and file-name
(hash-ref derivation-ids-hash-table
file-name))
result
(cons file-name result)))
'()
file-names)))
(simple-format
#t "debug: update-derivation-ids-hash-table!: lookup ~A file-names, ~A not cached\n"
file-names-count (length missing-file-names))
(unless (null? missing-file-names)
(for-each
(lambda (chunk)
(for-each
(match-lambda
((id file-name)
(hash-set! derivation-ids-hash-table
file-name
(string->number id))))
(exec-query conn (select-existing-derivations chunk))))
(chunk! missing-file-names 1000)))))
(define (insert-missing-derivations postgresql-connection-pool
utility-thread-channel
derivation-ids-hash-table
derivations)
(define (ensure-input-derivations-exist input-derivation-file-names)
(unless (null? input-derivation-file-names)
(simple-format
#t "debug: ensure-input-derivations-exist: processing ~A derivations\n"
(length input-derivation-file-names))
(with-resource-from-pool postgresql-connection-pool conn
(update-derivation-ids-hash-table! conn
derivation-ids-hash-table
(list->vector
input-derivation-file-names)))
(simple-format
#t
"debug: ensure-input-derivations-exist: checking for missing input derivations\n")
(let ((missing-derivations-filenames
(remove (lambda (derivation-file-name)
(hash-ref derivation-ids-hash-table
derivation-file-name))
input-derivation-file-names)))
(unless (null? missing-derivations-filenames)
(simple-format
#f
"debug: ensure-input-derivations-exist: inserting missing input derivations\n")
;; Ensure all the input derivations exist
(insert-missing-derivations
postgresql-connection-pool
utility-thread-channel
derivation-ids-hash-table
(call-with-worker-thread
utility-thread-channel
(lambda ()
(map read-derivation-from-file
missing-derivations-filenames))))))))
(define (insert-into-derivations conn drvs)
(string-append
"INSERT INTO derivations "
"(file_name, builder, args, env_vars, system_id) VALUES "
(string-join
(map (match-lambda
(($ <derivation> outputs inputs sources
system builder args env-vars file-name)
(simple-format
#f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')"
file-name
builder
(string-join (map quote-string args) ",")
(string-join (map (match-lambda
((key . value)
(string-append
"['" key '"', $$"
value "$$ ]")))
env-vars)
",")
(system->system-id conn system))))
drvs)
",")
" RETURNING id"
";"))
(with-time-logging
(simple-format
#f "insert-missing-derivations: inserting ~A derivations"
(length derivations))
(let* ((chunks (chunk derivations 500))
(derivation-ids
(with-resource-from-pool postgresql-connection-pool conn
(append-map!
(lambda (chunk)
(map (lambda (result)
(string->number (car result)))
(exec-query conn (insert-into-derivations conn chunk))))
chunks))))
(with-time-logging
"insert-missing-derivations: updating hash table"
(for-each (lambda (derivation derivation-id)
(hash-set! derivation-ids-hash-table
(derivation-file-name derivation)
derivation-id))
derivations
derivation-ids))
(with-time-logging
"insert-missing-derivations: inserting sources"
(for-each
(lambda (derivation-id derivation)
(let ((sources (derivation-sources derivation)))
(unless (null? sources)
(let ((sources-ids
(with-resource-from-pool postgresql-connection-pool conn
(insert-derivation-sources conn
derivation-id
sources))))
(par-map&
(lambda (id source-file)
(match
(with-resource-from-pool postgresql-connection-pool conn
(exec-query
conn
"
SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
(list (number->string id))))
(()
(let ((nar-bytevector
(call-with-worker-thread
utility-thread-channel
(lambda ()
(call-with-values
(lambda ()
(open-bytevector-output-port))
(lambda (port get-bytevector)
(write-file source-file port)
(get-bytevector)))))))
(letpar&
((compressed-nar-bytevector
(call-with-worker-thread
utility-thread-channel
(lambda ()
(call-with-values
(lambda ()
(open-bytevector-output-port))
(lambda (port get-bytevector)
(call-with-lzip-output-port port
(lambda (port)
(put-bytevector port nar-bytevector))
#:level 9)
(get-bytevector))))))
(hash
(call-with-worker-thread
utility-thread-channel
(lambda ()
(bytevector->nix-base32-string
(sha256 nar-bytevector)))))
(uncompressed-size (bytevector-length nar-bytevector)))
(with-resource-from-pool postgresql-connection-pool conn
(insert-derivation-source-file-nar
conn
id
hash
compressed-nar-bytevector
uncompressed-size)))))
(_ #f)))
sources-ids
sources)))))
derivation-ids
derivations))
(with-resource-from-pool postgresql-connection-pool conn
(with-time-logging
"insert-missing-derivations: inserting outputs"
(for-each (lambda (derivation-id derivation)
(insert-derivation-outputs conn
derivation-id
(derivation-outputs derivation)))
derivation-ids
derivations)))
(with-time-logging
"insert-missing-derivations: ensure-input-derivations-exist"
(ensure-input-derivations-exist (deduplicate-strings
(map derivation-input-path
(append-map derivation-inputs
derivations)))))
(with-resource-from-pool postgresql-connection-pool conn
(with-time-logging
(simple-format
#f "insert-missing-derivations: inserting inputs for ~A derivations"
(length derivations))
(insert-derivation-inputs conn
derivation-ids
derivations))))))
(define (derivation-file-names->derivation-ids postgresql-connection-pool
utility-thread-channel
derivation-file-names)
(define derivations-count
(vector-length derivation-file-names))
(if (= 0 derivations-count)
#()
(let* ((derivation-ids-hash-table (make-hash-table
;; Account for more derivations in
;; the graph
(* 2 derivations-count))))
(simple-format
#t "debug: derivation-file-names->derivation-ids: processing ~A derivations\n"
derivations-count)
(with-resource-from-pool postgresql-connection-pool conn
(update-derivation-ids-hash-table! conn
derivation-ids-hash-table
derivation-file-names))
(let* ((missing-derivation-filenames
(deduplicate-strings
(vector-fold
(lambda (_ result derivation-file-name)
(if (not derivation-file-name)
result
(if (hash-ref derivation-ids-hash-table
derivation-file-name)
result
(cons derivation-file-name result))))
'()
derivation-file-names)))
(missing-derivations-chunked-promises
(map
(lambda (chunk)
(fibers-delay
(lambda ()
(map read-derivation-from-file chunk))))
(chunk! missing-derivation-filenames 1000))))
(for-each
(lambda (missing-derivation-filenames-chunk)
(let ((missing-derivations-chunk
;; Do the filter again, since processing the last chunk
;; might have inserted some of the derivations in this
;; chunk
(remove! (lambda (derivation)
(hash-ref derivation-ids-hash-table
(derivation-file-name
derivation)))
(fibers-force
missing-derivation-filenames-chunk))))
(unless (null? missing-derivations-chunk)
(insert-missing-derivations postgresql-connection-pool
utility-thread-channel
derivation-ids-hash-table
missing-derivations-chunk))))
missing-derivations-chunked-promises))
(let ((all-ids
(vector-map
(lambda (_ derivation-file-name)
(if derivation-file-name
(or (hash-ref derivation-ids-hash-table
derivation-file-name)
(error "missing derivation id"))
#f))
derivation-file-names)))
all-ids))))
(define guix-store-path
(let ((store-path #f))
(lambda (store)
@ -1162,7 +1454,7 @@
(cons inferior inferior-store)))
parallelism
#:min-size 0
#:idle-seconds 10
#:idle-seconds 30
#:destructor (match-lambda
((inferior . store)
(close-inferior inferior)
@ -1399,6 +1691,7 @@
(define* (extract-information-from db-conn guix-revision-id commit
guix-source store-item
utility-thread-channel
#:key skip-system-tests?
extra-inferior-environment-variables
parallelism)
@ -1454,25 +1747,29 @@
1
#:min-size 0))
(define packages-data-promise
(fibers-delay
(lambda ()
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
(with-time-logging "getting all inferior package data"
(let ((packages
pkg-to-replacement-hash-table
(inferior-packages-plus-replacements inferior)))
(all-inferior-packages-data
inferior
packages
pkg-to-replacement-hash-table)))))))))
(define derivation-file-names->derivation-ids/fiberized
(fiberize
(lambda (derivation-file-names)
(derivation-file-names->derivation-ids
postgresql-connection-pool
utility-thread-channel
derivation-file-names))))
(define package-ids-promise
(fibers-delay
(lambda ()
(let ((packages-data (fibers-force packages-data-promise)))
(let ((packages-data
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
(with-time-logging "getting all inferior package data"
(let ((packages
pkg-to-replacement-hash-table
(inferior-packages-plus-replacements inferior)))
(all-inferior-packages-data
inferior
packages
pkg-to-replacement-hash-table))))))))
(with-resource-from-pool postgresql-connection-pool conn
(insert-packages conn packages-data))))))
@ -1534,94 +1831,99 @@
(define (extract-and-store-package-derivations)
(define packages-count
(vector-length
(assq-ref (fibers-force packages-data-promise)
'names)))
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
(ensure-gds-inferior-packages-defined! inferior)
(define chunk-size 3000)
(inferior-eval '(vector-length gds-inferior-packages) inferior)))))
(fibers-for-each
(match-lambda
((system . target)
(let loop ((wal-bytes (stat:size (stat "/var/guix/db/db.sqlite-wal"))))
(when (> wal-bytes (* 512 (expt 2 20)))
(simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n"
wal-bytes)
(define chunk-size 5000)
(sleep 30)
(loop (stat:size (stat "/var/guix/db/db.sqlite-wal")))))
(define (process-system-and-target system target)
(let loop ((wal-bytes
(catch #t
(lambda ()
(stat:size (stat "/var/guix/db/db.sqlite-wal")))
(lambda _ 0))))
(when (> wal-bytes (* 512 (expt 2 20)))
(simple-format #t "debug: guix-daemon WAL is large (~A), waiting\n"
wal-bytes)
(sleep 30)
(loop (catch #t
(lambda ()
(stat:size (stat "/var/guix/db/db.sqlite-wal")))
(lambda _ 0)))))
(with-time-logging
(simple-format #f "processing derivations for ~A" (cons system target))
(let ((derivations-vector (make-vector packages-count)))
(with-time-logging
(simple-format #f "getting derivations for ~A" (cons system target))
(let loop ((start-index 0))
(if (>= (+ start-index chunk-size) packages-count)
(let* ((remaining-count
(- packages-count start-index))
(chunk
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
(ensure-gds-inferior-packages-defined! inferior)
(let* ((count
(if (>= (+ start-index chunk-size) packages-count)
(- packages-count start-index)
chunk-size))
(chunk
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
(ensure-gds-inferior-packages-defined! inferior)
(inferior-package-derivations
inferior-store
inferior
system
target
start-index
remaining-count))))))
(vector-copy! derivations-vector
start-index
chunk))
(let ((chunk
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
(ensure-gds-inferior-packages-defined! inferior)
(inferior-package-derivations
inferior-store
inferior
system
target
start-index
count))))))
(vector-copy! derivations-vector
start-index
chunk)
(unless (>= (+ start-index chunk-size) packages-count)
(loop (+ start-index chunk-size))))))
(inferior-package-derivations
inferior-store
inferior
system
target
start-index
chunk-size))))))
(vector-copy! derivations-vector
start-index
chunk)
(loop (+ start-index chunk-size))))))
(let* ((derivation-ids
(with-time-logging
(simple-format #f "derivation-file-names->derivation-ids (~A ~A)"
system target)
(derivation-file-names->derivation-ids/fiberized
derivations-vector))))
(let ((package-ids (fibers-force package-ids-promise)))
(with-resource-from-pool postgresql-connection-pool conn
(let* ((derivation-ids
(let* ((package-ids (fibers-force package-ids-promise))
(package-derivation-ids
(with-resource-from-pool postgresql-connection-pool conn
(with-time-logging
(simple-format #f "derivation-file-names->derivation-ids (~A ~A)"
(simple-format #f "insert-package-derivations (~A ~A)"
system target)
(derivation-file-names->derivation-ids
conn
derivations-vector))))
(insert-package-derivations conn
system
(or target "")
package-ids
derivation-ids)))))
(chunk-for-each!
(lambda (package-derivation-ids-chunk)
(with-resource-from-pool postgresql-connection-pool conn
(insert-guix-revision-package-derivations
conn
guix-revision-id
package-derivation-ids-chunk)))
2000
package-derivation-ids))))))
(let ((package-derivation-ids
(with-time-logging
(simple-format #f "insert-package-derivations (~A ~A)"
system target)
(insert-package-derivations conn
system
(or target "")
package-ids
derivation-ids))))
(chunk-for-each! (lambda (package-derivation-ids-chunk)
(insert-guix-revision-package-derivations
conn
guix-revision-id
package-derivation-ids-chunk))
2000
package-derivation-ids))))))))
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
(inferior-fetch-system-target-pairs inferior)))))
(let ((process-system-and-target/fiberized
(fiberize process-system-and-target
#:parallelism parallelism)))
(par-map&
(match-lambda
((system . target)
(process-system-and-target/fiberized system target)))
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
(inferior-fetch-system-target-pairs inferior))))))
(with-resource-from-pool postgresql-connection-pool conn
(with-time-logging
@ -1635,7 +1937,7 @@
(begin
(simple-format #t "debug: skipping system tests\n")
'())
(let ((data
(let ((data-with-derivation-file-names
(with-resource-from-pool inf-and-store-pool res
(match res
((inferior . inferior-store)
@ -1645,22 +1947,41 @@
inferior-store
guix-source
commit)))))))
(when data
(with-resource-from-pool postgresql-connection-pool conn
(insert-system-tests-for-guix-revision conn
guix-revision-id
data))))))
(when data-with-derivation-file-names
(let ((data-with-derivation-ids
(map (match-lambda
((name description derivation-file-names-by-system location-data)
(list name
description
(let ((systems
(map car derivation-file-names-by-system))
(derivation-ids
(derivation-file-names->derivation-ids/fiberized
(list->vector
(map cdr derivation-file-names-by-system)))))
(map cons systems derivation-ids))
location-data)))
data-with-derivation-file-names)))
(with-resource-from-pool postgresql-connection-pool conn
(insert-system-tests-for-guix-revision
conn
guix-revision-id
data-with-derivation-ids)))))))
(simple-format #t "debug: extract-information-from: ~A\n" store-path)
(parallel-via-fibers
(fibers-force package-ids-promise)
(extract-and-store-lint-checkers-and-warnings)
(extract-and-store-package-derivations)
(extract-and-store-system-tests)))
(with-time-logging
(simple-format #f "extract-information-from: ~A\n" store-path)
(parallel-via-fibers
(fibers-force package-ids-promise)
(extract-and-store-package-derivations)
(extract-and-store-system-tests)
(extract-and-store-lint-checkers-and-warnings)))
#t)
(prevent-inlining-for-tests extract-information-from)
(define (load-channel-instances git-repository-id commit
(define (load-channel-instances utility-thread-channel
git-repository-id commit
channel-derivations-by-system)
;; Load the channel instances in a different transaction, so that this can
;; commit prior to the outer transaction
@ -1685,19 +2006,35 @@
(guix-revision-id
(or existing-guix-revision-id
(insert-guix-revision channel-instances-conn
git-repository-id commit))))
git-repository-id commit)))
(postgresql-connection-pool
(make-resource-pool
(const channel-instances-conn)
1
#:min-size 0)))
(unless existing-guix-revision-id
(insert-channel-instances channel-instances-conn
guix-revision-id
(filter-map
(match-lambda
((system . derivations)
(and=>
(assoc-ref derivations
'manifest-entry-item)
(lambda (drv)
(cons system drv)))))
channel-derivations-by-system))
(let* ((derivations-by-system
(filter-map
(match-lambda
((system . derivations)
(and=>
(assoc-ref derivations
'manifest-entry-item)
(lambda (drv)
(cons system drv)))))
channel-derivations-by-system))
(derivation-ids
(derivation-file-names->derivation-ids
postgresql-connection-pool
utility-thread-channel
(list->vector (map cdr derivations-by-system)))))
(insert-channel-instances channel-instances-conn
guix-revision-id
(map cons
(map car derivations-by-system)
(vector->list derivation-ids))))
(simple-format
(current-error-port)
"guix-data-service: saved the channel instance derivations to the database\n"))
@ -1709,6 +2046,13 @@
(define* (load-new-guix-revision conn git-repository-id commit
#:key skip-system-tests? parallelism
extra-inferior-environment-variables)
(define utility-thread-channel
(make-worker-thread-channel
(const '())
#:parallelism parallelism))
(%worker-thread-default-timeout #f)
(let* ((git-repository-fields
(select-git-repository conn git-repository-id))
(git-repository-url
@ -1727,7 +2071,8 @@
fetch-with-authentication?
#:parallelism parallelism))
(guix-revision-id
(load-channel-instances git-repository-id commit
(load-channel-instances utility-thread-channel
git-repository-id commit
channel-derivations-by-system)))
(let ((store-item
(channel-derivations-by-system->guix-store-item
@ -1737,6 +2082,7 @@
(extract-information-from conn
guix-revision-id
commit guix-source store-item
utility-thread-channel
#:skip-system-tests?
skip-system-tests?
#:extra-inferior-environment-variables

View file

@ -22,36 +22,28 @@
#:use-module (json)
#:use-module (guix utils)
#:use-module (guix-data-service model utils)
#:use-module (guix-data-service model derivation)
#:export (insert-channel-instances
channel-instances-exist-for-guix-revision?
select-channel-instances-for-guix-revision))
(define (insert-channel-instances conn
guix-revision-id
derivations-by-system)
(let ((derivation-ids
(derivation-file-names->derivation-ids
conn
(list->vector
(map cdr derivations-by-system)))))
(exec-query
conn
(string-append
"
derivation-ids-by-system)
(exec-query
conn
(string-append
"
INSERT INTO channel_instances
(guix_revision_id, system, derivation_id)
VALUES "
(string-join
(map (lambda (system derivation-id)
(simple-format #f "(~A, '~A', ~A)"
guix-revision-id
system
derivation-id))
(map car derivations-by-system)
(vector->list derivation-ids))
", "))))
(string-join
(map (lambda (derivation-id-and-system)
(simple-format #f "(~A, '~A', ~A)"
guix-revision-id
(car derivation-id-and-system)
(cdr derivation-id-and-system)))
derivation-ids-by-system)
", ")))
#t)
(define (channel-instances-exist-for-guix-revision? conn commit-hash)

View file

@ -23,13 +23,10 @@
#:use-module (ice-9 format)
#:use-module (ice-9 binary-ports)
#:use-module (rnrs bytevectors)
#:use-module (gcrypt hash)
#:use-module (squee)
#:use-module (json)
#:use-module (guix base16)
#:use-module (guix base32)
#:use-module (guix serialization)
#:use-module (lzlib)
#:use-module (guix inferior)
#:use-module (guix memoization)
#:use-module (guix derivations)
@ -58,6 +55,10 @@
select-fixed-output-package-derivations-in-revision
select-derivation-outputs-in-revision
fix-derivation-output-details-hash-encoding
insert-derivation-sources
insert-derivation-source-file-nar
insert-derivation-outputs
insert-derivation-inputs
derivation-output-details->derivation-output-details-ids
derivation-output-details-ids->derivation-output-details-set-id
select-derivations-by-revision-name-and-version
@ -66,7 +67,6 @@
select-existing-derivations
select-derivations-by-id
select-derivations-and-build-status
derivation-file-names->derivation-ids
update-derivation-inputs-statistics
vacuum-derivation-inputs-table
update-derivation-outputs-statistics
@ -1487,38 +1487,11 @@ INNER JOIN derivation_outputs
sources-ids))
(define (insert-derivation-source-file-nar conn id source-file)
(define missing?
(match (exec-query
conn
"SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
(list (number->string id)))
(() #t)
(_ #f)))
(when missing?
(let* ((nar-bytevector (call-with-values
(lambda ()
(open-bytevector-output-port))
(lambda (port get-bytevector)
(write-file source-file port)
(get-bytevector))))
(data-string (bytevector->base16-string
(call-with-values
(lambda ()
(open-bytevector-output-port))
(lambda (port get-bytevector)
(call-with-lzip-output-port port
(lambda (port)
(put-bytevector port nar-bytevector))
#:level 9)
(get-bytevector)))))
(hash (bytevector->nix-base32-string
(sha256 nar-bytevector)))
(uncompressed-size (bytevector-length nar-bytevector)))
(exec-query
conn
"
(define (insert-derivation-source-file-nar conn id
hash bytevector uncompressed-size)
(exec-query
conn
"
INSERT INTO derivation_source_file_nars (
derivation_source_file_id,
compression,
@ -1527,12 +1500,12 @@ INSERT INTO derivation_source_file_nars (
uncompressed_size,
data
) VALUES ($1, $2, $3, $4, $5, $6)"
(list (number->string id)
"lzip"
"sha256"
hash
(number->string uncompressed-size)
(string-append "\\x" data-string))))))
(list (number->string id)
"lzip"
"sha256"
hash
(number->string uncompressed-size)
(string-append "\\x" (bytevector->base16-string bytevector)))))
(define* (backfill-derivation-source-file-nars conn #:key
(batch-size 10000)
@ -1564,130 +1537,6 @@ LIMIT $1"
batch)
(when loop? (loop (missing-batch))))))
(define (insert-missing-derivations conn
derivation-ids-hash-table
derivations)
(define (ensure-input-derivations-exist input-derivation-file-names)
(unless (null? input-derivation-file-names)
(simple-format
#t "debug: ensure-input-derivations-exist: processing ~A derivations\n"
(length input-derivation-file-names))
(update-derivation-ids-hash-table! conn
derivation-ids-hash-table
(list->vector
input-derivation-file-names))
(simple-format
#t
"debug: ensure-input-derivations-exist: checking for missing input derivations\n")
(let ((missing-derivations-filenames
(filter (lambda (derivation-file-name)
(not (hash-ref derivation-ids-hash-table
derivation-file-name)))
input-derivation-file-names)))
(unless (null? missing-derivations-filenames)
(simple-format
#f
"debug: ensure-input-derivations-exist: inserting missing input derivations\n")
;; Ensure all the input derivations exist
(insert-missing-derivations
conn
derivation-ids-hash-table
(map read-derivation-from-file
missing-derivations-filenames))))))
(define (insert-into-derivations dervs)
(string-append
"INSERT INTO derivations "
"(file_name, builder, args, env_vars, system_id) VALUES "
(string-join
(map (match-lambda
(($ <derivation> outputs inputs sources
system builder args env-vars file-name)
(simple-format
#f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')"
file-name
builder
(string-join (map quote-string args) ",")
(string-join (map (match-lambda
((key . value)
(string-append
"['" key '"', $$"
value "$$ ]")))
env-vars)
",")
(system->system-id conn system))))
dervs)
",")
" RETURNING id"
";"))
(with-time-logging
(simple-format
#f "insert-missing-derivations: inserting ~A derivations"
(length derivations))
(let ((derivation-ids
(append-map
(lambda (chunk)
(map (lambda (result)
(string->number (car result)))
(exec-query conn (insert-into-derivations chunk))))
(chunk derivations 500))))
(with-time-logging
"insert-missing-derivations: updating hash table"
(for-each (lambda (derivation derivation-id)
(hash-set! derivation-ids-hash-table
(derivation-file-name derivation)
derivation-id))
derivations
derivation-ids))
(with-time-logging
"insert-missing-derivations: inserting outputs"
(for-each (lambda (derivation-id derivation)
(insert-derivation-outputs conn
derivation-id
(derivation-outputs derivation)))
derivation-ids
derivations))
(with-time-logging
"insert-missing-derivations: inserting sources"
(for-each (lambda (derivation-id derivation)
(let ((sources (derivation-sources derivation)))
(unless (null? sources)
(let ((sources-ids
(insert-derivation-sources conn
derivation-id
sources)))
(map (lambda (id source-file)
(insert-derivation-source-file-nar conn
id
source-file))
sources-ids
sources)))))
derivation-ids
derivations))
(with-time-logging
"insert-missing-derivations: ensure-input-derivations-exist"
(ensure-input-derivations-exist (deduplicate-strings
(map derivation-input-path
(append-map derivation-inputs
derivations)))))
(with-time-logging
(simple-format
#f "insert-missing-derivations: inserting inputs for ~A derivations"
(length derivations))
(insert-derivation-inputs conn
derivation-ids
derivations))
derivation-ids)))
(define (select-derivations-by-id conn ids)
(define query
(string-append "SELECT id, file_name "
@ -1772,40 +1621,6 @@ WHERE " criteria ";"))
'()
sorted-derivations))
(define (update-derivation-ids-hash-table! conn
derivation-ids-hash-table
file-names)
(define file-names-count (vector-length file-names))
(simple-format #t "debug: update-derivation-ids-hash-table!: ~A file-names\n"
file-names-count)
(let ((missing-file-names
(vector-fold
(lambda (_ result file-name)
(if (and file-name
(hash-ref derivation-ids-hash-table
file-name))
result
(cons file-name result)))
'()
file-names)))
(simple-format
#t "debug: update-derivation-ids-hash-table!: lookup ~A file-names, ~A not cached\n"
file-names-count (length missing-file-names))
(unless (null? missing-file-names)
(for-each
(lambda (chunk)
(for-each
(match-lambda
((id file-name)
(hash-set! derivation-ids-hash-table
file-name
(string->number id))))
(exec-query conn (select-existing-derivations chunk))))
(chunk! missing-file-names 1000)))))
(define (insert-source-files-missing-nars conn derivation-ids)
(define (derivation-ids->next-related-derivation-ids! ids seen-ids)
(delete-duplicates/sort!
@ -1888,71 +1703,6 @@ INNER JOIN derivation_source_files
next-related-derivation-ids
seen-ids))))))
(define (derivation-file-names->derivation-ids conn derivation-file-names)
(define derivations-count
(vector-length derivation-file-names))
(if (= 0 derivations-count)
#()
(let* ((derivation-ids-hash-table (make-hash-table
;; Account for more derivations in
;; the graph
(* 2 derivations-count))))
(simple-format
#t "debug: derivation-file-names->derivation-ids: processing ~A derivations\n"
derivations-count)
(update-derivation-ids-hash-table! conn
derivation-ids-hash-table
derivation-file-names)
(let ((missing-derivation-filenames
(deduplicate-strings
(vector-fold
(lambda (_ result derivation-file-name)
(if (not derivation-file-name)
result
(if (hash-ref derivation-ids-hash-table
derivation-file-name)
result
(cons derivation-file-name result))))
'()
derivation-file-names))))
(chunk-for-each!
(lambda (missing-derivation-filenames-chunk)
(let ((missing-derivations-chunk
(with-time-logging
(simple-format #f "reading ~A missing derivations"
(length missing-derivation-filenames-chunk))
(map read-derivation-from-file
;; Do the filter again, since processing the last
;; chunk might have inserted some of the
;; derivations in this chunk
(filter (lambda (derivation-file-name)
(not (hash-ref derivation-ids-hash-table
derivation-file-name)))
missing-derivation-filenames-chunk)))))
(unless (null? missing-derivations-chunk)
(insert-missing-derivations conn
derivation-ids-hash-table
missing-derivations-chunk))))
1000
missing-derivation-filenames)
(let ((all-ids
(vector-map
(lambda (_ derivation-file-name)
(if derivation-file-name
(or (hash-ref derivation-ids-hash-table
derivation-file-name)
(error "missing derivation id"))
#f))
derivation-file-names)))
all-ids)))))
(define (update-derivation-inputs-statistics conn)
(let ((query
"

View file

@ -23,7 +23,6 @@
#:use-module (guix utils)
#:use-module (guix-data-service model utils)
#:use-module (guix-data-service model location)
#:use-module (guix-data-service model derivation)
#:export (insert-system-tests-for-guix-revision
select-system-tests-for-guix-revision
@ -39,7 +38,7 @@
"system_tests"
'(name description location_id)
(map (match-lambda
((name description derivation-file-names-by-system location-data)
((name description derivation-ids-by-system location-data)
(list name
description
(location->location-id
@ -48,20 +47,13 @@
system-test-data)))
(data
(append-map
(lambda (system-test-id derivation-file-names-by-system)
(let ((systems
(map car derivation-file-names-by-system))
(derivation-ids
(derivation-file-names->derivation-ids
conn
(map cdr derivation-file-names-by-system))))
(map (lambda (system derivation-id)
(list guix-revision-id
system-test-id
derivation-id
system))
systems
derivation-ids)))
(lambda (system-test-id derivation-ids-by-system)
(map (lambda (system-and-derivation-id)
(list guix-revision-id
system-test-id
(cdr system-and-derivation-id)
(car system-and-derivation-id)))
derivation-ids-by-system))
system-test-ids
(map third system-test-data))))