Rework loading revision data

These changes were motivated by switching to a mechanism of loading data that
isn't dependent on the big advisory lock that prevents more than one revision
from being processed at a time.

Since INSERT ... RETURNING id; is used, an insert can block if another
transaction inserts the same data, and then fail with an error when that
transaction commits. The solution is to use ON CONFLICT DO NOTHING, but you then
have to handle the case where the INSERT doesn't return an id because the other
transaction has already inserted the row.

This commit rewrites insert-missing-data-and-return-all-ids to work as described
above, to detect existing data more efficiently, and to use vectors in more
places. Other utilities for inserting data are added as well.
This commit is contained in:
Christopher Baines 2024-12-05 20:56:23 +00:00
parent b128e9bd7a
commit 5ed98343d7
25 changed files with 874 additions and 1149 deletions

View file

@ -982,76 +982,30 @@ LOCK TABLE ONLY derivation_output_details
conn
"derivation_output_details"
'(path hash_algorithm hash recursive)
(map (lambda (details)
(list (assq-ref details 'path)
(or (non-empty-string-or-false
(assq-ref details 'hash_algorithm))
NULL)
(or (non-empty-string-or-false
(assq-ref details 'hash))
NULL)
(assq-ref details 'recursive)))
derivation-output-details)))
(list->vector
(map (lambda (details)
(list (assq-ref details 'path)
(or (non-empty-string-or-false
(assq-ref details 'hash_algorithm))
NULL)
(or (non-empty-string-or-false
(assq-ref details 'hash))
NULL)
(assq-ref details 'recursive)))
derivation-output-details))))
(define (derivation-output-details-ids->derivation-output-details-set-id
conn
derivation-output-details-ids)
(define sorted-derivation-output-details-ids
(sort derivation-output-details-ids <))
(define (select-derivation-output-details-sets-id)
(match (exec-query
conn
(string-append
"
SELECT id
FROM derivation_output_details_sets
WHERE derivation_output_details_ids = ARRAY["
(string-join (map number->string
sorted-derivation-output-details-ids)
",")
"]"))
(((id))
(string->number id))
(_ #f)))
(define (insert-into-derivation-output-details-sets)
(match (exec-query
conn
(string-append
"
INSERT INTO derivation_output_details_sets (derivation_output_details_ids)
VALUES (ARRAY["
(string-join (map number->string
sorted-derivation-output-details-ids)
",")
"])
RETURNING id"))
(((id))
(string->number id))))
(or (select-derivation-output-details-sets-id)
(insert-into-derivation-output-details-sets)))
(insert-and-return-id
conn
"derivation_output_details_sets"
'(derivation_output_details_ids)
(list (sort derivation-output-details-ids <))))
(define (insert-derivation-outputs conn
derivation-id
names-and-derivation-outputs)
(define (insert-into-derivation-outputs output-names
derivation-output-details-ids)
(string-append "INSERT INTO derivation_outputs "
"(derivation_id, name, derivation_output_details_id) VALUES "
(string-join
(map (lambda (output-name derivation-output-details-id)
(simple-format
#f "(~A, '~A', ~A)"
derivation-id
output-name
derivation-output-details-id))
output-names
derivation-output-details-ids)
",")
";"))
(define (insert-into-derivations-by-output-details-set
derivation_output_details_set_id)
(exec-query
@ -1059,13 +1013,16 @@ RETURNING id"))
"
INSERT INTO derivations_by_output_details_set
(derivation_id, derivation_output_details_set_id)
VALUES ($1, $2)"
VALUES ($1, $2)
ON CONFLICT DO NOTHING"
(list (number->string derivation-id)
(number->string derivation_output_details_set_id))))
(let* ((derivation-outputs (map cdr names-and-derivation-outputs))
(derivation-output-paths (map derivation-output-path
derivation-outputs))
(let* ((derivation-outputs
(map cdr names-and-derivation-outputs))
(derivation-output-paths
(map derivation-output-path
derivation-outputs))
(derivation-output-names
(map car names-and-derivation-outputs))
@ -1083,10 +1040,17 @@ VALUES ($1, $2)"
(recursive . ,recursive?))))
derivation-outputs))))
(exec-query conn
(insert-into-derivation-outputs
derivation-output-names
derivation-output-details-ids))
(insert-missing-data
conn
"derivation_outputs"
'(derivation_id name derivation_output_details_id)
(list->vector
(map (lambda (output-name derivation-output-details-id)
(list derivation-id
output-name
derivation-output-details-id))
derivation-output-names
(vector->list derivation-output-details-ids))))
(insert-into-derivations-by-output-details-set
(derivation-output-details-ids->derivation-output-details-set-id
@ -1434,8 +1398,8 @@ WHERE derivation_source_files.store_path = $1"
"', '" sub-derivation "')"))
sub-derivations))))
(derivation-inputs derivation)))
derivation-ids
derivations)))
(vector->list derivation-ids)
(vector->list derivations))))
(chunk-for-each!
(lambda (query-parts-chunk)
@ -1452,18 +1416,11 @@ INNER JOIN derivations
ON derivations.file_name = vals.file_name
INNER JOIN derivation_outputs
ON derivation_outputs.derivation_id = derivations.id
AND vals.output_name = derivation_outputs.name")))
AND vals.output_name = derivation_outputs.name
ON CONFLICT DO NOTHING")))
1000
query-parts)))
(define (select-from-derivation-source-files store-paths)
(string-append
"SELECT id, store_path FROM derivation_source_files "
"WHERE store_path IN ("
(string-join (map quote-string store-paths)
",")
");"))
(define (insert-derivation-sources conn derivation-id sources)
(define (insert-into-derivation-sources derivation-source-file-ids)
(string-append
@ -1473,16 +1430,17 @@ INNER JOIN derivation_outputs
(map (lambda (derivation-source-file-id)
(simple-format
#f "(~A, ~A)" derivation-id derivation-source-file-id))
derivation-source-file-ids)
(vector->list derivation-source-file-ids))
",")
";"))
"ON CONFLICT DO NOTHING;"))
(let ((sources-ids
(insert-missing-data-and-return-all-ids
conn
"derivation_source_files"
'(store_path)
(map list sources))))
(list->vector
(map list sources)))))
(exec-query conn
(insert-into-derivation-sources sources-ids))
@ -1501,7 +1459,8 @@ INSERT INTO derivation_source_file_nars (
hash,
uncompressed_size,
data
) VALUES ($1, $2, $3, $4, $5, $6)"
) VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT DO NOTHING"
(list (number->string id)
"lzip"
"sha256"
@ -1520,7 +1479,8 @@ INSERT INTO derivation_source_file_nars (
hash,
uncompressed_size,
data
) VALUES ($1, $2, $3, $4, $5, $6)"
) VALUES ($1, $2, $3, $4, $5, $6)
ON CONFLICT DO NOTHING"
(list (number->string id)
"lzip"
"sha256"