Start storing and identifying sets of derivation outputs

Derivations are effectively equivalent if they produce the same set of
outputs. This is possible because fixed-output derivations can be
equivalent: two fixed-output derivations may differ, yet still be
equivalent, because they produce the same fixed output.

To make it easier to track equivalent derivations, primarily in order to
work out which derivations might correspond to a build, store the sets of
derivation outputs and the derivations that each set relates to.
This commit is contained in:
Christopher Baines 2019-12-07 19:21:02 +01:00
parent 0e97c55c0c
commit a965ba1f6a
5 changed files with 129 additions and 13 deletions

View file

@ -289,19 +289,63 @@ ORDER BY derivations.file_name
";"))
;; Build the text of a single multi-row INSERT statement for the
;; derivation_outputs table: one "(derivation_id, name,
;; derivation_output_details_id)" tuple per output, joined with commas and
;; terminated with ";".  The two list arguments are walked in parallel by
;; `map', so they must have the same length.
;;
;; `derivation-id' is not a parameter; it is taken from the enclosing scope,
;; which is not visible in this span.
;;
;; NOTE(review): this span is a diff rendering that interleaves the old and
;; new versions of several lines (e.g. both the `derivation-output-ids' and
;; the `derivation-output-details-ids' parameter lines appear), so it is not
;; a single well-formed definition as shown.
;;
;; NOTE(review): the output name is interpolated inside single quotes
;; ('~A') without escaping; presumably output names come from trusted
;; derivation data -- confirm.
(define (insert-into-derivation-outputs output-names
derivation-output-ids)
derivation-output-details-ids)
(string-append "INSERT INTO derivation_outputs "
"(derivation_id, name, derivation_output_details_id) VALUES "
(string-join
(map (lambda (output-name derivation-output-id)
(map (lambda (output-name derivation-output-details-id)
(simple-format
#f "(~A, '~A', ~A)"
derivation-id output-name derivation-output-id))
derivation-id
output-name
derivation-output-details-id))
output-names
derivation-output-ids)
derivation-output-details-ids)
",")
";"))
(define (derivation-output-details-ids->sql-array derivation-output-details-ids)
  ;; Render a list of numeric ids as a PostgreSQL ARRAY[...] literal.
  ;;
  ;; The ids are sorted ascending first.  PostgreSQL array equality is
  ;; order-sensitive, and the migration that backfills
  ;; derivation_output_details_sets stores each array ordered by id, so
  ;; both the lookup and the insert below must use that same canonical
  ;; order; otherwise one set of outputs could be missed by the lookup or
  ;; stored more than once under different orderings.
  (string-append
   "ARRAY["
   (string-join (map number->string
                     (sort derivation-output-details-ids <))
                ",")
   "]"))

(define (select-derivation-output-details-sets-id derivation-output-details-ids)
  ;; Look up the id of the derivation_output_details_sets row whose array
  ;; equals the given ids (compared in canonical sorted order).
  ;;
  ;; Returns the id as a number when exactly one row matches, #f otherwise.
  (match (exec-query
          conn
          (string-append
           "
SELECT id
FROM derivation_output_details_sets
WHERE derivation_output_details_ids = "
           (derivation-output-details-ids->sql-array
            derivation-output-details-ids)))
    (((id))
     (string->number id))
    (_ #f)))

(define (insert-into-derivation-output-details-sets
         derivation-output-details-ids)
  ;; Insert a new derivation_output_details_sets row for the given ids
  ;; (stored in canonical sorted order) and return the new row's id as a
  ;; number.
  ;;
  ;; NOTE(review): callers pair this with the lookup above as
  ;; select-then-insert; two concurrent writers could both miss the lookup
  ;; and one would then hit the UNIQUE constraint -- confirm whether
  ;; concurrent loading is possible.
  (match (exec-query
          conn
          (string-append
           "
INSERT INTO derivation_output_details_sets (derivation_output_details_ids)
VALUES ("
           (derivation-output-details-ids->sql-array
            derivation-output-details-ids)
           ")
RETURNING id"))
    (((id))
     (string->number id))))
(define (insert-into-derivations-by-output-details-set
         derivation_output_details_set_id)
  ;; Record which output details set the current derivation belongs to by
  ;; inserting a (derivation_id, derivation_output_details_set_id) row.
  ;; `derivation-id' and `conn' come from the enclosing scope.  Both ids are
  ;; passed as bound query parameters, converted to strings first.
  (let ((query "
INSERT INTO derivations_by_output_details_set
(derivation_id, derivation_output_details_set_id)
VALUES ($1, $2)")
        (params (map number->string
                     (list derivation-id
                           derivation_output_details_set_id))))
    (exec-query conn query params)))
(let* ((derivation-outputs (map cdr names-and-derivation-outputs))
(derivation-output-paths (map derivation-output-path
derivation-outputs))
@ -333,14 +377,15 @@ ORDER BY derivations.file_name
(two-lists->vhash (map derivation-output-path missing-entries)
new-derivation-output-details-ids))
(derivation-output-ids
(derivation-output-details-ids
(map (lambda (path)
(string->number
(cdr
(or (vhash-assoc path
existing-derivation-output-details-entries)
(vhash-assoc path
new-entries-id-lookup-vhash)
(error "missing derivation output details entry"))))
(error "missing derivation output details entry")))))
derivation-output-paths))
(derivation-output-names
@ -348,9 +393,14 @@ ORDER BY derivations.file_name
(exec-query conn
(insert-into-derivation-outputs derivation-output-names
derivation-output-ids))
derivation-output-details-ids))
derivation-output-ids))
(insert-into-derivations-by-output-details-set
(or
(select-derivation-output-details-sets-id derivation-output-details-ids)
(insert-into-derivation-output-details-sets derivation-output-details-ids)))
derivation-output-details-ids))
(define (select-derivation-by-file-name conn file-name)
(define query

View file

@ -0,0 +1,50 @@
-- Deploy guix-data-service:derivation_output_sets to pg
BEGIN;

-- One row per distinct set of derivation outputs, held as an array of
-- derivation_output_details ids. The backfill below stores each array
-- ordered by id, so that equal sets produce equal arrays.
CREATE TABLE derivation_output_details_sets (
    id integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
    derivation_output_details_ids integer[] NOT NULL,
    UNIQUE (derivation_output_details_ids)
);

-- Links each derivation to the output set it produces. UNIQUE (derivation_id)
-- means a derivation belongs to at most one set.
CREATE TABLE derivations_by_output_details_set (
    derivation_id integer REFERENCES derivations (id),
    derivation_output_details_set_id integer REFERENCES derivation_output_details_sets (id),
    PRIMARY KEY (derivation_id, derivation_output_details_set_id),
    UNIQUE (derivation_id)
);

-- Supports finding all derivations that share a given output set.
CREATE INDEX derivations_by_output_details_set_id_idx
    ON derivations_by_output_details_set (derivation_output_details_set_id);

-- Backfill: one set per distinct sorted array of output details ids.
INSERT INTO derivation_output_details_sets (derivation_output_details_ids)
SELECT DISTINCT
    ARRAY_AGG(
        derivation_output_details_id
        ORDER BY derivation_output_details_id
    )
FROM derivation_outputs
GROUP BY derivation_id;

-- Backfill: attach every existing derivation to its set by recomputing the
-- same sorted array and matching it against the rows inserted above.
INSERT INTO derivations_by_output_details_set (
    derivation_id,
    derivation_output_details_set_id
)
SELECT
    derivation_output_groups.derivation_id,
    derivation_output_details_sets.id
FROM (
    SELECT
        derivation_id,
        ARRAY_AGG(
            derivation_output_details_id
            ORDER BY derivation_output_details_id
        ) AS derivation_output_details_ids
    FROM derivation_outputs
    GROUP BY derivation_id
) AS derivation_output_groups
INNER JOIN derivation_output_details_sets
    ON derivation_output_groups.derivation_output_details_ids =
       derivation_output_details_sets.derivation_output_details_ids;

COMMIT;

View file

@ -0,0 +1,8 @@
-- Revert guix-data-service:derivation_output_sets from pg
BEGIN;

-- derivations_by_output_details_set holds a foreign key to
-- derivation_output_details_sets, so it is listed first; both tables are
-- removed together.
DROP TABLE
    derivations_by_output_details_set,
    derivation_output_details_sets;

COMMIT;

View file

@ -36,3 +36,4 @@ change_nars_size_to_bigint 2019-12-04T21:24:21Z <chris@phact> # Change nars.si
change_nar_urls_size_to_bigint 2019-12-04T21:49:07Z <chris@phact> # Change nar_urls.size to bigint
sort_out_duplicate_builds 2019-12-05T12:43:53Z <chris@phact> # Sort out duplicate builds
add_some_database_indexes 2019-12-05T15:53:04Z Christopher Baines <mail@cbaines.net> # Add some indexes
derivation_output_sets 2019-12-05T23:19:05Z Christopher Baines <mail@cbaines.net> # Describe the sets of derivation outputs

View file

@ -0,0 +1,7 @@
-- Verify guix-data-service:derivation_output_sets on pg
BEGIN;

-- Selecting each column by name fails if a table or column is missing;
-- WHERE FALSE keeps the check from reading any rows.
SELECT
    id,
    derivation_output_details_ids
FROM derivation_output_details_sets
WHERE FALSE;

SELECT
    derivation_id,
    derivation_output_details_set_id
FROM derivations_by_output_details_set
WHERE FALSE;

-- Division by zero raises an error when the index does not exist.
SELECT 1 / COUNT(*)
FROM pg_indexes
WHERE tablename = 'derivations_by_output_details_set'
  AND indexname = 'derivations_by_output_details_set_id_idx';

ROLLBACK;