Partition the package_derivations_by_guix_revision_range table

And create a proper git_branches table in the process.

I'm hoping this will help with slow deletions from the
package_derivations_by_guix_revision_range table in the case where there are
lots of branches, since it'll separate the data for one branch from another.

These migrations will remove the existing data, so
rebuild-package-derivations-table will currently need manually running to
regenerate it.
This commit is contained in:
Christopher Baines 2022-05-23 19:10:25 +01:00
parent 89545caa3f
commit 64be52844e
19 changed files with 442 additions and 167 deletions

View file

@ -21,32 +21,52 @@
#:use-module (squee)
#:use-module (srfi srfi-19)
#:use-module (guix-data-service model utils)
#:export (insert-git-branch-entry
#:export (git-branch-for-repository-and-name
insert-git-branch-entry
git-branches-for-commit
git-branches-with-repository-details-for-commit
most-recent-commits-for-branch
latest-processed-commit-for-branch
all-branches-with-most-recent-commit))
(define (git-branch-for-repository-and-name conn
git-repository-id
name)
(match (exec-query
conn
"
SELECT id
FROM git_branches
WHERE git_repository_id = $1
AND name = $2"
(list (number->string git-repository-id)
name))
(#f #f)
(((id)) (string->number id))))
(define (insert-git-branch-entry conn
name commit
git-repository-id datetime)
(exec-query
conn
(string-append
"INSERT INTO git_branches (name, commit, git_repository_id, datetime) "
"VALUES ($1, $2, $3, to_timestamp($4)) "
"ON CONFLICT DO NOTHING")
(list name
commit
(number->string git-repository-id)
(date->string datetime "~s"))))
git-repository-id
name)
(match (exec-query
conn
"
INSERT INTO git_branches (git_repository_id, name)
VALUES ($1, $2)
RETURNING id"
(list (number->string git-repository-id)
name))
(((id))
(string->number id))))
(define (git-branches-for-commit conn commit)
(define query
"
SELECT name, datetime FROM git_branches WHERE commit = $1
ORDER BY datetime DESC")
SELECT name, git_commits.datetime
FROM git_commits
INNER JOIN git_branches
ON git_commits.git_branch_id = git_branches.id
WHERE commit = $1
ORDER BY git_commits.datetime DESC")
(exec-query conn query (list commit)))
@ -55,16 +75,49 @@ ORDER BY datetime DESC")
"
SELECT git_repositories.id, git_repositories.label,
git_repositories.url, git_repositories.cgit_url_base,
git_branches.name, git_branches.datetime
FROM git_branches
git_branches.name, git_commits.datetime
FROM git_commits
INNER JOIN git_branches
ON git_commits.git_branch_id = git_branches.id
INNER JOIN git_repositories
ON git_branches.git_repository_id = git_repositories.id
WHERE git_branches.commit = $1")
WHERE git_commits.commit = $1")
(group-list-by-first-n-fields
4
(exec-query conn query (list commit))))
(define* (latest-processed-commit-for-branch conn repository-id branch-name)
(define query
(string-append
"
SELECT git_commits.commit
FROM git_branches
INNER JOIN git_commits
ON git_branches.id = git_commits.git_branch_id
INNER JOIN guix_revisions
ON git_commits.commit = guix_revisions.commit
INNER JOIN load_new_guix_revision_jobs
ON load_new_guix_revision_jobs.commit = guix_revisions.commit
INNER JOIN load_new_guix_revision_job_events
ON job_id = load_new_guix_revision_jobs.id
WHERE guix_revisions.git_repository_id = $1
AND git_branches.git_repository_id = $1
AND git_branches.name = $2
AND load_new_guix_revision_job_events.event = 'success'
ORDER BY datetime DESC
LIMIT 1"))
(match (exec-query
conn
query
(list repository-id branch-name))
(((commit-hash))
commit-hash)
('()
#f)))
(define* (most-recent-commits-for-branch conn git-repository-id
branch-name
#:key
@ -74,7 +127,7 @@ WHERE git_branches.commit = $1")
(define query
(string-append
"
SELECT git_branches.commit,
SELECT git_commits.commit,
datetime,
(
load_new_guix_revision_job_events.event IS NOT NULL
@ -84,12 +137,14 @@ SELECT git_branches.commit,
FROM load_new_guix_revision_job_events
INNER JOIN load_new_guix_revision_jobs ON
load_new_guix_revision_jobs.id = load_new_guix_revision_job_events.job_id
WHERE load_new_guix_revision_jobs.commit = git_branches.commit AND
WHERE load_new_guix_revision_jobs.commit = git_commits.commit AND
git_branches.git_repository_id = load_new_guix_revision_jobs.git_repository_id
) AS job_events
FROM git_branches
INNER JOIN git_commits
ON git_branches.id = git_commits.git_branch_id
LEFT OUTER JOIN guix_revisions
ON git_branches.commit = guix_revisions.commit
ON git_commits.commit = guix_revisions.commit
LEFT JOIN load_new_guix_revision_jobs
ON load_new_guix_revision_jobs.commit = guix_revisions.commit
LEFT JOIN load_new_guix_revision_job_events
@ -129,40 +184,12 @@ LIMIT " (number->string limit))
(list branch-name
(number->string git-repository-id)))))
(define* (latest-processed-commit-for-branch conn repository-id branch-name)
(define query
(string-append
"
SELECT git_branches.commit
FROM git_branches
INNER JOIN guix_revisions
ON git_branches.commit = guix_revisions.commit
INNER JOIN load_new_guix_revision_jobs
ON load_new_guix_revision_jobs.commit = guix_revisions.commit
INNER JOIN load_new_guix_revision_job_events
ON job_id = load_new_guix_revision_jobs.id
WHERE guix_revisions.git_repository_id = $1
AND git_branches.git_repository_id = $1
AND git_branches.name = $2
AND load_new_guix_revision_job_events.event = 'success'
ORDER BY datetime DESC
LIMIT 1"))
(match (exec-query
conn
query
(list repository-id branch-name))
(((commit-hash))
commit-hash)
('()
#f)))
(define (all-branches-with-most-recent-commit conn git-repository-id)
(define query
(string-append
"
SELECT DISTINCT ON (name)
name, git_branches.commit,
name, git_commits.commit,
datetime,
(
load_new_guix_revision_jobs.succeeded_at IS NOT NULL
@ -172,14 +199,16 @@ SELECT DISTINCT ON (name)
FROM load_new_guix_revision_job_events
INNER JOIN load_new_guix_revision_jobs ON
load_new_guix_revision_jobs.id = load_new_guix_revision_job_events.job_id
WHERE load_new_guix_revision_jobs.commit = git_branches.commit AND
WHERE load_new_guix_revision_jobs.commit = git_commits.commit AND
git_branches.git_repository_id = load_new_guix_revision_jobs.git_repository_id
) AS job_events
FROM git_branches
INNER JOIN git_commits
ON git_branches.id = git_commits.git_branch_id
LEFT OUTER JOIN guix_revisions
ON git_branches.commit = guix_revisions.commit
ON git_commits.commit = guix_revisions.commit
LEFT JOIN load_new_guix_revision_jobs
ON git_branches.commit = load_new_guix_revision_jobs.commit
ON git_commits.commit = load_new_guix_revision_jobs.commit
AND git_branches.git_repository_id = load_new_guix_revision_jobs.git_repository_id
WHERE git_branches.git_repository_id = $1
ORDER BY name, datetime DESC"))
@ -199,4 +228,3 @@ ORDER BY name, datetime DESC"))
conn
query
(list (number->string git-repository-id)))))

View file

@ -0,0 +1,38 @@
;;; Guix Data Service -- Information about Guix over time
;;; Copyright © 2019 Christopher Baines <mail@cbaines.net>
;;;
;;; This program is free software: you can redistribute it and/or
;;; modify it under the terms of the GNU Affero General Public License
;;; as published by the Free Software Foundation, either version 3 of
;;; the License, or (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with this program. If not, see
;;; <http://www.gnu.org/licenses/>.
(define-module (guix-data-service model git-commit)
#:use-module (ice-9 match)
#:use-module (json)
#:use-module (squee)
#:use-module (srfi srfi-19)
#:use-module (guix-data-service model utils)
#:export (insert-git-commit-entry))
(define (insert-git-commit-entry conn
git-branch-id
commit
datetime)
(exec-query
conn
"
INSERT INTO git_commits (commit, git_branch_id, datetime)
VALUES ($1, $2, to_timestamp($3))
ON CONFLICT DO NOTHING"
(list commit
(number->string git-branch-id)
(date->string datetime "~s"))))

View file

@ -163,6 +163,8 @@ SELECT id, label, url, cgit_url_base
FROM git_repositories WHERE id IN (
SELECT git_repository_id
FROM git_branches
INNER JOIN git_commits
ON git_branches.id = git_commits.git_branch_id
WHERE commit = $1
)")

View file

@ -95,17 +95,18 @@ INSERT INTO guix_revisions (git_repository_id, commit)
SELECT guix_revisions.id,
guix_revisions.commit,
guix_revisions.git_repository_id,
git_branches.datetime
git_commits.datetime
FROM guix_revisions
INNER JOIN git_branches
ON git_branches.commit = guix_revisions.commit
AND git_branches.git_repository_id = guix_revisions.git_repository_id
ON git_branches.git_repository_id = guix_revisions.git_repository_id
INNER JOIN git_commits
ON git_commits.commit = guix_revisions.commit
INNER JOIN load_new_guix_revision_jobs
ON load_new_guix_revision_jobs.commit = guix_revisions.commit
WHERE git_branches.name = $1
AND git_branches.datetime <= $2
AND git_commits.datetime <= $2
AND load_new_guix_revision_jobs.succeeded_at IS NOT NULL
ORDER BY git_branches.datetime DESC
ORDER BY git_commits.datetime DESC
LIMIT 1")
(match (exec-query conn query

View file

@ -19,9 +19,9 @@
#:use-module (ice-9 match)
#:use-module (squee)
#:use-module (guix-data-service database)
#:use-module (guix-data-service model git-branch)
#:use-module (guix-data-service utils)
#:export (delete-guix-revision-package-derivation-entries
insert-guix-revision-package-derivation-entries
#:export (insert-guix-revision-package-derivation-entries
update-package-derivations-table
vacuum-package-derivations-table
rebuild-package-derivations-table))
@ -47,17 +47,83 @@ WHERE git_repository_id = $1 AND
branch-name
guix-revision-id)))
(define* (insert-guix-revision-package-derivation-entries conn
git-repository-id
branch-name
#:key guix-revision-id)
(define (insert-guix-revision-package-derivation-entries conn
git-repository-id
branch-name)
(define query
(string-append
"
"
INSERT INTO package_derivations_by_guix_revision_range
SELECT DISTINCT
git_branches.git_repository_id,
git_branches.name AS branch_name,
$1,
packages.name AS package_name,
packages.version AS package_version,
revision_packages.derivation_id AS derivation_id,
revision_packages.system AS system,
revision_packages.target AS target,
first_value(guix_revisions.id)
OVER package_version AS first_guix_revision_id,
last_value(guix_revisions.id)
OVER package_version AS last_guix_revision_id
FROM packages
INNER JOIN (
SELECT package_derivations.package_id,
package_derivations.derivation_id,
systems.system,
package_derivations.target,
guix_revision_package_derivations.revision_id
FROM package_derivations
INNER JOIN systems
ON package_derivations.system_id = systems.id
INNER JOIN guix_revision_package_derivations
ON package_derivations.id = guix_revision_package_derivations.package_derivation_id
) AS revision_packages ON packages.id = revision_packages.package_id
INNER JOIN guix_revisions
ON guix_revisions.git_repository_id = $1
AND revision_packages.revision_id = guix_revisions.id
INNER JOIN git_branches
ON git_branches.id = $2
INNER JOIN git_commits
ON git_branches.id = git_commits.git_branch_id
AND guix_revisions.commit = git_commits.commit
WINDOW package_version AS (
PARTITION BY git_branches.git_repository_id, git_branches.name,
packages.name, packages.version, revision_packages.derivation_id
ORDER BY git_commits.datetime
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ORDER BY packages.name, packages.version")
(let ((git-branch-id (git-branch-for-repository-and-name conn
(string->number
git-repository-id)
branch-name)))
(exec-query
conn
(string-append
"
CREATE TABLE package_derivations_by_guix_revision_range_git_branch_"
(number->string git-branch-id) "
PARTITION OF package_derivations_by_guix_revision_range FOR VALUES IN ("
(number->string git-branch-id)
")"))
(exec-query
conn
query
(list git-repository-id
(number->string git-branch-id)))))
(define (insert-guix-revision-package-derivation-entries-for-guix-revision
conn
git-repository-id
branch-name
guix-revision-id)
(define query
"
INSERT INTO package_derivations_by_guix_revision_range
SELECT DISTINCT
git_branches.id
packages.name AS package_name,
packages.version AS package_version,
revision_packages.derivation_id AS derivation_id,
@ -85,35 +151,30 @@ INNER JOIN guix_revisions
AND revision_packages.revision_id = guix_revisions.id
INNER JOIN git_branches
ON git_branches.name = $2
AND guix_revisions.commit = git_branches.commit
"
(if guix-revision-id
"WHERE
revision_packages.derivation_id IN (
INNER JOIN git_commits
ON git_branches.id = git_commits.git_branch_id
AND guix_revisions.commit = git_commits.commit
WHERE revision_packages.derivation_id IN (
SELECT package_derivations.derivation_id
FROM package_derivations
INNER JOIN guix_revision_package_derivations
ON package_derivations.id = guix_revision_package_derivations.package_derivation_id
WHERE revision_id = $3
)"
"")
"
)
WINDOW package_version AS (
PARTITION BY git_branches.git_repository_id, git_branches.name,
packages.name, packages.version, revision_packages.derivation_id
ORDER BY git_branches.datetime
ORDER BY git_commits.datetime
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
ORDER BY packages.name, packages.version"))
ORDER BY packages.name, packages.version")
(exec-query
conn
query
`(,git-repository-id
,branch-name
,@(if guix-revision-id
(list guix-revision-id)
'()))))
(list git-repository-id
branch-name
guix-revision-id)))
(define (update-package-derivations-table conn
git-repository-id
@ -140,14 +201,21 @@ LOCK TABLE ONLY package_derivations_by_guix_revision_range
(with-time-logging
(simple-format #f "inserting package derivation entries for ~A"
branch-name)
(insert-guix-revision-package-derivation-entries
(insert-guix-revision-package-derivation-entries-for-guix-revision
conn
git-repository-id
branch-name
#:guix-revision-id guix-revision-id))))
guix-revision-id))))
(exec-query
conn
"SELECT name FROM git_branches WHERE commit = $1 AND git_repository_id = $2"
"
SELECT name
FROM git_branches
INNER JOIN git_commits
ON git_branches.id = git_commits.git_branch_id
AND git_commits.commit = $1
WHERE git_repository_id = $2
"
(list commit git-repository-id)))
#t)

View file

@ -306,26 +306,28 @@ FROM (
-- be parameters?
package_version,
first_guix_revisions.commit AS first_guix_revision_commit,
first_git_branches.datetime AS first_datetime,
first_git_commits.datetime AS first_datetime,
last_guix_revisions.commit AS last_guix_revision_commit,
last_git_branches.datetime AS last_datetime
last_git_commits.datetime AS last_datetime
FROM package_derivations_by_guix_revision_range
INNER JOIN guix_revisions AS first_guix_revisions
ON first_guix_revision_id = first_guix_revisions.id
INNER JOIN git_branches AS first_git_branches
ON package_derivations_by_guix_revision_range.branch_name = first_git_branches.name
AND first_guix_revisions.git_repository_id = first_git_branches.git_repository_id
AND first_guix_revisions.commit = first_git_branches.commit
INNER JOIN git_branches
ON git_branches.git_repository_id = $2
AND git_branches.name = $3
INNER JOIN git_commits AS first_git_commits
ON first_git_commits.git_branch_id = git_branches.id
AND first_guix_revisions.git_repository_id = git_branches.git_repository_id
AND first_guix_revisions.commit = first_git_commits.commit
INNER JOIN guix_revisions AS last_guix_revisions
ON last_guix_revision_id = last_guix_revisions.id
INNER JOIN git_branches AS last_git_branches
ON package_derivations_by_guix_revision_range.branch_name = last_git_branches.name
AND last_guix_revisions.git_repository_id = last_git_branches.git_repository_id
AND last_guix_revisions.commit = last_git_branches.commit
AND last_guix_revisions.git_repository_id = git_branches.git_repository_id
INNER JOIN git_commits AS last_git_commits
ON last_git_commits.git_branch_id = git_branches.id
AND last_guix_revisions.commit = last_git_commits.commit
WHERE package_name = $1
AND package_derivations_by_guix_revision_range.git_repository_id = $2
AND package_derivations_by_guix_revision_range.branch_name = $3
ORDER BY package_version DESC, first_git_branches.datetime ASC
AND package_derivations_by_guix_revision_range.git_branch_id = git_branches.id
ORDER BY package_version DESC, first_git_commits.datetime ASC
) AS data1
WINDOW version_window AS (
PARTITION BY data1.package_version
@ -348,9 +350,9 @@ ORDER BY package_version DESC, first_datetime ASC"
SELECT package_version,
derivations.file_name,
first_guix_revisions.commit AS first_guix_revision_commit,
first_git_branches.datetime AS first_datetime,
first_git_commits.datetime AS first_datetime,
last_guix_revisions.commit AS last_guix_revision_commit,
last_git_branches.datetime AS last_datetime,
last_git_commits.datetime AS last_datetime,
JSON_AGG(
json_build_object(
'build_server_id', builds.build_server_id,
@ -374,19 +376,19 @@ LEFT OUTER JOIN latest_build_status
ON builds.id = latest_build_status.build_id
INNER JOIN guix_revisions AS first_guix_revisions
ON first_guix_revision_id = first_guix_revisions.id
INNER JOIN git_branches AS first_git_branches
ON first_guix_revisions.git_repository_id = first_git_branches.git_repository_id
AND first_guix_revisions.commit = first_git_branches.commit
INNER JOIN git_branches
ON git_branches.git_repository_id = $2
AND git_branches.name = $3
INNER JOIN git_commits AS first_git_commits
ON first_guix_revisions.commit = first_git_commits.commit
AND first_git_commits.git_branch_id = git_branches.id
INNER JOIN guix_revisions AS last_guix_revisions
ON last_guix_revision_id = last_guix_revisions.id
INNER JOIN git_branches AS last_git_branches
ON last_guix_revisions.git_repository_id = last_git_branches.git_repository_id
AND last_guix_revisions.commit = last_git_branches.commit
INNER JOIN git_commits AS last_git_commits
ON last_guix_revisions.commit = last_git_commits.commit
AND last_git_commits.git_branch_id = git_branches.id
WHERE package_name = $1
AND package_derivations_by_guix_revision_range.git_repository_id = $2
AND package_derivations_by_guix_revision_range.branch_name = $3
AND first_git_branches.name = $3
AND last_git_branches.name = $3
AND package_derivations_by_guix_revision_range.git_branch_id = git_branches.id
AND package_derivations_by_guix_revision_range.system = $4
AND package_derivations_by_guix_revision_range.target = $5
GROUP BY 1, 2, 3, 4, 5, 6
@ -459,9 +461,9 @@ FROM (
derivation_output_details.path,
derivations_by_output_details_set.derivation_output_details_set_id,
first_guix_revisions.commit AS first_guix_revision_commit,
first_git_branches.datetime AS first_datetime,
first_git_commits.datetime AS first_datetime,
last_guix_revisions.commit AS last_guix_revision_commit,
last_git_branches.datetime AS last_datetime
last_git_commits.datetime AS last_datetime
FROM package_derivations_by_guix_revision_range
INNER JOIN derivations
ON package_derivations_by_guix_revision_range.derivation_id = derivations.id
@ -469,24 +471,24 @@ FROM (
ON derivation_outputs.derivation_id = derivations.id
INNER JOIN derivation_output_details
ON derivation_outputs.derivation_output_details_id = derivation_output_details.id
INNER JOIN git_branches
ON git_branches.git_repository_id = $2
AND git_branches.name = $3
INNER JOIN guix_revisions AS first_guix_revisions
ON first_guix_revision_id = first_guix_revisions.id
INNER JOIN derivations_by_output_details_set
ON derivations_by_output_details_set.derivation_id = derivations.id
INNER JOIN git_branches AS first_git_branches
ON first_guix_revisions.git_repository_id = first_git_branches.git_repository_id
AND first_guix_revisions.commit = first_git_branches.commit
INNER JOIN git_commits AS first_git_commits
ON first_git_commits.git_branch_id = git_branches.id
AND first_guix_revisions.commit = first_git_commits.commit
INNER JOIN guix_revisions AS last_guix_revisions
ON last_guix_revision_id = last_guix_revisions.id
INNER JOIN git_branches AS last_git_branches
ON last_guix_revisions.git_repository_id = last_git_branches.git_repository_id
AND last_guix_revisions.commit = last_git_branches.commit
INNER JOIN git_commits AS last_git_commits
ON last_git_commits.git_branch_id = git_branches.id
AND last_guix_revisions.commit = last_git_commits.commit
WHERE package_name = $1
AND package_derivations_by_guix_revision_range.git_repository_id = $2
AND package_derivations_by_guix_revision_range.branch_name = $3
AND package_derivations_by_guix_revision_range.git_branch_id = git_branches.id
AND derivation_outputs.name = $4
AND first_git_branches.name = $3
AND last_git_branches.name = $3
AND package_derivations_by_guix_revision_range.system = $5
AND package_derivations_by_guix_revision_range.target = $6
) AS data1

View file

@ -176,7 +176,7 @@ FROM (
OVER derivation_window AS last_datetime
FROM (
SELECT guix_revision_id,
git_branches.datetime,
git_commits.datetime,
derivation_id
FROM guix_revision_system_test_derivations
INNER JOIN system_tests
@ -185,10 +185,12 @@ FROM (
ON guix_revisions.id = guix_revision_id
INNER JOIN git_branches
ON guix_revisions.git_repository_id = git_branches.git_repository_id
AND git_branches.commit = guix_revisions.commit
AND git_branches.name = $3
INNER JOIN git_commits
ON git_branches.id = git_commits.branch_id
AND guix_revisions.commit = git_commits.commit
WHERE system_tests.name = $1
AND guix_revisions.git_repository_id = $2
AND git_branches.name = $3
AND system = $4
) AS data1
WINDOW derivation_window AS (