Rework loading revision data
These changes were motivated by switching to a mechanism of loading data that isn't dependent on the big advisory lock that prevents more than one revision from being processed at a time. Since INSERT ... RETURNING id; is used, this can block if another transaction inserts the same data, and then cause an error when that transaction commits. The solution is to use ON CONFLICT DO NOTHING, but you have to handle the case when the INSERT doesn't return an id since the other transaction has inserted it. This commit rewrites insert-missing-data-and-return-all-ids to do as described above, as well as being more efficient in how existing data is detected and to use more vectors. Other utilities for inserting data are added as well.
This commit is contained in:
parent
b128e9bd7a
commit
5ed98343d7
25 changed files with 874 additions and 1149 deletions
|
|
@ -846,95 +846,84 @@
|
|||
conn
|
||||
(inferior-packages->license-id-lists
|
||||
conn
|
||||
;; TODO Don't needlessly convert
|
||||
(vector->list
|
||||
(assq-ref inferior-packages-data 'license-data))))))
|
||||
(assq-ref inferior-packages-data 'license-data)))))
|
||||
(all-package-metadata-ids
|
||||
new-package-metadata-ids
|
||||
(with-time-logging "inserting package metadata entries"
|
||||
(inferior-packages->package-metadata-ids
|
||||
conn
|
||||
;; TODO Don't needlessly convert
|
||||
(vector->list
|
||||
(assq-ref inferior-packages-data 'metadata))
|
||||
(assq-ref inferior-packages-data 'metadata)
|
||||
package-license-set-ids)))
|
||||
(replacement-package-ids
|
||||
(vector-map
|
||||
(lambda (_ package-index-or-false)
|
||||
(if package-index-or-false
|
||||
(first
|
||||
(vector-ref
|
||||
(inferior-packages->package-ids
|
||||
conn
|
||||
(list (list (vector-ref names package-index-or-false)
|
||||
(vector-ref versions package-index-or-false)
|
||||
(list-ref all-package-metadata-ids
|
||||
package-index-or-false)
|
||||
(cons "integer" NULL)))))
|
||||
(vector
|
||||
(list (vector-ref names package-index-or-false)
|
||||
(vector-ref versions package-index-or-false)
|
||||
(vector-ref all-package-metadata-ids
|
||||
package-index-or-false)
|
||||
(cons "integer" NULL))))
|
||||
0)
|
||||
(cons "integer" NULL)))
|
||||
(assq-ref inferior-packages-data 'replacements))))
|
||||
|
||||
(unless (null? new-package-metadata-ids)
|
||||
(unless (= 0 (vector-length new-package-metadata-ids))
|
||||
(with-time-logging "inserting package metadata tsvector entries"
|
||||
(insert-package-metadata-tsvector-entries
|
||||
conn new-package-metadata-ids)))
|
||||
|
||||
(with-time-logging "getting package-ids (without replacements)"
|
||||
(list->vector
|
||||
(inferior-packages->package-ids
|
||||
conn
|
||||
;; TODO Do this more efficiently
|
||||
(zip (vector->list names)
|
||||
(vector->list versions)
|
||||
all-package-metadata-ids
|
||||
(vector->list replacement-package-ids)))))))
|
||||
(inferior-packages->package-ids
|
||||
conn
|
||||
;; Similar to zip, but generating a vector of lists
|
||||
(vector-map (lambda (index . vals) vals)
|
||||
names
|
||||
versions
|
||||
all-package-metadata-ids
|
||||
replacement-package-ids)))))
|
||||
|
||||
(define (insert-lint-warnings conn
|
||||
package-ids
|
||||
lint-checker-ids
|
||||
lint-warnings-data)
|
||||
(lint-warnings-data->lint-warning-ids
|
||||
conn
|
||||
(append-map!
|
||||
(concatenate!
|
||||
(filter-map
|
||||
(lambda (lint-checker-id warnings-per-package)
|
||||
(if warnings-per-package
|
||||
(vector-fold
|
||||
(lambda (_ result package-id warnings)
|
||||
(append!
|
||||
result
|
||||
(map
|
||||
(match-lambda
|
||||
((location-data messages-by-locale)
|
||||
(let ((location-id
|
||||
(location->location-id
|
||||
conn
|
||||
(apply location location-data)))
|
||||
(lint-warning-message-set-id
|
||||
(lint-warning-message-data->lint-warning-message-set-id
|
||||
conn
|
||||
messages-by-locale)))
|
||||
(list lint-checker-id
|
||||
package-id
|
||||
location-id
|
||||
lint-warning-message-set-id))))
|
||||
(fold (lambda (location-and-messages result)
|
||||
;; TODO Sort to delete duplicates, rather than use member
|
||||
(if (member location-and-messages result)
|
||||
(begin
|
||||
(apply
|
||||
simple-format
|
||||
(current-error-port)
|
||||
"warning: skipping duplicate lint warning ~A ~A\n"
|
||||
location-and-messages)
|
||||
result)
|
||||
(append! result
|
||||
(list location-and-messages))))
|
||||
'()
|
||||
warnings))))
|
||||
(if (null? warnings)
|
||||
result
|
||||
(cons
|
||||
(lint-warnings-data->lint-warning-ids
|
||||
conn
|
||||
(list->vector
|
||||
(map
|
||||
(match-lambda
|
||||
((location-data messages-by-locale)
|
||||
(let ((location-id
|
||||
(location->location-id
|
||||
conn
|
||||
(apply location location-data)))
|
||||
(lint-warning-message-set-id
|
||||
(lint-warning-message-data->lint-warning-message-set-id
|
||||
conn
|
||||
messages-by-locale)))
|
||||
(list lint-checker-id
|
||||
package-id
|
||||
location-id
|
||||
lint-warning-message-set-id))))
|
||||
warnings)))
|
||||
result)))
|
||||
'()
|
||||
package-ids
|
||||
warnings-per-package)
|
||||
'()))
|
||||
lint-checker-ids
|
||||
#f))
|
||||
(vector->list lint-checker-ids)
|
||||
lint-warnings-data)))
|
||||
|
||||
(define (update-derivation-ids-hash-table! conn
|
||||
|
|
@ -977,30 +966,25 @@
|
|||
#:key (log-tag "unspecified"))
|
||||
|
||||
(define (insert-into-derivations conn drvs)
|
||||
(string-append
|
||||
"INSERT INTO derivations "
|
||||
"(file_name, builder, args, env_vars, system_id) VALUES "
|
||||
(string-join
|
||||
(map (match-lambda
|
||||
(($ <derivation> outputs inputs sources
|
||||
system builder args env-vars file-name)
|
||||
(simple-format
|
||||
#f "('~A', '~A', ARRAY[~A]::varchar[], ARRAY[~A], '~A')"
|
||||
file-name
|
||||
builder
|
||||
(string-join (map quote-string args) ",")
|
||||
(string-join (map (match-lambda
|
||||
((key . value)
|
||||
(string-append
|
||||
"['" key '"', $$"
|
||||
value "$$ ]")))
|
||||
env-vars)
|
||||
",")
|
||||
(system->system-id conn system))))
|
||||
drvs)
|
||||
",")
|
||||
" RETURNING id"
|
||||
";"))
|
||||
(insert-missing-data-and-return-all-ids
|
||||
conn
|
||||
"derivations"
|
||||
'(file_name builder args env_vars system_id)
|
||||
(vector-map (match-lambda*
|
||||
((_ ($ <derivation> outputs inputs sources
|
||||
system builder args env-vars file-name))
|
||||
(list file-name
|
||||
builder
|
||||
(cons "varchar[]"
|
||||
(list->vector args))
|
||||
(cons "varchar[][]"
|
||||
(list->vector
|
||||
(map (match-lambda
|
||||
((key . value)
|
||||
(vector key value)))
|
||||
env-vars)))
|
||||
(system->system-id conn system))))
|
||||
drvs)))
|
||||
|
||||
(define (insert-derivations)
|
||||
(with-resource-from-pool postgresql-connection-pool conn
|
||||
|
|
@ -1021,45 +1005,41 @@
|
|||
(let ((derivations
|
||||
;; Do this while holding the PostgreSQL connection to
|
||||
;; avoid conflicts with other fibers
|
||||
(delete-duplicates
|
||||
(filter-map (lambda (derivation)
|
||||
(if (hash-ref derivation-ids-hash-table
|
||||
(derivation-file-name
|
||||
derivation))
|
||||
#f
|
||||
derivation))
|
||||
unfiltered-derivations))))
|
||||
(if (null? derivations)
|
||||
(values '() '())
|
||||
(list->vector
|
||||
(delete-duplicates
|
||||
(filter-map (lambda (derivation)
|
||||
(if (hash-ref derivation-ids-hash-table
|
||||
(derivation-file-name
|
||||
derivation))
|
||||
#f
|
||||
derivation))
|
||||
unfiltered-derivations)))))
|
||||
(if (= 0 (vector-length derivations))
|
||||
(values #() #())
|
||||
(begin
|
||||
(simple-format
|
||||
(current-error-port)
|
||||
"insert-missing-derivations: inserting ~A derivations (~A)\n"
|
||||
(length derivations)
|
||||
(vector-length derivations)
|
||||
log-tag)
|
||||
(let ((derivation-ids
|
||||
(append-map!
|
||||
(lambda (chunk)
|
||||
(map (lambda (result)
|
||||
(string->number (car result)))
|
||||
(exec-query conn (insert-into-derivations conn chunk))))
|
||||
(chunk derivations 500))))
|
||||
(insert-into-derivations conn derivations)))
|
||||
|
||||
;; Do this while holding the connection so that other
|
||||
;; fibers don't also try inserting the same derivations
|
||||
(with-time-logging
|
||||
(string-append "insert-missing-derivations: updating hash table (" log-tag ")")
|
||||
(for-each (lambda (derivation derivation-id)
|
||||
(hash-set! derivation-ids-hash-table
|
||||
(derivation-file-name derivation)
|
||||
derivation-id))
|
||||
(vector-for-each (lambda (_ derivation derivation-id)
|
||||
(hash-set! derivation-ids-hash-table
|
||||
(derivation-file-name derivation)
|
||||
derivation-id))
|
||||
derivations
|
||||
derivation-ids))
|
||||
|
||||
(simple-format
|
||||
(current-error-port)
|
||||
"insert-missing-derivations: finished inserting ~A derivations (~A)\n"
|
||||
(length derivations)
|
||||
(vector-length derivations)
|
||||
log-tag)
|
||||
|
||||
(values derivations
|
||||
|
|
@ -1137,10 +1117,10 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
hash
|
||||
compressed-nar-bytevector
|
||||
uncompressed-size))))))))
|
||||
sources-ids
|
||||
(vector->list sources-ids)
|
||||
sources)))))
|
||||
derivation-ids
|
||||
derivations)))
|
||||
(vector->list derivation-ids)
|
||||
(vector->list derivations))))
|
||||
|
||||
(let ((derivations
|
||||
derivation-ids
|
||||
|
|
@ -1154,12 +1134,13 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
(string-append "insert-missing-derivations: inserting outputs ("
|
||||
log-tag ")")
|
||||
(with-resource-from-pool postgresql-connection-pool conn
|
||||
(for-each (lambda (derivation-id derivation)
|
||||
(insert-derivation-outputs conn
|
||||
derivation-id
|
||||
(derivation-outputs derivation)))
|
||||
derivation-ids
|
||||
derivations)))
|
||||
(vector-for-each
|
||||
(lambda (_ derivation-id derivation)
|
||||
(insert-derivation-outputs conn
|
||||
derivation-id
|
||||
(derivation-outputs derivation)))
|
||||
derivation-ids
|
||||
derivations)))
|
||||
|
||||
(with-time-logging
|
||||
(string-append
|
||||
|
|
@ -1169,7 +1150,7 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
(map
|
||||
derivation-input-derivation
|
||||
(append-map derivation-inputs
|
||||
derivations))))
|
||||
(vector->list derivations)))))
|
||||
(unless (null? input-derivations)
|
||||
;; Ensure all the input derivations exist
|
||||
(for-each
|
||||
|
|
@ -1182,12 +1163,14 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
#:log-tag log-tag))
|
||||
(chunk! input-derivations 1000))))))
|
||||
|
||||
(string-append "insert-missing-derivations: done parallel (" log-tag ")")
|
||||
(simple-format
|
||||
(current-error-port)
|
||||
"debug: insert-missing-derivations: done parallel (~A)\n" log-tag)
|
||||
(with-resource-from-pool postgresql-connection-pool conn
|
||||
(with-time-logging
|
||||
(simple-format
|
||||
#f "insert-missing-derivations: inserting inputs for ~A derivations (~A)"
|
||||
(length derivations)
|
||||
(vector-length derivations)
|
||||
log-tag)
|
||||
(insert-derivation-inputs conn
|
||||
derivation-ids
|
||||
|
|
@ -1913,25 +1896,28 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
(define inferior-lint-checkers-data
|
||||
(call-with-inferior
|
||||
(lambda (inferior inferior-store)
|
||||
(inferior-lint-checkers inferior))))
|
||||
(list->vector
|
||||
(inferior-lint-checkers inferior)))))
|
||||
|
||||
(when inferior-lint-checkers-data
|
||||
(letpar& ((lint-checker-ids
|
||||
(with-resource-from-pool postgresql-connection-pool conn
|
||||
(lint-checkers->lint-checker-ids
|
||||
conn
|
||||
(map (match-lambda
|
||||
((name descriptions-by-locale network-dependent)
|
||||
(list
|
||||
name
|
||||
network-dependent
|
||||
;; Uses of sort may cause problems
|
||||
(call-with-worker-thread
|
||||
utility-thread-channel
|
||||
(lambda ()
|
||||
(lint-checker-description-data->lint-checker-description-set-id
|
||||
conn descriptions-by-locale))))))
|
||||
inferior-lint-checkers-data))))
|
||||
(vector-map
|
||||
(match-lambda*
|
||||
((_ (name descriptions-by-locale network-dependent))
|
||||
(list
|
||||
name
|
||||
network-dependent
|
||||
;; Uses of sort may cause problems
|
||||
(call-with-worker-thread
|
||||
utility-thread-channel
|
||||
(lambda ()
|
||||
(lint-checker-description-data->lint-checker-description-set-id
|
||||
conn
|
||||
descriptions-by-locale))))))
|
||||
inferior-lint-checkers-data))))
|
||||
(lint-warnings-data
|
||||
(fibers-map
|
||||
(match-lambda
|
||||
|
|
@ -1946,7 +1932,8 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
(inferior-lint-warnings inferior
|
||||
inferior-store
|
||||
checker-name)))))))
|
||||
inferior-lint-checkers-data)))
|
||||
(vector->list
|
||||
inferior-lint-checkers-data))))
|
||||
|
||||
(let ((package-ids (fibers-force package-ids-promise)))
|
||||
(with-resource-from-pool postgresql-connection-pool conn
|
||||
|
|
@ -1955,20 +1942,21 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
(fibers-force guix-revision-id-promise)
|
||||
lint-checker-ids)
|
||||
|
||||
(let ((lint-warning-ids
|
||||
(insert-lint-warnings
|
||||
(let ((lint-warning-id-vectors
|
||||
(with-time-logging "inserting lint warnings"
|
||||
(insert-lint-warnings
|
||||
conn
|
||||
package-ids
|
||||
lint-checker-ids
|
||||
lint-warnings-data))))
|
||||
(with-time-logging "inserting guix revision lint warnings"
|
||||
(for-each
|
||||
(lambda (lint-warning-ids)
|
||||
(insert-guix-revision-lint-warnings
|
||||
conn
|
||||
package-ids
|
||||
lint-checker-ids
|
||||
lint-warnings-data)))
|
||||
(chunk-for-each!
|
||||
(lambda (lint-warning-ids-chunk)
|
||||
(insert-guix-revision-lint-warnings
|
||||
conn
|
||||
(fibers-force guix-revision-id-promise)
|
||||
lint-warning-ids-chunk))
|
||||
5000
|
||||
lint-warning-ids)))))))
|
||||
(fibers-force guix-revision-id-promise)
|
||||
lint-warning-ids))
|
||||
lint-warning-id-vectors))))))))
|
||||
|
||||
(define (extract-and-store-package-derivations)
|
||||
(define packages-count
|
||||
|
|
@ -2113,7 +2101,8 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
guix-revision-id
|
||||
package-derivation-ids-chunk)))
|
||||
2000
|
||||
package-derivation-ids)))
|
||||
;; TODO Chunk more efficiently
|
||||
(vector->list package-derivation-ids))))
|
||||
|
||||
'finished)
|
||||
|
||||
|
|
@ -2386,7 +2375,7 @@ SELECT 1 FROM derivation_source_file_nars WHERE derivation_source_file_id = $1"
|
|||
(with-time-logging "updating builds.derivation_output_details_set_id"
|
||||
(update-builds-derivation-output-details-set-id
|
||||
conn
|
||||
(string->number (fibers-force guix-revision-id-promise)))))
|
||||
(fibers-force guix-revision-id-promise))))
|
||||
(begin
|
||||
(simple-format #t "Failed to generate store item for ~A\n"
|
||||
commit)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue