Handle conflicts in insert-missing-data-and-return-all-ids

This commit is contained in:
Christopher Baines 2024-11-29 13:49:01 +00:00
parent c949deb9d4
commit bb84e45c42

View file

@@ -323,7 +323,7 @@ WHERE table_name = $1"
")")) ")"))
missing-data) missing-data)
", ") ", ")
" RETURNING id")) " ON CONFLICT DO NOTHING"))
(define (format-json json) (define (format-json json)
;; PostgreSQL formats JSON strings differently to guile-json, so use ;; PostgreSQL formats JSON strings differently to guile-json, so use
@@ -355,16 +355,17 @@ WHERE table_name = $1"
(error (simple-format #f "normalise-values: error: ~A\n" unknown)))) (error (simple-format #f "normalise-values: error: ~A\n" unknown))))
data)) data))
(let* ((existing-entries (let* ((flattened-deduplicated-data
(if use-temporary-table?
(let ((temp-table-name
(string-append "temp_" table-name))
(data
(if sets-of-data? (if sets-of-data?
(delete-duplicates* (concatenate data)) (delete-duplicates*
(concatenate data))
(if delete-duplicates? (if delete-duplicates?
(delete-duplicates* data) (delete-duplicates* data)
data)))) data)))
(existing-entries
(if use-temporary-table?
(let ((temp-table-name
(string-append "temp_" table-name)))
;; Create a temporary table to store the data ;; Create a temporary table to store the data
(exec-query (exec-query
conn conn
@@ -379,11 +380,10 @@ WHERE table_name = $1"
"ANALYZE " temp-table-name)) "ANALYZE " temp-table-name))
;; Populate the temporary table ;; Populate the temporary table
(if (null? data) (unless (null? flattened-deduplicated-data)
'()
(with-time-logging (string-append "populating " temp-table-name) (with-time-logging (string-append "populating " temp-table-name)
(exec-query conn (exec-query conn
(insert-sql data (insert-sql flattened-deduplicated-data
#:table-name temp-table-name)))) #:table-name temp-table-name))))
;; Use the temporary table to find the existing values ;; Use the temporary table to find the existing values
(let ((result (let ((result
@@ -400,7 +400,7 @@ WHERE table_name = $1"
result)) result))
;; If not using a temporary table, just do a single SELECT query ;; If not using a temporary table, just do a single SELECT query
(if (null? data) (if (null? flattened-deduplicated-data)
'() '()
(fold (fold
(lambda (data-chunk result) (lambda (data-chunk result)
@@ -411,15 +411,10 @@ WHERE table_name = $1"
(string->number (first result))) (string->number (first result)))
#:vhash result)) #:vhash result))
vlist-null vlist-null
(chunk (if sets-of-data? (chunk flattened-deduplicated-data
(delete-duplicates*
(concatenate data))
data)
3000))))) 3000)))))
(missing-entries (missing-entries
(let loop ((lst (if sets-of-data? (let loop ((lst flattened-deduplicated-data)
(concatenate data)
data))
(result '())) (result '()))
(if (null? lst) (if (null? lst)
(if delete-duplicates? (if delete-duplicates?
@@ -440,10 +435,12 @@ WHERE table_name = $1"
'() '()
(append-map! (append-map!
(lambda (missing-entries-chunk) (lambda (missing-entries-chunk)
(map (lambda (result)
(string->number (first result)))
(exec-query conn (exec-query conn
(insert-sql missing-entries-chunk)))) (insert-sql missing-entries-chunk))
(map (lambda (row)
(string->number (first row)))
(exec-query conn (select-query missing-entries-chunk))))
(chunk missing-entries 3000)))) (chunk missing-entries 3000))))
(new-entries-lookup-vhash (new-entries-lookup-vhash
@@ -474,4 +471,4 @@ WHERE table_name = $1"
(error "missing entry" field-values)))) (error "missing entry" field-values))))
data)))) data))))
(values all-ids (values all-ids
new-entries))) (delete-duplicates/sort! new-entries <))))