;;; guix-data-service/guix-data-service/web/server.scm
;;; Christopher Baines fc6f78ca9a Move the gc watcher to start earlier
;;; This means it doesn't use the fibers sleep; it is not known whether this
;;; makes a difference.
;;; 2025-06-29 21:30:29 +02:00
;;;
;;; 374 lines
;;; 16 KiB
;;; Scheme

;;; Guix Data Service -- Information about Guix over time
;;; Copyright © 2017 Ricardo Wurmus <rekado@elephly.net>
;;; Copyright © 2019, 2020, 2022, 2023 Christopher Baines <mail@cbaines.net>
;;;
;;; This program is free software: you can redistribute it and/or
;;; modify it under the terms of the GNU Affero General Public License
;;; as published by the Free Software Foundation, either version 3 of
;;; the License, or (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Affero General Public License for more details.
;;;
;;; You should have received a copy of the GNU Affero General Public
;;; License along with this program. If not, see
;;; <http://www.gnu.org/licenses/>.
(define-module (guix-data-service web server)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-71)
#:use-module (ice-9 match)
#:use-module (ice-9 format)
#:use-module (ice-9 threads)
#:use-module (web http)
#:use-module (web request)
#:use-module (web uri)
#:use-module (system repl error-handling)
#:use-module (ice-9 atomic)
#:use-module (fibers)
#:use-module (fibers channels)
#:use-module (fibers scheduler)
#:use-module (fibers conditions)
#:use-module (knots)
#:use-module (knots web-server)
#:use-module (knots thread-pool)
#:use-module (knots resource-pool)
#:use-module (prometheus)
#:use-module (guix-data-service utils)
#:use-module (guix-data-service database)
#:use-module (guix-data-service web controller)
#:use-module (guix-data-service web util)
#:use-module (guix-data-service web render)
#:use-module (guix-data-service web view html)
#:use-module (guix-data-service model build-status)
#:use-module (guix-data-service model blocked-builds)
#:use-module (guix-data-service model guix-revision-package-derivation)
#:use-module (guix-data-service model build-background-processing-queue)
#:export (%guix-data-service-metrics-registry
start-guix-data-service-web-server))
(define (check-startup-completed startup-completed)
  ;; Return #t when the STARTUP-COMPLETED atomic box holds a true value,
  ;; and #f otherwise.
  (cond
   ((atomic-box-ref startup-completed)
    ;; Just in case atomic-box-ref is expensive, only do it while it is
    ;; still needed: after the first positive answer this top-level
    ;; binding is replaced by a procedure that always returns #t, so
    ;; later calls no longer look at the box.
    (set! check-startup-completed (const #t))
    #t)
   (else
    #f)))
(define (handler request finished?
                 body controller secret-key-base startup-completed
                 render-metrics)
  ;; Log REQUEST's method and path on the current output port, then pass
  ;; the request to CONTROLLER and return the elements of the list it
  ;; produces as multiple values.
  ;;
  ;; FINISHED? is a condition that is signalled when writing the log line
  ;; fails.  BODY, SECRET-KEY-BASE and RENDER-METRICS are handed to
  ;; CONTROLLER untouched; STARTUP-COMPLETED is reduced to a boolean with
  ;; check-startup-completed first.

  (define (log-request)
    (let ((line (format #f "~a ~a\n"
                        (request-method request)
                        (uri-path (request-uri request)))))
      (display line)))

  (define (logging-failed exn)
    ;; Best-effort report on the error port; an exception raised while
    ;; reporting is swallowed.
    (with-exception-handler
        (lambda _ #f)
      (lambda ()
        (simple-format (current-error-port)
                       "exception when logging: ~A\n" exn))
      #:unwind? #t)
    ;; If we can't log, exit
    (signal-condition! finished?))

  (with-exception-handler logging-failed log-request #:unwind? #t)

  (call-with-values
      (lambda ()
        (request->path-components-and-mime-type request))
    (lambda (path-components mime-types)
      (apply values
             (controller request
                         (cons (request-method request)
                               path-components)
                         mime-types
                         body
                         secret-key-base
                         (check-startup-completed startup-completed)
                         render-metrics)))))
;; Parameter holding the metrics registry for the data service.  Its initial
;; value is #f; start-guix-data-service-web-server stores the registry it
;; creates here.
(define %guix-data-service-metrics-registry (make-parameter #f))
;; Start the Guix Data Service web server on HOST and PORT and block until
;; the FINISHED? condition created here is signalled.
;;
;; SECRET-KEY-BASE is passed through to the request handler.
;; STARTUP-COMPLETED is an atomic box; the background fibers started here
;; poll it (via check-startup-completed) and only begin their work once it
;; holds a true value.
;;
;; Keyword arguments (both are used unconditionally, so callers have to
;; supply them):
;;   #:postgresql-statement-timeout  passed to open-postgresql-connection;
;;       divided by 1000 to get the checkout timeout of the "web" pool
;;       (presumably milliseconds -> seconds; confirm against the caller).
;;   #:postgresql-connections  number; half of it (rounded down) sizes each
;;       of the "web" and "web-reserved" connection pools.
(define* (start-guix-data-service-web-server port host secret-key-base
                                             startup-completed
                                             #:key postgresql-statement-timeout
                                             postgresql-connections)
  (define registry
    (make-metrics-registry #:namespace "guixdataservice"))

  ;; Make the registry available through both parameters.
  (%database-metrics-registry registry)
  (%guix-data-service-metrics-registry registry)

  (fluid-set! %file-port-name-canonicalization 'none)

  ;; Best effort: lift the core dump size limit, only logging on failure.
  (with-exception-handler
      (lambda (exn)
        (simple-format #t "failed enabling core dumps: ~A\n" exn))
    (lambda ()
      (setrlimit 'core #f #f))
    #:unwind? #t)

  ;; If a "core" file exists in the current directory, expose its
  ;; modification time as a gauge.
  (let ((core-file
         (string-append (getcwd) "/core"))
        (metric
         (make-gauge-metric registry
                            "core_dump_file_last_modified_seconds")))
    (when (file-exists? core-file)
      (metric-set metric
                  (stat:mtime (stat core-file)))))

  ;; Best effort: set the open file limit to 4096, only logging on failure.
  (with-exception-handler
      (lambda (exn)
        (simple-format #t "failed increasing open file limit: ~A\n" exn))
    (lambda ()
      (setrlimit 'nofile 4096 4096))
    #:unwind? #t)

  (let ((finished? (make-condition)))
    (call-with-sigint
     (lambda ()
       (run-fibers
        (lambda ()
          ;; Run a fiber on every scheduler (the current one and its remote
          ;; peers) that names the underlying thread "server N".  Failures
          ;; to set the name ('system-error) are ignored.
          (let* ((current (current-scheduler))
                 (schedulers
                  (cons current (scheduler-remote-peers current))))
            (for-each
             (lambda (i sched)
               (spawn-fiber
                (lambda ()
                  (catch 'system-error
                    (lambda ()
                      (set-thread-name
                       (string-append "server " (number->string i))))
                    (const #t)))
                sched))
             (iota (length schedulers))
             schedulers))

          (let ((requests-metric
                 (make-counter-metric registry "requests_total")))
            ;; Any exception escaping the setup below is reported as a
            ;; startup failure and terminates the process with status 1.
            (with-exception-handler
                (lambda (exn)
                  (simple-format
                   (current-error-port)
                   "\n
error: guix-data-service could not start: ~A
Check if it's already running, or whether another process is using that
port. Also, the port used can be changed by passing the --port option.\n"
                   exn)
                  (primitive-exit 1))
              (lambda ()
                (parameterize
                    ((background-connection-pool
                      (make-resource-pool
                       (lambda ()
                         (open-postgresql-connection
                          "background"
                          postgresql-statement-timeout))
                       4
                       #:name "background"
                       #:idle-seconds 5
                       #:destructor
                       (lambda (conn)
                         (close-postgresql-connection conn "background"))))
                     (connection-pool
                      (make-resource-pool
                       (lambda ()
                         (open-postgresql-connection
                          "web"
                          postgresql-statement-timeout))
                       (floor (/ postgresql-connections 2))
                       #:name "web"
                       #:idle-seconds 30
                       #:destructor
                       (lambda (conn)
                         (close-postgresql-connection conn "web"))
                       #:default-max-waiters (floor (/ postgresql-connections 2))
                       #:default-checkout-timeout (/ postgresql-statement-timeout
                                                     1000)))
                     (reserved-connection-pool
                      (make-resource-pool
                       (lambda ()
                         (open-postgresql-connection
                          "web-reserved"
                          postgresql-statement-timeout))
                       (floor (/ postgresql-connections 2))
                       #:name "web-reserved"
                       #:idle-seconds 600
                       #:destructor
                       (lambda (conn)
                         (close-postgresql-connection conn "web-reserved"))
                       #:default-max-waiters postgresql-connections
                       #:default-checkout-timeout 6)))

                  ;; Count checkout timeouts for the two web pools, labelled
                  ;; "normal" (connection-pool) or "reserved"
                  ;; (reserved-connection-pool).  Timeouts on any other pool
                  ;; are not counted.
                  (let ((resource-pool-checkout-failures-metric
                         (make-counter-metric registry
                                              "resource_pool_checkout_timeouts_total"
                                              #:labels '(pool_name))))
                    (resource-pool-default-timeout-handler
                     (lambda (pool proc timeout)
                       (let ((pool-name
                              (cond
                               ((eq? pool (connection-pool)) "normal")
                               ((eq? pool (reserved-connection-pool)) "reserved")
                               (else #f))))
                         (when pool-name
                           (metric-increment
                            resource-pool-checkout-failures-metric
                            #:label-values `((pool_name . ,pool-name))))))))

                  ;; Once startup has completed, work through the builds in
                  ;; the background processing queue: update the blocked
                  ;; builds data according to each build's latest status,
                  ;; then remove the build from the queue.
                  (spawn-fiber
                   (lambda ()
                     (while (not (check-startup-completed startup-completed))
                       (sleep 1))

                     ;; The outer handler swallows the exception after the
                     ;; inner one, which runs without unwinding, has logged
                     ;; it and re-raised it.
                     (with-exception-handler
                         (lambda _ #f)
                       (lambda ()
                         (with-exception-handler
                             (lambda (exn)
                               (simple-format
                                #t
                                "exception when processing the background jobs queue\n")
                               (print-backtrace-and-exception/knots
                                exn)
                               (raise-exception exn))
                           (lambda ()
                             (call-with-resource-from-pool (background-connection-pool)
                               (lambda (conn)
                                 (let ((build-ids
                                        (select-background-processing-build-ids conn)))
                                   (unless (null? build-ids)
                                     (simple-format #t "processing ~A builds from the background queue\n"
                                                    (length build-ids)))
                                   (for-each
                                    (lambda (build-id)
                                      (let ((status (select-latest-build-status-by-build-id
                                                     conn
                                                     build-id)))
                                        ;; Statuses not listed here need no
                                        ;; blocked-builds update.
                                        (cond
                                         ((string=? status "succeeded")
                                          (handle-removing-blocking-build-entries-for-successful-builds
                                           conn
                                           (list build-id)))
                                         ((string=? status "scheduled")
                                          (handle-blocked-builds-entries-for-scheduled-builds
                                           conn
                                           (list build-id)))
                                         ((member status '("failed"
                                                           "failed-dependency"
                                                           "canceled"))
                                          (handle-populating-blocked-builds-for-build-failures
                                           conn
                                           (list build-id)))))
                                      (delete-background-processing-entries-for-build-ids
                                       conn
                                       (list build-id)))
                                    build-ids)))))))
                       #:unwind? #t))
                   #:parallel? #t)

                  ;; Once startup has completed, backfill the package
                  ;; derivation distribution counts using a background
                  ;; connection.
                  (spawn-fiber
                   (lambda ()
                     (while (not (check-startup-completed startup-completed))
                       (sleep 1))

                     (with-resource-from-pool (background-connection-pool) conn
                       (backfill-guix-revision-package-derivation-distribution-counts
                        conn)))
                   #:parallel? #t)

                  (let ((render-metrics (make-render-metrics registry)))
                    (run-knots-web-server
                     (lambda (request)
                       ;; Outer handler: turn any exception from request
                       ;; processing into an error response, as JSON or HTML
                       ;; depending on what the client accepts.  Resource
                       ;; pool timeouts and too-many-waiters errors give a
                       ;; 503, everything else a 500.
                       (with-exception-handler
                           (lambda (exn)
                             ;; Log the pool statistics from a separate
                             ;; fiber rather than while building the
                             ;; response.
                             (when (resource-pool-timeout-error? exn)
                               (spawn-fiber
                                (lambda ()
                                  (let* ((pool (resource-pool-timeout-error-pool exn))
                                         (stats (resource-pool-stats pool #:timeout #f)))
                                    (simple-format (current-error-port)
                                                   "resource pool timeout error: ~A, ~A\n"
                                                   pool
                                                   stats)))))

                             (let ((path-components
                                    mime-types
                                    (request->path-components-and-mime-type request))
                                   (pool-exn?
                                    (or (resource-pool-timeout-error? exn)
                                        (resource-pool-too-many-waiters-error? exn))))
                               (case (most-appropriate-mime-type
                                      mime-types
                                      '(text/html application/json))
                                 ((application/json)
                                  (apply
                                   values
                                   (render-json `((error . ,(if (%show-error-details)
                                                                (simple-format #f "~A" exn)
                                                                #f)))
                                                #:code (if pool-exn?
                                                           503
                                                           500))))
                                 (else
                                  (apply
                                   values
                                   (render-html #:sxml (error-page
                                                        (if (%show-error-details)
                                                            exn
                                                            #f))
                                                #:code (if pool-exn?
                                                           503
                                                           500)))))))
                         (lambda ()
                           ;; Inner handler: runs without unwinding, writes
                           ;; the request line plus backtrace to the error
                           ;; port in a single display call, then re-raises
                           ;; for the outer handler.
                           (with-exception-handler
                               (lambda (exn)
                                 (let* ((error-string
                                         (call-with-output-string
                                           (lambda (port)
                                             (simple-format
                                              port
                                              "exception when processing: ~A ~A\n"
                                              (request-method request)
                                              (uri-path (request-uri request)))
                                             (print-backtrace-and-exception/knots
                                              exn
                                              #:port port)))))
                                   (display error-string
                                            (current-error-port)))

                                 (raise-exception exn))
                             (lambda ()
                               (metric-increment requests-metric)

                               (let ((body (read-request-body request)))
                                 (handler request finished? body controller
                                          secret-key-base
                                          startup-completed
                                          render-metrics)))))
                         #:unwind? #t))
                     #:connection-buffer-size (expt 2 14)
                     #:host host
                     #:port port))))
              ;; Passed to with-exception-handler, not left inside the
              ;; thunk, so the startup-failure handler unwinds like every
              ;; other handler in this file.
              #:unwind? #t))

          ;; Guile sometimes just seems to stop listening on the port, so try
          ;; and detect this and quit
          (spawn-port-monitoring-fiber port finished?)

          (wait finished?))
        #:hz 0
        #:parallelism 1))
     finished?)))