Kill long running load new revision jobs

There are some revisions of Guix which take forever to process (or days at
least). To avoid jobs being processed forever, kill them after they've been
running for a while (default 24 hours).
This commit is contained in:
Christopher Baines 2019-07-12 20:02:44 +01:00
parent 83ef624b97
commit 3dfa9212f1

View file

@ -21,10 +21,15 @@
4)) 4))
1)) 1))
(define default-timeout
(* (* 60 60) ;; 1 hour in seconds
24))
(define* (process-jobs-concurrently fetch-new-jobs (define* (process-jobs-concurrently fetch-new-jobs
process-job process-job
#:key (max-processes #:key (max-processes
default-max-processes)) default-max-processes)
(timeout default-timeout))
(define processes (define processes
(make-hash-table)) (make-hash-table))
@ -44,9 +49,10 @@
"\n" "\n"
(string-concatenate (string-concatenate
(hash-map->list (hash-map->list
(lambda (pid job-args) (match-lambda*
((pid (start-time job-args))
(format #f " pid: ~5d job args: ~a\n" (format #f " pid: ~5d job args: ~a\n"
pid job-args)) pid job-args)))
processes)) processes))
"\n"))) "\n")))
@ -59,17 +65,32 @@
;; No process to wait for ;; No process to wait for
#f) #f)
((pid . status) ((pid . status)
(let ((job-args (hashv-ref processes pid)))
(hashv-remove! processes pid) (hashv-remove! processes pid)
(simple-format (simple-format (current-error-port)
(current-error-port)
"pid ~A failed with status ~A\n" "pid ~A failed with status ~A\n"
pid status)) pid status)
;; Recurse, to check for other finished processes.
(wait-on-processes)))) (wait-on-processes))))
(lambda (key . args) (lambda (key . args)
(simple-format #t "key ~A args ~A\n" (simple-format #t "key ~A args ~A\n"
key args)))) key args))))
(define (kill-long-running-processes)
(hash-map->list
(match-lambda*
((pid (start-time job-args))
(let ((running-for
(- (current-time) start-time)))
(when (> running-for timeout)
(display
(simple-format
#f "sending SIGTERM to pid ~A started at ~A, now running for ~A\n"
pid start-time running-for)
(current-error-port))
(kill pid SIGTERM)))))
processes))
(define (fork-and-process-job job-args) (define (fork-and-process-job job-args)
(match (primitive-fork) (match (primitive-fork)
(0 (0
@ -80,10 +101,12 @@
(lambda () (lambda ()
(primitive-exit 127)))) (primitive-exit 127))))
(pid (pid
(hashv-set! processes pid job-args) (hashv-set! processes pid
(list (current-time) job-args))
#t))) #t)))
(while #t (while #t
(kill-long-running-processes)
(wait-on-processes) (wait-on-processes)
(display-status) (display-status)
(match (fetch-new-jobs) (match (fetch-new-jobs)