guix-data-service/guix-data-service/heap-profiler.scm
Christopher Baines ca5f0036f3 Add a heap-profiler module
Taken from a Guile mailing list post.
2024-11-08 22:34:28 +00:00

225 lines
8.8 KiB
Scheme

;;; Copyright © 2022 Ludovic Courtès <ludo@gnu.org>
;;;
;;; Distributed under the GNU Lesser General Public License, version 3 or (at
;;; your option) any later version.
(define-module (guix-data-service heap-profiler)
#:use-module (system foreign)
#:use-module (system base types internal)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-9 gnu)
#:use-module (ice-9 match)
#:use-module (ice-9 control)
#:use-module (ice-9 format)
#:use-module (ice-9 rdelim)
#:use-module (ice-9 regex)
#:export (profile-heap))
(define-immutable-record-type <memory-mapping>
(memory-mapping start end permissions name)
memory-mapping?
(start memory-mapping-start)
(end memory-mapping-end)
(permissions memory-mapping-permissions)
(name memory-mapping-name))
(define (memory-mappings pid) ;based on Guile's 'gc-profile.scm'
"Return an list of alists, each of which contains information about a memory
mapping of process @var{pid}. This information is obtained by reading
@file{/proc/PID/maps} on Linux. See `procs(5)' for details."
(define mapping-line-rx
;; As of Linux 2.6.32.28, an `maps' line looks like this:
;; "00400000-0041d000 r--p 00000000 fd:00 7926441 /bin/cat".
(make-regexp
"^([[:xdigit:]]+)-([[:xdigit:]]+) ([rwx-]{3}[ps]) ([[:xdigit:]]+) (fd|[[:xdigit:]]{2}):[[:xdigit:]]{2} [0-9]+[[:blank:]]+(.*)$"))
(call-with-input-file (format #f "/proc/~a/maps" pid)
(lambda (port)
(let loop ((result '()))
(match (read-line port)
((? eof-object?)
(reverse result))
(line
(cond ((regexp-exec mapping-line-rx line)
=>
(lambda (match)
(let ((start (string->number (match:substring match 1)
16))
(end (string->number (match:substring match 2)
16))
(perms (match:substring match 3))
(name (match:substring match 6)))
(loop (cons (memory-mapping
start end perms
(if (string=? name "")
#f
name))
result)))))
(else
(loop result)))))))))
;; (define random-valid-address
;; ;; XXX: This is only in libgc with back pointers.
;; (let ((ptr (false-if-exception
;; (dynamic-func "GC_generate_random_valid_address" (dynamic-link)))))
;; (if ptr
;; (pointer->procedure '* ptr '())
;; (const #f))))
(define (heap-sections)
(filter (lambda (mapping)
(and (not (memory-mapping-name mapping))
(string=? "rw-p" (memory-mapping-permissions mapping))))
(memory-mappings (getpid))))
(define (random-valid-address heap-sections)
;; Mimic 'GC_generate_random_valid_address', which is only available with
;; '-DBACK_PTRS' builds of libgc.
(define heap-size
(fold (lambda (mapping size)
(+ size (- (memory-mapping-end mapping)
(memory-mapping-start mapping))))
0
heap-sections))
(let loop ((sections heap-sections)
(size 0)
(offset (random heap-size)))
(match sections
(() #f)
((section . rest)
(let* ((start (memory-mapping-start section))
(end (memory-mapping-end section))
(section-size (- end start)))
(if (< offset section-size)
(let ((result (base-pointer (+ start offset))))
;; (pk 'p (number->string (+ start offset) 16) result)
(if (null-pointer? result)
(loop heap-sections 0 (random heap-size)) ;retry
result))
(loop rest
(+ size section-size)
(- offset section-size))))))))
(define object-size
(pointer->procedure size_t
(dynamic-func "GC_size" (dynamic-link))
'(*)))
(define base-pointer
(pointer->procedure '*
(dynamic-func "GC_base" (dynamic-link))
(list uintptr_t)))
(define (heap-tag->type-name word)
"Return the type name as a symbol corresponding to the tag WORD."
(match (let/ec return
(let-syntax ((tag-name (syntax-rules ()
((_ name pred mask tag)
(when (= (logand word mask) tag)
(return 'name))))))
(visit-heap-tags tag-name)
'unknown))
('program
(cond ((= (logand word #x1000) #x1000)
'partial-continuation)
((= (logand word #x2000) #x2000)
'foreign-program)
((= (logand word #x800) #x800)
'continuation)
((= (logand word #x400) #x400)
'primitive-generic)
((= (logand word #x200) #x200)
'primitive)
((= (logand word #x100) #x100)
'boot-program)
(else
'program)))
(type
type)))
(define* (profile-heap #:key (sample-count 100000))
"Pick SAMPLE-COUNT addresses in the GC-managed heap and display a profile
of this sample per data type."
(define heap-size
(assoc-ref (gc-stats) 'heap-size))
(define heap
(heap-sections))
(let ((objects (make-hash-table 57))
(visited (make-hash-table)))
(let loop ((i sample-count))
(unless (zero? i)
(let ((address (random-valid-address heap)))
(if (hashv-ref visited (pointer-address address))
(loop i)
(begin
(hashv-set! visited (pointer-address address) #t)
(let* ((tag (pointer-address (dereference-pointer address)))
(type (heap-tag->type-name tag))
(size (match type
('pair (* 2 (sizeof '*)))
('vector
(min (ash tag -8)
(object-size address)))
(_ (object-size address)))))
;; (when (eq? 'unknown type)
;; (pk (object-size address)))
;; (when (eq? 'vector type)
;; (pk 'vector size 'tag tag 'addr address 'vs (object-size address)))
(hashq-set! objects type
(match (hashq-ref objects type '(0 . 0))
((count . total)
(cons (+ count 1) (+ total size))))))
(loop (- i 1)))))))
(let ((grand-total (hash-fold (lambda (type stats result)
(match stats
((_ . total)
(+ total result))))
0
objects)))
(format (current-error-port)
" % type self avg obj size~%")
(for-each (match-lambda
((type . (count . total))
(format (current-error-port) "~5,1f ~30a ~14h ~7,1f~%"
(* 100. (/ total grand-total))
type total
(/ total count 1.))))
(sort (hash-map->list cons objects)
(match-lambda*
(((_ . (count1 . total1)) (_ . (count2 . total2)))
(or (> total1 total2)
(and (= total1 total2)
(> count1 count2)))))))
(format (current-error-port) "sampled heap: ~h MiB (heap size: ~h MiB)~%"
(/ grand-total (expt 2. 20))
(/ heap-size (expt 2. 20))))))
(define (heap-samples type count)
"Sample COUNT objects of the given TYPE, a symbol such as 'vector, and
return them.
WARNING: This can crash your application as this could pick bogus or
finalized objects."
(define heap
(heap-sections))
(let ((visited (make-hash-table)))
(let loop ((i count)
(objects '()))
(if (zero? i)
objects
(let ((address (random-valid-address heap)))
(if (hashv-ref visited (pointer-address address))
(loop i objects)
(begin
(hashv-set! visited (pointer-address address) #t)
(let ((tag (pointer-address (dereference-pointer address))))
(if (eq? type (heap-tag->type-name tag))
(loop (- i 1)
(cons (pointer->scm address) objects))
(loop i objects))))))))))