diff --git a/data-lib/data/gvector.rkt b/data-lib/data/gvector.rkt index ef36797..4098668 100644 --- a/data-lib/data/gvector.rkt +++ b/data-lib/data/gvector.rkt @@ -3,16 +3,23 @@ (require (for-syntax racket/base syntax/contract syntax/for-body) + racket/performance-hint racket/serialize + racket/fixnum racket/contract/base racket/dict + racket/unsafe/ops racket/vector racket/struct) (define DEFAULT-CAPACITY 10) +(define MIN-CAPACITY 8) + (define (make-gvector #:capacity [capacity DEFAULT-CAPACITY]) - (gvector (make-vector capacity #f) 0)) + (unless (exact-nonnegative-integer? capacity) + (raise-argument-error* 'make-gvector 'data/gvector "exact-nonnegative-integer?" capacity)) + (gvector (make-vector (max capacity MIN-CAPACITY) 0) 0)) (define gvector* (let ([gvector @@ -29,43 +36,62 @@ (unless (< index hi) (raise-range-error who "gvector" "" index gv 0 (sub1 hi)))) -;; ensure-free-space! : GVector Nat -> Void -(define (ensure-free-space! gv needed-free-space) - (define vec (gvector-vec gv)) - (define n (gvector-n gv)) - (define cap (vector-length vec)) - (define needed-cap (+ n needed-free-space)) - (unless (<= needed-cap cap) - (define new-cap - (let loop ([new-cap (max DEFAULT-CAPACITY cap)]) - (if (<= needed-cap new-cap) new-cap (loop (* 2 new-cap))))) - (define new-vec (make-vector new-cap #f)) - (vector-copy! new-vec 0 vec) - (set-gvector-vec! gv new-vec))) - -(define gvector-add! - (case-lambda - [(gv item) - (ensure-free-space! gv 1) - (define n (gvector-n gv)) - (define v (gvector-vec gv)) - (vector-set! v n item) - (set-gvector-n! gv (add1 n))] - [(gv . items) - (define item-count (length items)) - (ensure-free-space! gv item-count) - (define n (gvector-n gv)) - (define v (gvector-vec gv)) - (for ([index (in-naturals n)] [item (in-list items)]) - (vector-set! v index item)) - (set-gvector-n! gv (+ n item-count))])) +(begin-encourage-inline + + (define (check-gvector who gv) + (unless (gvector? gv) + (raise-argument-error* who 'data/gvector "gvector?" gv))) + + + ;; ensure-free-space-vec! : Vector Nat Nat -> Vector/#f + (define (ensure-free-space-vec! vec n needed-free-space) + (define cap (unsafe-vector*-length vec)) + (define needed-cap (unsafe-fx+ n needed-free-space)) + (cond [(<= needed-cap cap) #f] + [else + ;; taken from Rust's raw_vec implementation + (let* ([new-cap (unsafe-fxmax (unsafe-fx* 2 cap) needed-cap)] + [new-cap (unsafe-fxmax new-cap MIN-CAPACITY)]) + (vector*-extend vec new-cap 0))])) + + (define (ensure-free-space! gv needed-free-space) + (define v (ensure-free-space-vec! (gvector-vec gv) (gvector-n gv) needed-free-space)) + (when v (set-gvector-vec! gv v))) + + (define-syntax-rule (define/ensure-space! (n v) gv needed-free-space) + (begin (define n (gvector-n gv)) + (define v1 (gvector-vec gv)) + (define v2 (ensure-free-space-vec! v1 n needed-free-space)) + (define v (if v2 (begin (set-gvector-vec! gv v2) v2) v1)))) + + ;; only safe on unchaperoned gvectors + (define (unsafe-gvector-add! gv item) + (define/ensure-space! (n v) gv 1) + (unsafe-vector*-set! v n item) + (set-gvector-n! gv (unsafe-fx+ 1 n))) + + (define gvector-add! + (case-lambda + [(gv item) + (check-gvector 'gvector-add! gv) + (define/ensure-space! (n v) gv 1) + (unsafe-vector*-set! v n item) + (set-gvector-n! gv (unsafe-fx+ 1 n))] + [(gv . items) + (check-gvector 'gvector-add! gv) + (define item-count (length items)) + (define/ensure-space! (n v) gv item-count) + (for ([index (in-naturals n)] [item (in-list items)]) + (unsafe-vector*-set! v index item)) + (set-gvector-n! gv (+ n item-count))]))) ;; SLOW! (define (gvector-insert! gv index item) ;; This does (n - index) redundant copies on resize, but that ;; happens rarely and I prefer the simpler code. - (define n (gvector-n gv)) + (check-gvector 'gvector-insert! gv) (check-index 'gvector-insert! gv index #t) + (define n (gvector-n gv)) (ensure-free-space! gv 1) (define v (gvector-vec gv)) (vector-copy! v (add1 index) v index n) @@ -97,6 +123,7 @@ ;; SLOW! (define (gvector-remove! gv index) + (check-gvector 'gvector-remove! gv) (define n (gvector-n gv)) (define v (gvector-vec gv)) (check-index 'gvector-remove! gv index #f) @@ -106,6 +133,7 @@ (trim! gv)) (define (gvector-remove-last! gv) + (check-gvector 'gvector-remove-last! gv) (let ([n (gvector-n gv)] [v (gvector-vec gv)]) (unless (> n 0) (error 'gvector-remove-last! "empty")) @@ -114,45 +142,74 @@ last-val)) (define (gvector-count gv) + (check-gvector 'gvector-count gv) (gvector-n gv)) (define none (gensym 'none)) (define (gvector-ref gv index [default none]) + (check-gvector 'gvector-ref gv) (unless (exact-nonnegative-integer? index) (raise-type-error 'gvector-ref "exact nonnegative integer" index)) - (if (< index (gvector-n gv)) - (vector-ref (gvector-vec gv) index) - (cond [(eq? default none) - (check-index 'gvector-ref gv index #f)] - [(procedure? default) (default)] - [else default]))) + (let ([v (gvector-vec gv)]) + (if (< index (gvector-n gv)) + (unsafe-vector*-ref v index) + (cond [(eq? default none) + (check-index 'gvector-ref gv index #f)] + [(procedure? default) (default)] + [else default])))) + +(define (gvector-append! gv gv*) + (check-gvector 'gvector-append! gv) + (check-gvector 'gvector-append! gv*) + (ensure-free-space! gv (gvector-n gv*)) + (vector-copy! (gvector-vec gv) (gvector-n gv) (gvector-vec gv*))) + +(define (gvector-append gv gv*) + (check-gvector 'gvector-append gv) + (check-gvector 'gvector-append gv*) + ;; retain the spare capacity of gv* + (define gv0 (make-gvector #:capacity (+ (gvector-n gv) (vector-length (gvector-vec gv*))))) + (define v0 (gvector-vec gv0)) + (vector-copy! v0 0 (gvector-vec gv) 0 (gvector-n gv)) + (vector-copy! v0 (gvector-n gv) (gvector-vec gv*) (gvector-n gv*))) + ;; gvector-set! with index = |gv| is interpreted as gvector-add! (define (gvector-set! gv index item) - (let ([n (gvector-n gv)]) + (check-gvector 'gvector-set! gv) + (let ([v (gvector-vec gv)] + [n (gvector-n gv)]) (check-index 'gvector-set! gv index #t) - (if (= index n) - (gvector-add! gv item) - (vector-set! (gvector-vec gv) index item)))) + (if (unsafe-fx= index n) + (if (impersonator? gv) + (gvector-add! gv item) + (unsafe-gvector-add! gv item)) + (unsafe-vector*-set! v index item)))) ;; creates a snapshot vector (define (gvector->vector gv) - (vector-copy (gvector-vec gv) 0 (gvector-n gv))) + (check-gvector 'gvector->vector gv) + (vector*-copy (gvector-vec gv) 0 (gvector-n gv))) (define (gvector->list gv) + (check-gvector 'gvector->list gv) (vector->list (gvector->vector gv))) ;; constructs a gvector (define (vector->gvector v) + (unless (vector? v) + (raise-argument-error* vector->gvector 'data/gvector "vector?" v)) (define lv (vector-length v)) - (define gv (make-gvector #:capacity lv)) + (define gv (make-gvector #:capacity (max lv DEFAULT-CAPACITY))) (define nv (gvector-vec gv)) (vector-copy! nv 0 v) (set-gvector-n! gv lv) gv) (define (list->gvector v) + (unless (list? v) + (raise-argument-error* list->gvector 'data/gvector "list?" v)) (vector->gvector (list->vector v))) ;; Iteration methods @@ -165,8 +222,8 @@ (define (gvector-iterate-next gv iter) (check-index 'gvector-iterate-next gv iter #f) (let ([n (gvector-n gv)]) - (and (< (add1 iter) n) - (add1 iter)))) + (and (< (unsafe-fx+ 1 iter) n) + (unsafe-fx+ 1 iter)))) (define (gvector-iterate-key gv iter) (check-index 'gvector-iterate-key gv iter #f) @@ -177,8 +234,7 @@ (gvector-ref gv iter)) (define (in-gvector gv) - (unless (gvector? gv) - (raise-type-error 'in-gvector "gvector" gv)) + (check-gvector 'in-gvector gv) (in-dict-values gv)) (define-sequence-syntax in-gvector* @@ -192,11 +248,11 @@ (:do-in ([(gv) gv-expr-c]) (void) ;; outer-check; handled by contract ([index 0] [vec (gvector-vec gv)] [n (gvector-n gv)]) ;; loop bindings - (< index n) ;; pos-guard - ([(var) (vector-ref vec index)]) ;; inner bindings + (unsafe-fx< index n) ;; pos-guard + ([(var) (unsafe-vector*-ref vec index)]) ;; inner bindings #t ;; pre-guard #t ;; post-guard - ((add1 index) (gvector-vec gv) (gvector-n gv)))]))] + ((unsafe-fx+ 1 index) vec n))]))] [[(var ...) (in-gv gv-expr)] (with-syntax ([gv-expr-c (wrap-expr/c #'gvector? #'gv-expr #:macro #'in-gv)]) (syntax/loc stx @@ -206,25 +262,34 @@ (define-syntax (for/gvector stx) (syntax-case stx () [(_ (clause ...) . body) + #'(for/gvector #:capacity DEFAULT-CAPACITY (clause ...) . body)] + [(_ #:capacity cap (clause ...) . body) (with-syntax ([((pre-body ...) post-body) (split-for-body stx #'body)]) (quasisyntax/loc stx - (let ([gv (make-gvector)]) + (let ([gv (make-gvector #:capacity cap)]) (for/fold/derived #,stx () (clause ...) pre-body ... - (call-with-values (lambda () . post-body) - (lambda args (apply gvector-add! gv args) (values)))) + (call-with-values (lambda () . post-body) + (case-lambda + [(one) (unsafe-gvector-add! gv one)] + [args (apply gvector-add! gv args)])) + (values)) gv)))])) (define-syntax (for*/gvector stx) (syntax-case stx () [(_ (clause ...) . body) + #'(for/gvector #:capacity DEFAULT-CAPACITY (clause ...) . body)] + [(_ #:capacity cap (clause ...) . body) (with-syntax ([((pre-body ...) post-body) (split-for-body stx #'body)]) (quasisyntax/loc stx - (let ([gv (make-gvector)]) + (let ([gv (make-gvector #:capacity cap)]) (for*/fold/derived #,stx () (clause ...) pre-body ... (call-with-values (lambda () . post-body) - (lambda args (apply gvector-add! gv args) (values)))) + (case-lambda + [(one) (begin (unsafe-gvector-add! gv one) (values))] + [args (begin (apply gvector-add! gv args) (values))]))) gv)))])) (struct gvector (vec n) @@ -276,39 +341,26 @@ #t (or (current-load-relative-directory) (current-directory)))) -(provide/contract - [gvector? - (-> any/c any)] - [rename gvector* gvector - (->* () () #:rest any/c gvector?)] - [make-gvector - (->* () (#:capacity exact-positive-integer?) gvector?)] - [gvector-ref - (->* (gvector? exact-nonnegative-integer?) (any/c) any)] - [gvector-set! - (-> gvector? exact-nonnegative-integer? any/c any)] - [gvector-add! - (->* (gvector?) () #:rest any/c any)] - [gvector-insert! - (-> gvector? exact-nonnegative-integer? any/c any)] - [gvector-remove! - (-> gvector? exact-nonnegative-integer? any)] - [gvector-remove-last! - (-> gvector? any)] - [gvector-count - (-> gvector? any)] - [gvector->vector - (-> gvector? vector?)] - [gvector->list - (-> gvector? list?)] - [vector->gvector - (-> vector? gvector?)] - [list->gvector - (-> list? gvector?)]) - -(provide (rename-out [in-gvector* in-gvector]) - for/gvector - for*/gvector) +(provide + gvector? + (rename-out [gvector* gvector]) + make-gvector + gvector-ref + gvector-set! + gvector-add! + gvector-insert! + gvector-remove! + gvector-remove-last! + gvector-append + gvector-append! + gvector-count + gvector->vector + gvector->list + vector->gvector + list->gvector + (rename-out [in-gvector* in-gvector]) + for/gvector + for*/gvector) (module+ deserialize (provide deserialize-gvector) diff --git a/data-lib/info.rkt b/data-lib/info.rkt index ff63629..23f284c 100644 --- a/data-lib/info.rkt +++ b/data-lib/info.rkt @@ -1,6 +1,6 @@ #lang info (define collection 'multi) -(define deps '(("base" #:version "6.2.900.6"))) +(define deps '(("base" #:version "8.12.0.10"))) (define build-deps '("rackunit-lib")) (define pkg-desc "implementation (no documentation) part of \"data\"") diff --git a/data-test/tests/data/gvector-stress.rkt b/data-test/tests/data/gvector-stress.rkt new file mode 100644 index 0000000..608847a --- /dev/null +++ b/data-test/tests/data/gvector-stress.rkt @@ -0,0 +1,280 @@ +#lang racket/base +(require data/gvector + rackunit) + +;; Multi-threaded stress tests for gvector memory safety. +;; Tests both regular threads (concurrent access) and parallel +;; threads (using #:pool for true parallelism). +;; +;; The primary goal is memory safety: no crashes or segfaults. +;; Count correctness is checked for non-parallel (coroutine) threads +;; where operations are interleaved but not truly concurrent. +;; For parallel threads, count may be incorrect due to races on the +;; n field, but operations must not cause memory-unsafe behavior. + +(define ITEMS-PER-THREAD 1000) +(define NUM-THREADS 8) + +;; Test 1: Concurrent adds from multiple coroutine threads +(test-case "concurrent gvector-add!" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (define threads + (for/list ([t (in-range NUM-THREADS)]) + (thread (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-add! gv i)))))) + (for-each thread-wait threads) + (check-equal? (gvector-count gv) (* NUM-THREADS ITEMS-PER-THREAD)))) + +;; Test 2: Parallel adds — tests memory safety, not count correctness +(test-case "parallel gvector-add!" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (define pool (make-parallel-thread-pool NUM-THREADS)) + (define threads + (for/list ([t (in-range NUM-THREADS)]) + (thread #:pool pool + (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-add! gv i)))))) + (parallel-thread-pool-close pool) + (for-each thread-wait threads) + ;; Count may be less than expected due to n-field races, + ;; but we should not crash + (check-true (> (gvector-count gv) 0)))) + +;; Test 3: Concurrent reads while adding +(test-case "concurrent add + ref" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (define stop? #f) + (define writers + (for/list ([t (in-range 4)]) + (thread (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-add! gv i)))))) + ;; Reader threads - should never crash even with stale data + (define readers + (for/list ([t (in-range 4)]) + (thread (lambda () + (let loop () + (define n (gvector-count gv)) + (when (> n 0) + (gvector-ref gv (sub1 n) #f)) + (unless stop? + (loop))))))) + (for-each thread-wait writers) + (set! stop? #t) + (for-each thread-wait readers) + (check-equal? (gvector-count gv) (* 4 ITEMS-PER-THREAD)))) + +;; Test 4: Parallel reads while adding — the key memory safety test. +;; Without vec-before-n ordering, this can segfault when a concurrent +;; remove shrinks the vector between reading n and reading vec. +(test-case "parallel add + ref" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (define stop? #f) + (define pool (make-parallel-thread-pool 8)) + (define writers + (for/list ([t (in-range 4)]) + (thread #:pool pool + (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-add! gv i)))))) + (define readers + (for/list ([t (in-range 4)]) + (thread #:pool pool + (lambda () + (let loop () + (define n (gvector-count gv)) + (when (> n 0) + (gvector-ref gv (sub1 n) #f)) + (unless stop? + (loop))))))) + (parallel-thread-pool-close pool) + (for-each thread-wait writers) + (set! stop? #t) + (for-each thread-wait readers))) + +;; Test 5: Concurrent iteration while adding +(test-case "concurrent add + in-gvector" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (define stop? #f) + (define writers + (for/list ([t (in-range 4)]) + (thread (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-add! gv i)))))) + (define readers + (for/list ([t (in-range 4)]) + (thread (lambda () + (let loop () + (for ([x (in-gvector gv)]) + (void)) + (unless stop? + (loop))))))) + (for-each thread-wait writers) + (set! stop? #t) + (for-each thread-wait readers))) + +;; Test 6: Parallel iteration while adding +(test-case "parallel add + in-gvector" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (define stop? #f) + (define pool (make-parallel-thread-pool 8)) + (define writers + (for/list ([t (in-range 4)]) + (thread #:pool pool + (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-add! gv i)))))) + (define readers + (for/list ([t (in-range 4)]) + (thread #:pool pool + (lambda () + (let loop () + (for ([x (in-gvector gv)]) + (void)) + (unless stop? + (loop))))))) + (parallel-thread-pool-close pool) + (for-each thread-wait writers) + (set! stop? #t) + (for-each thread-wait readers))) + +;; Test 7: Concurrent add + remove-last! +(test-case "concurrent add + remove-last!" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (for ([i (in-range 100)]) + (gvector-add! gv i)) + (define writers + (for/list ([t (in-range 4)]) + (thread (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-add! gv i)))))) + (define removers + (for/list ([t (in-range 2)]) + (thread (lambda () + (let loop ([removed 0]) + (when (< removed 100) + (with-handlers ([exn:fail? (lambda (e) (loop removed))]) + (gvector-remove-last! gv) + (loop (add1 removed))))))))) + (for-each thread-wait writers) + (for-each thread-wait removers))) + +;; Test 8: Parallel add + remove — the key race condition test. +;; Without vec-before-n ordering in gvector-ref, a remove that shrinks +;; the backing vector can cause an out-of-bounds unsafe-vector*-ref. +(test-case "parallel add + remove-last!" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (for ([i (in-range 100)]) + (gvector-add! gv i)) + (define pool (make-parallel-thread-pool 6)) + (define writers + (for/list ([t (in-range 4)]) + (thread #:pool pool + (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-add! gv i)))))) + (define removers + (for/list ([t (in-range 2)]) + (thread #:pool pool + (lambda () + (let loop ([removed 0]) + (when (< removed 100) + (with-handlers ([exn:fail? (lambda (e) (loop removed))]) + (gvector-remove-last! gv) + (loop (add1 removed))))))))) + (parallel-thread-pool-close pool) + (for-each thread-wait writers) + (for-each thread-wait removers))) + +;; Test 9: Concurrent gvector-set! +(test-case "concurrent gvector-set!" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (for ([i (in-range 100)]) + (gvector-add! gv i)) + (define threads + (for/list ([t (in-range NUM-THREADS)]) + (thread (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-set! gv (modulo i 100) i)))))) + (for-each thread-wait threads) + (check-equal? (gvector-count gv) 100))) + +;; Test 10: Parallel gvector-set! +(test-case "parallel gvector-set!" + (for ([_ (in-range 10)]) + (define gv (make-gvector)) + (for ([i (in-range 100)]) + (gvector-add! gv i)) + (define pool (make-parallel-thread-pool NUM-THREADS)) + (define threads + (for/list ([t (in-range NUM-THREADS)]) + (thread #:pool pool + (lambda () + (for ([i (in-range ITEMS-PER-THREAD)]) + (gvector-set! gv (modulo i 100) i)))))) + (parallel-thread-pool-close pool) + (for-each thread-wait threads) + (check-equal? (gvector-count gv) 100))) + +;; Test 11: for/gvector correctness under concurrency +(test-case "for/gvector concurrent correctness" + (for ([_ (in-range 10)]) + (define ch (make-channel)) + (define threads + (for/list ([t (in-range NUM-THREADS)]) + (thread (lambda () + (define gv (for/gvector ([i (in-range ITEMS-PER-THREAD)]) i)) + (channel-put ch gv))))) + (for ([_ (in-range NUM-THREADS)]) + (define gv (channel-get ch)) + (check-equal? (gvector-count gv) ITEMS-PER-THREAD)))) + +;; Test 12: Parallel read + remove stress — specifically targets the +;; vec-before-n ordering bug. Readers continuously read the last element +;; while removers shrink the vector, which can trigger the race where +;; n is read before vec, and vec gets replaced with a shorter one. +;; We fill with many elements then remove most of them to trigger trim! +;; which replaces vec with a shorter vector. +(test-case "parallel ref + remove stress" + (for ([_ (in-range 50)]) + (define gv (make-gvector)) + ;; Fill with enough elements that removing most will trigger trim! + ;; trim! fires when cap >= 4*n, so removing 400 of 500 leaves n=100 + ;; with cap=512 (or similar), triggering shrink to 256, still safe. + ;; But if we keep removing down to n=10 with cap=256, trim! shrinks + ;; to 128, then 64, etc. During any shrink, the vec is replaced. + (for ([i (in-range 1000)]) + (gvector-add! gv i)) + (define pool (make-parallel-thread-pool 8)) + ;; Readers that aggressively read near the end of the gvector + (define readers + (for/list ([t (in-range 4)]) + (thread #:pool pool + (lambda () + (for ([_ (in-range 50000)]) + (define n (gvector-count gv)) + (when (> n 0) + ;; Access near the end where trim! is most dangerous + (gvector-ref gv (sub1 n) #f))))))) + ;; Removers that aggressively shrink the gvector + (define removers + (for/list ([t (in-range 4)]) + (thread #:pool pool + (lambda () + (for ([_ (in-range 200)]) + (with-handlers ([exn:fail? void]) + (gvector-remove-last! gv))))))) + (parallel-thread-pool-close pool) + (for-each thread-wait readers) + (for-each thread-wait removers)))