From 7435d2ece079af4dde799fe95d6910a0a6566005 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Fri, 12 Sep 2025 18:46:25 +0300 Subject: [PATCH 1/4] Small fixes suggested by @lehins Co-authored-by: Alexey Kuleshevich --- vector/tests/Utilities.hs | 2 -- 1 file changed, 2 deletions(-) diff --git a/vector/tests/Utilities.hs b/vector/tests/Utilities.hs index 3c88926c..e4e6dc2d 100644 --- a/vector/tests/Utilities.hs +++ b/vector/tests/Utilities.hs @@ -266,8 +266,6 @@ xs // ps = go xs ps' 0 go [] _ _ = [] --- withIndexFirst :: (Int -> a -> [a]) -> [a] -> [a] - withIndexFirst :: (((Int, a) -> b) -> [(Int, a)] -> c) -> ((Int -> a -> b) -> [a] -> c) withIndexFirst m f = m (uncurry f) . zip [0::Int ..] From 11905f3fd76b8f051f5bce80890d7db1f9a15f7b Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Fri, 12 Sep 2025 19:45:12 +0300 Subject: [PATCH 2/4] Add test for allocations This is good way to check that GHC generate tight non-allocating loops for monadic/applicative functions. --- vector/tests-inspect/Inspect/Alloc.hs | 74 +++++++++++++++++++++++++++ vector/tests-inspect/Test/Alloc.hs | 59 +++++++++++++++++++++ vector/tests-inspect/main.hs | 2 + vector/vector.cabal | 3 ++ 4 files changed, 138 insertions(+) create mode 100644 vector/tests-inspect/Inspect/Alloc.hs create mode 100644 vector/tests-inspect/Test/Alloc.hs diff --git a/vector/tests-inspect/Inspect/Alloc.hs b/vector/tests-inspect/Inspect/Alloc.hs new file mode 100644 index 00000000..b4f55e4f --- /dev/null +++ b/vector/tests-inspect/Inspect/Alloc.hs @@ -0,0 +1,74 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} +{- | +Here we test that GHC is able to optimize well construction of vector +using monadic\/applicative actions. Well is understood as able to +generate code which does not allocate except for buffer and some +constant overhead. +-} +module Inspect.Alloc where + +import Data.Int +-- import Data.Monoid +import Data.Functor.Identity +import Test.Tasty +import Test.Tasty.HUnit +import System.Mem +import Test.Alloc + +import qualified Data.Vector.Unboxed as VU + + +tests :: TestTree +tests = testGroup "allocations" + [ testGroup "traversable" + [ testCase "IO" + $ checkAllocations (linear 8) + $ whnfIO (VU.traverse (\_ -> getAllocationCounter) vector) + , testCase "Identity" + $ checkAllocations (linear 8) + $ VU.traverse (\n -> Identity (10*n)) `whnf` vector + -- NOTE: Naive traversal is lazy and allocated 2 words per element + -- + -- , testCase "Const Sum" + -- $ checkAllocations constant + -- $ whnf (VU.traverse (Const @_ @() . Sum)) vector + ] + , testGroup "unstreamM" + [ testCase "IO" + $ checkAllocations (linear 8) + $ whnfIO (VU.replicateM size getAllocationCounter) + -- , testCase "Identity" + -- $ checkAllocations (linear 8) + -- $ (\sz -> VU.generateM sz (\n -> Identity (fromIntegral n :: Int64))) `whnf` size + ] + ] + + +-- | Constant overhead. Measurement precision is 4k +overhead :: Int64 +overhead = 4096*2 + +-- | Vector size. It should be large so 1byte per element will be +-- large than page. +size :: Int +size = 100000 + +vector :: VU.Vector Int64 +{-# NOINLINE vector #-} +vector = VU.generate size fromIntegral + +-- | N bytes per element + constant overhead. We also check that bound +-- is tight. +linear :: Int -> Range +linear n = Range + { allocHi = fromIntegral (n * size) + overhead + , allocLo = fromIntegral (n * size) + } + +-- | Only constant overhead +constant :: Range +constant = Range { allocHi = overhead + , allocLo = 0 + } diff --git a/vector/tests-inspect/Test/Alloc.hs b/vector/tests-inspect/Test/Alloc.hs new file mode 100644 index 00000000..6720cbc4 --- /dev/null +++ b/vector/tests-inspect/Test/Alloc.hs @@ -0,0 +1,59 @@ +{-# LANGUAGE RecordWildCards #-} +-- | +-- Test that function allocates is in range. This is good way to test +-- that GHC produces tight non-allocating loops. +module Test.Alloc where + +import Control.Exception +import Data.Int +import System.Mem +import Test.Tasty.HUnit +import Text.Printf + +---------------------------------------------------------------- +-- Benchmarking machinery copied from tasty-bench +---------------------------------------------------------------- + +newtype Benchmarkable = Benchmarkable (IO ()) + +whnf :: (a -> b) -> a -> Benchmarkable +{-# NOINLINE whnf #-} +whnf f a = Benchmarkable $ do _ <- evaluate (f a) + return () + +whnfIO :: IO a -> Benchmarkable +{-# NOINLINE whnfIO #-} +whnfIO io = Benchmarkable $ do _ <- evaluate =<< io + return () + + +-- | Measure allocations. Measurements use 'getAllocationCounter' so +-- it's accurate up to 4k bytes. +allocations :: Benchmarkable -> IO Int64 +allocations (Benchmarkable io) = do + -- We need to run `io` twice in order to ensure that all constant + -- parameters are evaluated. + io + n1 <- getAllocationCounter + io + n2 <- getAllocationCounter + return $! n1 - n2 + + +-- | Expected allocations range +data Range = Range { allocLo :: !Int64 + , allocHi :: !Int64 + } + deriving Show + +-- | Check that computation's allocations lie in range +checkAllocations :: Range -> Benchmarkable -> IO () +checkAllocations Range{..} bench = do + alloc <- allocations bench + let msg = unlines [ printf "allocated = %12d" alloc + , printf "Low bound = %12d" allocLo + , printf "Hi bound = %12d" allocHi + ] + assertBool msg $ alloc <= allocHi + && alloc >= allocLo + diff --git a/vector/tests-inspect/main.hs b/vector/tests-inspect/main.hs index 52dd6db0..0a67eb9f 100644 --- a/vector/tests-inspect/main.hs +++ b/vector/tests-inspect/main.hs @@ -1,6 +1,7 @@ module Main (main) where import qualified Inspect +import qualified Inspect.Alloc import qualified Inspect.DerivingVia import Test.Tasty (defaultMain,testGroup) @@ -8,4 +9,5 @@ main :: IO () main = defaultMain $ testGroup "tests" [ Inspect.tests , Inspect.DerivingVia.tests + , Inspect.Alloc.tests ] diff --git a/vector/vector.cabal b/vector/vector.cabal index 62a4369b..479b35fe 100644 --- a/vector/vector.cabal +++ b/vector/vector.cabal @@ -252,13 +252,16 @@ test-suite vector-inspection main-is: main.hs default-language: Haskell2010 Other-modules: Inspect + Inspect.Alloc Inspect.DerivingVia Inspect.DerivingVia.OtherFoo + Test.Alloc build-depends: base -any , primitive >= 0.6.4.0 && < 0.10 , vector -any , tasty + , tasty-hunit , tasty-inspection-testing >= 0.1 library benchmarks-O2 From 1097859069c3c35be05935f71810d55289fb8b57 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Sun, 14 Sep 2025 17:40:47 +0300 Subject: [PATCH 3/4] Add tests for ST as well --- vector/tests-inspect/Inspect/Alloc.hs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/vector/tests-inspect/Inspect/Alloc.hs b/vector/tests-inspect/Inspect/Alloc.hs index b4f55e4f..f7fecf2f 100644 --- a/vector/tests-inspect/Inspect/Alloc.hs +++ b/vector/tests-inspect/Inspect/Alloc.hs @@ -9,6 +9,7 @@ constant overhead. -} module Inspect.Alloc where +import Control.Monad.ST import Data.Int -- import Data.Monoid import Data.Functor.Identity @@ -26,6 +27,9 @@ tests = testGroup "allocations" [ testCase "IO" $ checkAllocations (linear 8) $ whnfIO (VU.traverse (\_ -> getAllocationCounter) vector) + , testCase "ST" + $ checkAllocations (linear 8) + $ (\v -> runST $ VU.traverse (pureST . fromIntegral) v) `whnf` vector , testCase "Identity" $ checkAllocations (linear 8) $ VU.traverse (\n -> Identity (10*n)) `whnf` vector @@ -39,6 +43,9 @@ tests = testGroup "allocations" [ testCase "IO" $ checkAllocations (linear 8) $ whnfIO (VU.replicateM size getAllocationCounter) + , testCase "ST" + $ checkAllocations (linear 8) + $ (\sz -> runST $ VU.generateM sz pureST) `whnf` size -- , testCase "Identity" -- $ checkAllocations (linear 8) -- $ (\sz -> VU.generateM sz (\n -> Identity (fromIntegral n :: Int64))) `whnf` size @@ -46,6 +53,10 @@ tests = testGroup "allocations" ] +pureST :: Int -> ST s Int64 +{-# NOINLINE pureST #-} +pureST i = pure $! fromIntegral i + -- | Constant overhead. Measurement precision is 4k overhead :: Int64 overhead = 4096*2 From fc83758a6df166dc984f0be7368cfa2211a37d33 Mon Sep 17 00:00:00 2001 From: Alexey Khudyakov Date: Wed, 17 Sep 2025 19:59:02 +0300 Subject: [PATCH 4/4] Disable allocation test for older GHC --- vector/tests-inspect/Inspect/Alloc.hs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/vector/tests-inspect/Inspect/Alloc.hs b/vector/tests-inspect/Inspect/Alloc.hs index f7fecf2f..be399e5f 100644 --- a/vector/tests-inspect/Inspect/Alloc.hs +++ b/vector/tests-inspect/Inspect/Alloc.hs @@ -1,4 +1,5 @@ {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeApplications #-} {- | @@ -27,12 +28,23 @@ tests = testGroup "allocations" [ testCase "IO" $ checkAllocations (linear 8) $ whnfIO (VU.traverse (\_ -> getAllocationCounter) vector) + +#if MIN_VERSION_base(4,17,0) + -- GHC<9.4 doesn't optimize well. , testCase "ST" $ checkAllocations (linear 8) $ (\v -> runST $ VU.traverse (pureST . fromIntegral) v) `whnf` vector +#endif + +#if MIN_VERSION_base(4,15,0) + -- GHC<9.0 doesn't optimize this well. And there's no appetite + -- for finding out why. Thus it's disabled for them. We'll still + -- catch regression going forward. , testCase "Identity" $ checkAllocations (linear 8) $ VU.traverse (\n -> Identity (10*n)) `whnf` vector +#endif + -- NOTE: Naive traversal is lazy and allocated 2 words per element -- -- , testCase "Const Sum" @@ -43,9 +55,14 @@ tests = testGroup "allocations" [ testCase "IO" $ checkAllocations (linear 8) $ whnfIO (VU.replicateM size getAllocationCounter) + +#if MIN_VERSION_base(4,17,0) + -- GHC<9.4 doesn't optimize well. , testCase "ST" $ checkAllocations (linear 8) $ (\sz -> runST $ VU.generateM sz pureST) `whnf` size +#endif + -- , testCase "Identity" -- $ checkAllocations (linear 8) -- $ (\sz -> VU.generateM sz (\n -> Identity (fromIntegral n :: Int64))) `whnf` size