haskell · Shimuuar · Sep 17, 2025 · Sep 12, 2025 · Sep 12, 2025 · Sep 14, 2025
diff --git a/vector/tests-inspect/Inspect/Alloc.hs b/vector/tests-inspect/Inspect/Alloc.hs
@@ -0,0 +1,102 @@
+{-# LANGUAGE BangPatterns        #-}
+{-# LANGUAGE CPP                 #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications    #-}
+{- |
+Here we test that GHC is able to optimize well construction of vector
+using monadic\/applicative actions. Well is understood as able to
+generate code which does not allocate except for buffer and some
+constant overhead.
+-}
+module Inspect.Alloc where
+
+import Control.Monad.ST
+import Data.Int
+-- import Data.Monoid
+import Data.Functor.Identity
+import Test.Tasty
+import Test.Tasty.HUnit
+import System.Mem
+import Test.Alloc
+
+import qualified Data.Vector.Unboxed as VU
+
+
+tests :: TestTree
+tests = testGroup "allocations"
+  [ testGroup "traversable"
+    [ testCase "IO"
+      $ checkAllocations (linear 8)
+      $ whnfIO (VU.traverse (\_ -> getAllocationCounter) vector)
+
+#if MIN_VERSION_base(4,17,0)
+    -- GHC<9.4 doesn't optimize well.
+    , testCase "ST"
+      $ checkAllocations (linear 8)
+      $ (\v -> runST $ VU.traverse (pureST . fromIntegral) v) `whnf` vector
+#endif
+
+#if MIN_VERSION_base(4,15,0)
+      -- GHC<9.0 doesn't optimize this well. And there's no appetite
+      -- for finding out why. Thus it's disabled for them. We'll still
+      -- catch regression going forward.
+    , testCase "Identity"
+      $ checkAllocations (linear 8)
+      $ VU.traverse (\n -> Identity (10*n)) `whnf` vector
+#endif
+
+    -- NOTE: Naive traversal is lazy and allocated 2 words per element
+    --
+    -- , testCase "Const Sum"
+    --   $ checkAllocations constant
+    --   $ whnf (VU.traverse (Const @_ @() . Sum)) vector
+    ]
+  , testGroup "unstreamM"
+    [ testCase "IO"
+      $ checkAllocations (linear 8)
+      $ whnfIO (VU.replicateM size getAllocationCounter)
+
+#if MIN_VERSION_base(4,17,0)
+    -- GHC<9.4 doesn't optimize well.
+    , testCase "ST"
+      $ checkAllocations (linear 8)
+      $ (\sz -> runST $ VU.generateM sz pureST) `whnf` size
+#endif
+
+    -- , testCase "Identity"
+    --   $ checkAllocations (linear 8)
+    --   $ (\sz -> VU.generateM sz (\n -> Identity (fromIntegral n :: Int64))) `whnf` size
+    ]
+  ]
+
+
+pureST :: Int -> ST s Int64
+{-# NOINLINE pureST #-}
+pureST i = pure $! fromIntegral i
+
+-- | Constant overhead. Measurement precision is 4k
+overhead :: Int64
+overhead = 4096*2
+
+-- | Vector size. It should be large so 1byte per element will be
+--   large than page.
+size :: Int
+size = 100000
+
+vector :: VU.Vector Int64
+{-# NOINLINE vector #-}
+vector = VU.generate size fromIntegral
+
+-- | N bytes per element + constant overhead. We also check that bound
+--   is tight.
+linear :: Int -> Range
+linear n = Range
+  { allocHi = fromIntegral (n * size) + overhead
+  , allocLo = fromIntegral (n * size)
+  }
+
+-- | Only constant overhead
+constant :: Range
+constant = Range { allocHi = overhead
+                 , allocLo = 0
+                 }
diff --git a/vector/tests-inspect/Test/Alloc.hs b/vector/tests-inspect/Test/Alloc.hs
@@ -0,0 +1,59 @@
+{-# LANGUAGE RecordWildCards #-}
+-- |
+-- Test that function allocates is in range. This is good way to test
+-- that GHC produces tight non-allocating loops.
+module Test.Alloc where
+
+import Control.Exception
+import Data.Int
+import System.Mem
+import Test.Tasty.HUnit
+import Text.Printf
+
+----------------------------------------------------------------
+-- Benchmarking machinery copied from tasty-bench
+----------------------------------------------------------------
+
+newtype Benchmarkable = Benchmarkable (IO ())
+
+whnf :: (a -> b) -> a -> Benchmarkable
+{-# NOINLINE whnf #-}
+whnf f a = Benchmarkable $ do _ <- evaluate (f a)
+                              return ()
+
+whnfIO :: IO a -> Benchmarkable
+{-# NOINLINE whnfIO #-}
+whnfIO io = Benchmarkable $ do _ <- evaluate =<< io
+                               return ()
+
+
+-- | Measure allocations. Measurements use 'getAllocationCounter' so
+--   it's accurate up to 4k bytes.
+allocations :: Benchmarkable -> IO Int64
+allocations (Benchmarkable io) = do
+  -- We need to run `io` twice in order to ensure that all constant
+  -- parameters are evaluated.
+  io
+  n1 <- getAllocationCounter
+  io
+  n2 <- getAllocationCounter
+  return $! n1 - n2
+
+
+-- | Expected allocations range
+data Range = Range { allocLo :: !Int64
+                   , allocHi :: !Int64
+                   }
+             deriving Show
+
+-- | Check that computation's allocations lie in range
+checkAllocations :: Range -> Benchmarkable -> IO ()
+checkAllocations Range{..} bench = do
+  alloc <- allocations bench
+  let msg = unlines [ printf "allocated = %12d" alloc
+                    , printf "Low bound = %12d" allocLo
+                    , printf "Hi  bound = %12d" allocHi
+                    ]
+  assertBool msg $ alloc <= allocHi
+                && alloc >= allocLo
+
diff --git a/vector/tests-inspect/main.hs b/vector/tests-inspect/main.hs
@@ -1,11 +1,13 @@
 module Main (main) where
 
 import qualified Inspect
+import qualified Inspect.Alloc
 import qualified Inspect.DerivingVia
 import Test.Tasty (defaultMain,testGroup)
 
 main :: IO ()
 main = defaultMain $ testGroup "tests"
   [ Inspect.tests
   , Inspect.DerivingVia.tests
+  , Inspect.Alloc.tests
   ]
diff --git a/vector/tests/Utilities.hs b/vector/tests/Utilities.hs
@@ -266,8 +266,6 @@ xs // ps = go xs ps' 0
     go [] _ _      = []
 
 
--- withIndexFirst :: (Int -> a -> [a]) -> [a] -> [a]
-
 withIndexFirst :: (((Int, a) -> b) -> [(Int, a)] -> c)
                -> ((Int -> a -> b) -> [a]        -> c)
 withIndexFirst m f = m (uncurry f) . zip [0::Int ..]

diff --git a/vector/vector.cabal b/vector/vector.cabal
@@ -252,13 +252,16 @@ test-suite vector-inspection
   main-is:          main.hs
   default-language: Haskell2010
   Other-modules:    Inspect
+                    Inspect.Alloc
                     Inspect.DerivingVia
                     Inspect.DerivingVia.OtherFoo
+                    Test.Alloc
   build-depends:
         base                     -any
       , primitive                >= 0.6.4.0 && < 0.10
       , vector                   -any
       , tasty
+      , tasty-hunit
       , tasty-inspection-testing >= 0.1
 
 library benchmarks-O2