diff --git a/.github/workflows/32bit-ci.yml b/.github/workflows/32bit-ci.yml new file mode 100644 index 00000000..c09a9287 --- /dev/null +++ b/.github/workflows/32bit-ci.yml @@ -0,0 +1,32 @@ +# This config is mostly copied from +# https://github.com/haskell/bytestring/blob/master/.github/workflows/ci.yml + +name: 32bit-ci +on: + push: + branches: + - master + pull_request: {} # Validate all PRs + +defaults: + run: + shell: bash + +jobs: + i386: + runs-on: ubuntu-latest + container: + image: i386/ubuntu:bionic + steps: + - name: Install + run: | + apt-get update -y + apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev + curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh + - uses: actions/checkout@v1 #This version must stay old enough to remain compatible with the container image + - name: Test + run: | + source ~/.ghcup/env + cabal update + cabal test + # TODO: Consider testing with -fdebug diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 2bb7029c..61c0edd2 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -15,6 +15,8 @@ {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} {-# OPTIONS_HADDOCK not-home #-} +#include "MachDeps.h" + -- | = WARNING -- -- This module is considered __internal__. @@ -125,9 +127,9 @@ module Data.HashMap.Internal , sparseIndex , two , unionArrayBy - , update32 - , update32M - , update32With' + , updateFullArray + , updateFullArrayM + , updateFullArrayWith' , updateOrConcatWithKey , filterMapAux , equalKeys @@ -830,7 +832,7 @@ insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0 !st' = go h k x (nextShift s) st in if st' `ptrEq` st then t - else Full (update32 ary i st') + else Full (updateFullArray ary i st') where i = index h s go h k x s t@(Collision hy v) | h == hy = Collision h (updateOrSnocWith (\a _ -> (# a #)) k x v) @@ -864,7 +866,7 @@ insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0 go h k x s (Full ary) = let !st = A.index ary i !st' = go h k x (nextShift s) st - in Full (update32 ary i st') + in Full (updateFullArray ary i st') where i = index h s go h k x s t@(Collision hy v) | h == hy = Collision h (A.snoc v (L k x)) @@ -893,7 +895,7 @@ insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 m0 go collPos shiftedHash k x (Full ary) = let !st = A.index ary i !st' = go collPos (shiftHash shiftedHash) k x st - in Full (update32 ary i st') + in Full (updateFullArray ary i st') where i = index' shiftedHash go collPos _shiftedHash k x (Collision h v) | collPos >= 0 = Collision h (setAtPosition collPos k x v) @@ -1041,7 +1043,7 @@ insertModifying x f k0 m0 = go h0 k0 0 m0 go h k s t@(Full ary) = let !st = A.index ary i !st' = go h k (nextShift s) st - ary' = update32 ary i $! st' + ary' = updateFullArray ary i $! st' in if ptrEq st st' then t else Full ary' @@ -1270,7 +1272,7 @@ adjust# f k0 m0 = go h0 k0 0 m0 let i = index h s !st = A.index ary i !st' = go h k (nextShift s) st - ary' = update32 ary i $! st' + ary' = updateFullArray ary i $! st' in if ptrEq st st' then t else Full ary' @@ -1554,6 +1556,9 @@ submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 . where go :: Int -> Int -> Bitmap -> Bool go !i !j !m + + -- Note: m can overflow to 0 when maxChildren == WORD_SIZE_IN_BITS. See + -- #491. In that case there needs to be a check '| m == 0 = True' | m > b1Orb2 = True -- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and @@ -1660,12 +1665,12 @@ unionWithKey f = go 0 go s (Full ary1) t2 = let h2 = leafHashCode t2 i = index h2 s - ary' = update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2 + ary' = updateFullArrayWith' ary1 i $ \st1 -> go (nextShift s) st1 t2 in Full ary' go s t1 (Full ary2) = let h1 = leafHashCode t1 i = index h1 s - ary' = update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2 + ary' = updateFullArrayWith' ary2 i $ \st2 -> go (nextShift s) t1 st2 in Full ary' leafHashCode (Leaf h _) = h @@ -2406,24 +2411,24 @@ subsetArray cmpV ary1 ary2 = A.length ary1 <= A.length ary2 && A.all inAry2 ary1 -- Manually unrolled loops -- | \(O(n)\) Update the element at the given position in this array. -update32 :: A.Array e -> Int -> e -> A.Array e -update32 ary idx b = runST (update32M ary idx b) -{-# INLINE update32 #-} +updateFullArray :: A.Array e -> Int -> e -> A.Array e +updateFullArray ary idx b = runST (updateFullArrayM ary idx b) +{-# INLINE updateFullArray #-} -- | \(O(n)\) Update the element at the given position in this array. -update32M :: A.Array e -> Int -> e -> ST s (A.Array e) -update32M ary idx b = do +updateFullArrayM :: A.Array e -> Int -> e -> ST s (A.Array e) +updateFullArrayM ary idx b = do mary <- clone ary A.write mary idx b A.unsafeFreeze mary -{-# INLINE update32M #-} +{-# INLINE updateFullArrayM #-} -- | \(O(n)\) Update the element at the given position in this array, by applying a function to it. -update32With' :: A.Array e -> Int -> (e -> e) -> A.Array e -update32With' ary idx f +updateFullArrayWith' :: A.Array e -> Int -> (e -> e) -> A.Array e +updateFullArrayWith' ary idx f | (# x #) <- A.index# ary idx - = update32 ary idx $! f x -{-# INLINE update32With' #-} + = updateFullArray ary idx $! f x +{-# INLINE updateFullArrayWith' #-} -- | Unsafely clone an array of (2^bitsPerSubkey) elements. The length of the input -- array is not checked. @@ -2440,8 +2445,16 @@ clone ary = -- | Number of bits that are inspected at each level of the hash tree. -- -- This constant is named /t/ in the original /Ideal Hash Trees/ paper. +-- +-- Note that this constant is platform-dependent. On 32-bit platforms we use +-- '4', because bitmaps using '2^5' bits turned out to be prone to integer +-- overflow bugs. See #491 for instance. bitsPerSubkey :: Int +#if WORD_SIZE_IN_BITS < 64 +bitsPerSubkey = 4 +#else bitsPerSubkey = 5 +#endif -- | The size of a 'Full' node, i.e. @2 ^ 'bitsPerSubkey'@. maxChildren :: Int diff --git a/Data/HashMap/Internal/Strict.hs b/Data/HashMap/Internal/Strict.hs index a76bffb8..ce9a48fa 100644 --- a/Data/HashMap/Internal/Strict.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -39,7 +39,7 @@ -- strings. -- -- Many operations have a average-case complexity of \(O(\log n)\). The --- implementation uses a large base (i.e. 32) so in practice these +-- implementation uses a large base (i.e. 16 or 32) so in practice these -- operations are constant time. module Data.HashMap.Internal.Strict ( @@ -211,7 +211,7 @@ insertWith f k0 v0 m0 = go h0 k0 v0 0 m0 go h k x s (Full ary) = let st = A.index ary i st' = go h k x (nextShift s) st - ary' = HM.update32 ary i $! st' + ary' = HM.updateFullArray ary i $! st' in Full ary' where i = index h s go h k x s t@(Collision hy v) @@ -282,7 +282,7 @@ adjust f k0 m0 = go h0 k0 0 m0 let i = index h s st = A.index ary i st' = go h k (nextShift s) st - ary' = HM.update32 ary i $! st' + ary' = HM.updateFullArray ary i $! st' in Full ary' go h k _ t@(Collision hy v) | h == hy = Collision h (updateWith f k v) @@ -516,12 +516,12 @@ unionWithKey f = go 0 go s (Full ary1) t2 = let h2 = leafHashCode t2 i = index h2 s - ary' = HM.update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2 + ary' = HM.updateFullArrayWith' ary1 i $ \st1 -> go (nextShift s) st1 t2 in Full ary' go s t1 (Full ary2) = let h1 = leafHashCode t1 i = index h1 s - ary' = HM.update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2 + ary' = HM.updateFullArrayWith' ary2 i $ \st2 -> go (nextShift s) t1 st2 in Full ary' leafHashCode (Leaf h _) = h diff --git a/Data/HashMap/Lazy.hs b/Data/HashMap/Lazy.hs index 82697c0f..80e3894e 100644 --- a/Data/HashMap/Lazy.hs +++ b/Data/HashMap/Lazy.hs @@ -20,7 +20,7 @@ -- strings. -- -- Many operations have a average-case complexity of \(O(\log n)\). The --- implementation uses a large base (i.e. 32) so in practice these +-- implementation uses a large base (i.e. 16 or 32) so in practice these -- operations are constant time. module Data.HashMap.Lazy ( diff --git a/Data/HashMap/Strict.hs b/Data/HashMap/Strict.hs index cd5baa6c..c1d30e88 100644 --- a/Data/HashMap/Strict.hs +++ b/Data/HashMap/Strict.hs @@ -19,7 +19,7 @@ -- strings. -- -- Many operations have a average-case complexity of \(O(\log n)\). The --- implementation uses a large base (i.e. 32) so in practice these +-- implementation uses a large base (i.e. 16 or 32) so in practice these -- operations are constant time. module Data.HashMap.Strict ( diff --git a/Data/HashSet.hs b/Data/HashSet.hs index 6d589189..330af38a 100644 --- a/Data/HashSet.hs +++ b/Data/HashSet.hs @@ -87,7 +87,7 @@ especially when value comparisons are expensive, as in the case of strings. Many operations have a average-case complexity of \(O(\log n)\). The -implementation uses a large base (i.e. 16) so in practice these +implementation uses a large base (i.e. 16 or 32) so in practice these operations are constant time. -} diff --git a/Data/HashSet/Internal.hs b/Data/HashSet/Internal.hs index be1af480..38cf1828 100644 --- a/Data/HashSet/Internal.hs +++ b/Data/HashSet/Internal.hs @@ -37,7 +37,7 @@ -- strings. -- -- Many operations have a average-case complexity of \(O(\log n)\). The --- implementation uses a large base (i.e. 32) so in practice these +-- implementation uses a large base (i.e. 16 or 32) so in practice these -- operations are constant time. module Data.HashSet.Internal diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 5af0c515..5faad514 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -103,7 +103,8 @@ Here's a quick overview in order of simplicity: it contains *2^B* elements. The number of bits of the hash value to use at each level of the tree, *B*, is a -compile time constant, currently 5. In general a larger *B* improves lookup +compile time constant, currently 5 on 64-bit platforms, and 4 on platforms with +a smaller word size. In general a larger *B* improves lookup performance (shallower tree) but hurts modification (large nodes to copy when updating the spine of the tree). diff --git a/tests/Main.hs b/tests/Main.hs index 9e337ad2..5880ba72 100644 --- a/tests/Main.hs +++ b/tests/Main.hs @@ -1,5 +1,6 @@ module Main (main) where +import GHC.IO.Encoding (setLocaleEncoding, utf8) import Test.Tasty (defaultMain, testGroup) import qualified Properties @@ -7,8 +8,10 @@ import qualified Regressions import qualified Strictness main :: IO () -main = defaultMain $ testGroup "All" - [ Properties.tests - , Regressions.tests - , Strictness.tests - ] +main = do + setLocaleEncoding utf8 + defaultMain $ testGroup "All" + [ Properties.tests + , Regressions.tests + , Strictness.tests + ] diff --git a/tests/Regressions.hs b/tests/Regressions.hs index 52af1070..1b2d3c5c 100644 --- a/tests/Regressions.hs +++ b/tests/Regressions.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE CPP #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -26,6 +27,7 @@ import Test.Tasty.QuickCheck (testProperty) import qualified Data.HashMap.Lazy as HML import qualified Data.HashMap.Strict as HMS import qualified Data.HashSet as HS +import qualified Test.Tasty as Tasty #if MIN_VERSION_base(4,12,0) -- nothunks requires base >= 4.12 @@ -262,6 +264,17 @@ issue420 = do assert $ k1 `HS.member` s1 assert $ k2 `HS.member` s1 +------------------------------------------------------------------------ +-- Issue 491 + +issue491 :: TestTree +issue491 = Tasty.localOption (Tasty.mkTimeout 1000000) $ testGroup "issue491" $ + [ testCase "1" $ assert $ m [0, -1] `HML.isSubmapOf` m [0, -1] + , testCase "2" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111] + , testCase "3" $ assert $ m [0, 1] `HML.isSubmapOf` m [0, 1, 0b11111] + ] + where m = HS.toMap . HS.fromList @Int + ------------------------------------------------------------------------ -- * Test list @@ -292,4 +305,5 @@ tests = testGroup "Regression tests" , testCase "issue383" issue383 #endif , testCase "issue420" issue420 + , issue491 ] diff --git a/tests/Util/Key.hs b/tests/Util/Key.hs index a3d1476b..9088d110 100644 --- a/tests/Util/Key.hs +++ b/tests/Util/Key.hs @@ -46,6 +46,9 @@ arbitraryHash = do [ (2, fromIntegral . QC.getLarge <$> arbitrary @(Large Word16)) , (1, QC.getSmall <$> arbitrary) , (1, QC.getLarge <$> arbitrary) + -- Hashes where the lowest `maxChildren` bits are set are interesting + -- edge cases. See #491. + , (1, QC.elements [-1, 0xFF, 0xFFF]) ] i <- QC.frequency gens moreCollisions' <- QC.elements [moreCollisions, id] @@ -53,10 +56,11 @@ arbitraryHash = do -- | Mask out most bits to produce more collisions moreCollisions :: Int -> Int -moreCollisions w = fromIntegral (w .&. mask) +moreCollisions w = fromIntegral (w .&. moreCollisionsMask) -mask :: Int -mask = sum [bit n | n <- [0, 3, 8, 14, 61]] +-- | Bitmask for @moreCollisions@ +moreCollisionsMask :: Int +moreCollisionsMask = sum [bit n | n <- [0, 3, 8, 14, 61]] keyToInt :: Key -> Int keyToInt (K h x) = h * fromEnum x