Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/32bit-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# This config is mostly copied from
# https://github.com/haskell/bytestring/blob/master/.github/workflows/ci.yml

name: 32bit-ci
on:
push:
branches:
- master
pull_request: {} # Validate all PRs

defaults:
run:
shell: bash

jobs:
i386:
runs-on: ubuntu-latest
container:
image: i386/ubuntu:bionic
steps:
- name: Install
run: |
apt-get update -y
apt-get install -y autoconf build-essential zlib1g-dev libgmp-dev curl libncurses5 libtinfo5 libncurses5-dev libtinfo-dev
curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_INSTALL_NO_STACK=1 sh
- uses: actions/checkout@v1 #This version must stay old enough to remain compatible with the container image
- name: Test
run: |
source ~/.ghcup/env
cabal update
cabal test
# TODO: Consider testing with -fdebug
53 changes: 33 additions & 20 deletions Data/HashMap/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
{-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-}
{-# OPTIONS_HADDOCK not-home #-}

#include "MachDeps.h"

-- | = WARNING
--
-- This module is considered __internal__.
Expand Down Expand Up @@ -125,9 +127,9 @@ module Data.HashMap.Internal
, sparseIndex
, two
, unionArrayBy
, update32
, update32M
, update32With'
, updateFullArray
, updateFullArrayM
, updateFullArrayWith'
, updateOrConcatWithKey
, filterMapAux
, equalKeys
Expand Down Expand Up @@ -830,7 +832,7 @@ insert' h0 k0 v0 m0 = go h0 k0 v0 0 m0
!st' = go h k x (nextShift s) st
in if st' `ptrEq` st
then t
else Full (update32 ary i st')
else Full (updateFullArray ary i st')
where i = index h s
go h k x s t@(Collision hy v)
| h == hy = Collision h (updateOrSnocWith (\a _ -> (# a #)) k x v)
Expand Down Expand Up @@ -864,7 +866,7 @@ insertNewKey !h0 !k0 x0 !m0 = go h0 k0 x0 0 m0
go h k x s (Full ary) =
let !st = A.index ary i
!st' = go h k x (nextShift s) st
in Full (update32 ary i st')
in Full (updateFullArray ary i st')
where i = index h s
go h k x s t@(Collision hy v)
| h == hy = Collision h (A.snoc v (L k x))
Expand Down Expand Up @@ -893,7 +895,7 @@ insertKeyExists !collPos0 !h0 !k0 x0 !m0 = go collPos0 h0 k0 x0 m0
go collPos shiftedHash k x (Full ary) =
let !st = A.index ary i
!st' = go collPos (shiftHash shiftedHash) k x st
in Full (update32 ary i st')
in Full (updateFullArray ary i st')
where i = index' shiftedHash
go collPos _shiftedHash k x (Collision h v)
| collPos >= 0 = Collision h (setAtPosition collPos k x v)
Expand Down Expand Up @@ -1041,7 +1043,7 @@ insertModifying x f k0 m0 = go h0 k0 0 m0
go h k s t@(Full ary) =
let !st = A.index ary i
!st' = go h k (nextShift s) st
ary' = update32 ary i $! st'
ary' = updateFullArray ary i $! st'
in if ptrEq st st'
then t
else Full ary'
Expand Down Expand Up @@ -1270,7 +1272,7 @@ adjust# f k0 m0 = go h0 k0 0 m0
let i = index h s
!st = A.index ary i
!st' = go h k (nextShift s) st
ary' = update32 ary i $! st'
ary' = updateFullArray ary i $! st'
in if ptrEq st st'
then t
else Full ary'
Expand Down Expand Up @@ -1554,6 +1556,9 @@ submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 .
where
go :: Int -> Int -> Bitmap -> Bool
go !i !j !m

-- Note: m can overflow to 0 when maxChildren == WORD_SIZE_IN_BITS. See
-- #491. In that case there needs to be a check '| m == 0 = True'
| m > b1Orb2 = True

-- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and
Expand Down Expand Up @@ -1660,12 +1665,12 @@ unionWithKey f = go 0
go s (Full ary1) t2 =
let h2 = leafHashCode t2
i = index h2 s
ary' = update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2
ary' = updateFullArrayWith' ary1 i $ \st1 -> go (nextShift s) st1 t2
in Full ary'
go s t1 (Full ary2) =
let h1 = leafHashCode t1
i = index h1 s
ary' = update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2
ary' = updateFullArrayWith' ary2 i $ \st2 -> go (nextShift s) t1 st2
in Full ary'

leafHashCode (Leaf h _) = h
Expand Down Expand Up @@ -2406,24 +2411,24 @@ subsetArray cmpV ary1 ary2 = A.length ary1 <= A.length ary2 && A.all inAry2 ary1
-- Manually unrolled loops

-- | \(O(n)\) Update the element at the given position in this array.
update32 :: A.Array e -> Int -> e -> A.Array e
update32 ary idx b = runST (update32M ary idx b)
{-# INLINE update32 #-}
updateFullArray :: A.Array e -> Int -> e -> A.Array e
updateFullArray ary idx b = runST (updateFullArrayM ary idx b)
{-# INLINE updateFullArray #-}

-- | \(O(n)\) Update the element at the given position in this array.
update32M :: A.Array e -> Int -> e -> ST s (A.Array e)
update32M ary idx b = do
updateFullArrayM :: A.Array e -> Int -> e -> ST s (A.Array e)
updateFullArrayM ary idx b = do
mary <- clone ary
A.write mary idx b
A.unsafeFreeze mary
{-# INLINE update32M #-}
{-# INLINE updateFullArrayM #-}

-- | \(O(n)\) Update the element at the given position in this array, by applying a function to it.
update32With' :: A.Array e -> Int -> (e -> e) -> A.Array e
update32With' ary idx f
updateFullArrayWith' :: A.Array e -> Int -> (e -> e) -> A.Array e
updateFullArrayWith' ary idx f
| (# x #) <- A.index# ary idx
= update32 ary idx $! f x
{-# INLINE update32With' #-}
= updateFullArray ary idx $! f x
{-# INLINE updateFullArrayWith' #-}

-- | Unsafely clone an array of (2^bitsPerSubkey) elements. The length of the input
-- array is not checked.
Expand All @@ -2440,8 +2445,16 @@ clone ary =
-- | Number of bits that are inspected at each level of the hash tree.
--
-- This constant is named /t/ in the original /Ideal Hash Trees/ paper.
--
-- Note that this constant is platform-dependent. On 32-bit platforms we use
-- '4', because bitmaps using '2^5' bits turned out to be prone to integer
-- overflow bugs. See #491 for instance.
bitsPerSubkey :: Int
#if WORD_SIZE_IN_BITS < 64
bitsPerSubkey = 4
#else
bitsPerSubkey = 5
#endif

-- | The size of a 'Full' node, i.e. @2 ^ 'bitsPerSubkey'@.
maxChildren :: Int
Expand Down
10 changes: 5 additions & 5 deletions Data/HashMap/Internal/Strict.hs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
-- strings.
--
-- Many operations have a average-case complexity of \(O(\log n)\). The
-- implementation uses a large base (i.e. 32) so in practice these
-- implementation uses a large base (i.e. 16 or 32) so in practice these
-- operations are constant time.
module Data.HashMap.Internal.Strict
(
Expand Down Expand Up @@ -211,7 +211,7 @@ insertWith f k0 v0 m0 = go h0 k0 v0 0 m0
go h k x s (Full ary) =
let st = A.index ary i
st' = go h k x (nextShift s) st
ary' = HM.update32 ary i $! st'
ary' = HM.updateFullArray ary i $! st'
in Full ary'
where i = index h s
go h k x s t@(Collision hy v)
Expand Down Expand Up @@ -282,7 +282,7 @@ adjust f k0 m0 = go h0 k0 0 m0
let i = index h s
st = A.index ary i
st' = go h k (nextShift s) st
ary' = HM.update32 ary i $! st'
ary' = HM.updateFullArray ary i $! st'
in Full ary'
go h k _ t@(Collision hy v)
| h == hy = Collision h (updateWith f k v)
Expand Down Expand Up @@ -516,12 +516,12 @@ unionWithKey f = go 0
go s (Full ary1) t2 =
let h2 = leafHashCode t2
i = index h2 s
ary' = HM.update32With' ary1 i $ \st1 -> go (nextShift s) st1 t2
ary' = HM.updateFullArrayWith' ary1 i $ \st1 -> go (nextShift s) st1 t2
in Full ary'
go s t1 (Full ary2) =
let h1 = leafHashCode t1
i = index h1 s
ary' = HM.update32With' ary2 i $ \st2 -> go (nextShift s) t1 st2
ary' = HM.updateFullArrayWith' ary2 i $ \st2 -> go (nextShift s) t1 st2
in Full ary'

leafHashCode (Leaf h _) = h
Expand Down
2 changes: 1 addition & 1 deletion Data/HashMap/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
-- strings.
--
-- Many operations have a average-case complexity of \(O(\log n)\). The
-- implementation uses a large base (i.e. 32) so in practice these
-- implementation uses a large base (i.e. 16 or 32) so in practice these
-- operations are constant time.
module Data.HashMap.Lazy
(
Expand Down
2 changes: 1 addition & 1 deletion Data/HashMap/Strict.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
-- strings.
--
-- Many operations have a average-case complexity of \(O(\log n)\). The
-- implementation uses a large base (i.e. 32) so in practice these
-- implementation uses a large base (i.e. 16 or 32) so in practice these
-- operations are constant time.
module Data.HashMap.Strict
(
Expand Down
2 changes: 1 addition & 1 deletion Data/HashSet.hs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ especially when value comparisons are expensive, as in the case of
strings.

Many operations have a average-case complexity of \(O(\log n)\). The
implementation uses a large base (i.e. 16) so in practice these
implementation uses a large base (i.e. 16 or 32) so in practice these
operations are constant time.
-}

Expand Down
2 changes: 1 addition & 1 deletion Data/HashSet/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
-- strings.
--
-- Many operations have a average-case complexity of \(O(\log n)\). The
-- implementation uses a large base (i.e. 32) so in practice these
-- implementation uses a large base (i.e. 16 or 32) so in practice these
-- operations are constant time.

module Data.HashSet.Internal
Expand Down
3 changes: 2 additions & 1 deletion docs/developer-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ Here's a quick overview in order of simplicity:
it contains *2^B* elements.

The number of bits of the hash value to use at each level of the tree, *B*, is a
compile time constant, currently 5. In general a larger *B* improves lookup
compile time constant, currently 5 on 64-bit platforms, and 4 on platforms with
a smaller word size. In general a larger *B* improves lookup
performance (shallower tree) but hurts modification (large nodes to copy when
updating the spine of the tree).

Expand Down
13 changes: 8 additions & 5 deletions tests/Main.hs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
module Main (main) where

import GHC.IO.Encoding (setLocaleEncoding, utf8)
import Test.Tasty (defaultMain, testGroup)

import qualified Properties
import qualified Regressions
import qualified Strictness

main :: IO ()
main = defaultMain $ testGroup "All"
[ Properties.tests
, Regressions.tests
, Strictness.tests
]
main = do
setLocaleEncoding utf8
defaultMain $ testGroup "All"
[ Properties.tests
, Regressions.tests
, Strictness.tests
]
14 changes: 14 additions & 0 deletions tests/Regressions.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE ScopedTypeVariables #-}
Expand Down Expand Up @@ -26,6 +27,7 @@ import Test.Tasty.QuickCheck (testProperty)
import qualified Data.HashMap.Lazy as HML
import qualified Data.HashMap.Strict as HMS
import qualified Data.HashSet as HS
import qualified Test.Tasty as Tasty

#if MIN_VERSION_base(4,12,0)
-- nothunks requires base >= 4.12
Expand Down Expand Up @@ -262,6 +264,17 @@ issue420 = do
assert $ k1 `HS.member` s1
assert $ k2 `HS.member` s1

------------------------------------------------------------------------
-- Issue 491

issue491 :: TestTree
issue491 = Tasty.localOption (Tasty.mkTimeout 1000000) $ testGroup "issue491" $
[ testCase "1" $ assert $ m [0, -1] `HML.isSubmapOf` m [0, -1]
, testCase "2" $ assert $ m [1, 0b11111] `HML.isSubmapOf` m [1, 0b11111]
, testCase "3" $ assert $ m [0, 1] `HML.isSubmapOf` m [0, 1, 0b11111]
]
where m = HS.toMap . HS.fromList @Int

------------------------------------------------------------------------
-- * Test list

Expand Down Expand Up @@ -292,4 +305,5 @@ tests = testGroup "Regression tests"
, testCase "issue383" issue383
#endif
, testCase "issue420" issue420
, issue491
]
10 changes: 7 additions & 3 deletions tests/Util/Key.hs
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,21 @@ arbitraryHash = do
[ (2, fromIntegral . QC.getLarge <$> arbitrary @(Large Word16))
, (1, QC.getSmall <$> arbitrary)
, (1, QC.getLarge <$> arbitrary)
-- Hashes where the lowest `maxChildren` bits are set are interesting
-- edge cases. See #491.
, (1, QC.elements [-1, 0xFF, 0xFFF])
]
i <- QC.frequency gens
moreCollisions' <- QC.elements [moreCollisions, id]
pure (moreCollisions' i)

-- | Mask out most bits to produce more collisions
moreCollisions :: Int -> Int
moreCollisions w = fromIntegral (w .&. mask)
moreCollisions w = fromIntegral (w .&. moreCollisionsMask)

mask :: Int
mask = sum [bit n | n <- [0, 3, 8, 14, 61]]
-- | Bitmask for @moreCollisions@
moreCollisionsMask :: Int
moreCollisionsMask = sum [bit n | n <- [0, 3, 8, 14, 61]]

keyToInt :: Key -> Int
keyToInt (K h x) = h * fromEnum x
Expand Down