summaryrefslogtreecommitdiff
path: root/libraries/base/GHC/Event/IntMap.hs
blob: eee0cc5fa9dbefc62c7defad8d5166f2657d1eb6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP, MagicHash, NoImplicitPrelude #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.Event.IntMap
-- Copyright   :  (c) Daan Leijen 2002
--                (c) Andriy Palamarchuk 2008
-- License     :  BSD-style
-- Maintainer  :  libraries@haskell.org
-- Stability   :  provisional
-- Portability :  portable
--
-- An efficient implementation of maps from integer keys to values.
--
-- Since many function names (but not the type name) clash with
-- "Prelude" names, this module is usually imported @qualified@, e.g.
--
-- >  import Data.IntMap (IntMap)
-- >  import qualified Data.IntMap as IntMap
--
-- The implementation is based on /big-endian patricia trees/.  This data
-- structure performs especially well on binary operations like 'union'
-- and 'intersection'.  However, my benchmarks show that it is also
-- (much) faster on insertions and deletions when compared to a generic
-- size-balanced map implementation (see "Data.Map").
--
--    * Chris Okasaki and Andy Gill,  \"/Fast Mergeable Integer Maps/\",
--      Workshop on ML, September 1998, pages 77-86,
--      <http://citeseer.ist.psu.edu/okasaki98fast.html>
--
--    * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve
--      Information Coded In Alphanumeric/\", Journal of the ACM, 15(4),
--      October 1968, pages 514-534.
--
-- Operation comments contain the operation time complexity in
-- the Big-O notation <http://en.wikipedia.org/wiki/Big_O_notation>.
-- Many operations have a worst-case complexity of /O(min(n,W))/.
-- This means that the operation can become linear in the number of
-- elements with a maximum of /W/ -- the number of bits in an 'Int'
-- (32 or 64).
--
-----------------------------------------------------------------------------

module GHC.Event.IntMap
    (
    -- * Map type
    IntMap
    , Key

    -- * Query
    , lookup
    , member

    -- * Construction
    , empty

    -- * Insertion
    , insertWith

    -- * Delete\/Update
    , delete
    , updateWith

    -- * Traversal
    -- ** Fold
    , foldWithKey

    -- * Conversion
    , keys
    ) where

import Data.Bits

import Data.Maybe (Maybe(..))
import GHC.Base hiding (foldr)
import GHC.Num (Num(..))
import GHC.Real (fromIntegral)
import GHC.Show (Show(showsPrec), showParen, shows, showString)

#if __GLASGOW_HASKELL__
import GHC.Word (Word(..))
#else
import Data.Word
#endif

-- | A @Nat@ is a natural machine word (an unsigned Int)
type Nat = Word

natFromInt :: Key -> Nat
natFromInt i = fromIntegral i

intFromNat :: Nat -> Key
intFromNat w = fromIntegral w

shiftRL :: Nat -> Key -> Nat
#if __GLASGOW_HASKELL__
-- GHC: use unboxing to get @shiftRL@ inlined.
shiftRL (W# x) (I# i) = W# (shiftRL# x i)
#else
shiftRL x i = shiftR x i
#endif

------------------------------------------------------------------------
-- Types

-- | A map of integers to values @a@.
data IntMap a = Nil
              | Tip {-# UNPACK #-} !Key !a
              | Bin {-# UNPACK #-} !Prefix
                    {-# UNPACK #-} !Mask
                    !(IntMap a)
                    !(IntMap a)

type Prefix = Int
type Mask   = Int
type Key    = Int

------------------------------------------------------------------------
-- Query

-- | /O(min(n,W))/ Lookup the value at a key in the map.  See also
-- 'Data.Map.lookup'.
lookup :: Key -> IntMap a -> Maybe a
lookup k t = let nk = natFromInt k in seq nk (lookupN nk t)

lookupN :: Nat -> IntMap a -> Maybe a
lookupN k t
  = case t of
      Bin _ m l r
        | zeroN k (natFromInt m) -> lookupN k l
        | otherwise              -> lookupN k r
      Tip kx x
        | (k == natFromInt kx)  -> Just x
        | otherwise             -> Nothing
      Nil -> Nothing

-- | /O(min(n,W))/. Is the key a member of the map?
--
-- > member 5 (fromList [(5,'a'), (3,'b')]) == True
-- > member 1 (fromList [(5,'a'), (3,'b')]) == False

member :: Key -> IntMap a -> Bool
member k m
  = case lookup k m of
      Nothing -> False
      Just _  -> True

------------------------------------------------------------------------
-- Construction

-- | /O(1)/ The empty map.
--
-- > empty      == fromList []
-- > size empty == 0
empty :: IntMap a
empty = Nil

------------------------------------------------------------------------
-- Insert

-- | /O(min(n,W))/ Insert with a function, combining new value and old
-- value.  @insertWith f key value mp@ will insert the pair (key,
-- value) into @mp@ if key does not exist in the map.  If the key does
-- exist, the function will insert the pair (key, f new_value
-- old_value).  The result is a pair where the first element is the
-- old value, if one was present, and the second is the modified map.
insertWith :: (a -> a -> a) -> Key -> a -> IntMap a -> (Maybe a, IntMap a)
insertWith f k x t = case t of
    Bin p m l r
        | nomatch k p m -> (Nothing, join k (Tip k x) p t)
        | zero k m      -> let (found, l') = insertWith f k x l
                           in (found, Bin p m l' r)
        | otherwise     -> let (found, r') = insertWith f k x r
                           in (found, Bin p m l r')
    Tip ky y
        | k == ky       -> (Just y, Tip k (f x y))
        | otherwise     -> (Nothing, join k (Tip k x) ky t)
    Nil                 -> (Nothing, Tip k x)


------------------------------------------------------------------------
-- Delete/Update

-- | /O(min(n,W))/. Delete a key and its value from the map.  When the
-- key is not a member of the map, the original map is returned.  The
-- result is a pair where the first element is the value associated
-- with the deleted key, if one existed, and the second element is the
-- modified map.
delete :: Key -> IntMap a -> (Maybe a, IntMap a)
delete k t = case t of
   Bin p m l r
        | nomatch k p m -> (Nothing, t)
        | zero k m      -> let (found, l') = delete k l
                           in (found, bin p m l' r)
        | otherwise     -> let (found, r') = delete k r
                           in (found, bin p m l r')
   Tip ky y
        | k == ky       -> (Just y, Nil)
        | otherwise     -> (Nothing, t)
   Nil                  -> (Nothing, Nil)

updateWith :: (a -> Maybe a) -> Key -> IntMap a -> (Maybe a, IntMap a)
updateWith f k t = case t of
    Bin p m l r
        | nomatch k p m -> (Nothing, t)
        | zero k m      -> let (found, l') = updateWith f k l
                           in (found, bin p m l' r)
        | otherwise     -> let (found, r') = updateWith f k r
                           in (found, bin p m l r')
    Tip ky y
        | k == ky       -> case (f y) of
                               Just y' -> (Just y, Tip ky y')
                               Nothing -> (Just y, Nil)
        | otherwise     -> (Nothing, t)
    Nil                 -> (Nothing, Nil)
-- | /O(n)/. Fold the keys and values in the map, such that
-- @'foldWithKey' f z == 'Prelude.foldr' ('uncurry' f) z . 'toAscList'@.
-- For example,
--
-- > keys map = foldWithKey (\k x ks -> k:ks) [] map
--
-- > let f k a result = result ++ "(" ++ (show k) ++ ":" ++ a ++ ")"
-- > foldWithKey f "Map: " (fromList [(5,"a"), (3,"b")]) == "Map: (5:a)(3:b)"

foldWithKey :: (Key -> a -> b -> b) -> b -> IntMap a -> b
foldWithKey f z t
  = foldr f z t

-- | /O(n)/. Convert the map to a list of key\/value pairs.
--
-- > toList (fromList [(5,"a"), (3,"b")]) == [(3,"b"), (5,"a")]
-- > toList empty == []

toList :: IntMap a -> [(Key,a)]
toList t
  = foldWithKey (\k x xs -> (k,x):xs) [] t

foldr :: (Key -> a -> b -> b) -> b -> IntMap a -> b
foldr f z t
  = case t of
      Bin 0 m l r | m < 0 -> foldr' f (foldr' f z l) r  -- put negative numbers before.
      Bin _ _ _ _ -> foldr' f z t
      Tip k x     -> f k x z
      Nil         -> z

foldr' :: (Key -> a -> b -> b) -> b -> IntMap a -> b
foldr' f z t
  = case t of
      Bin _ _ l r -> foldr' f (foldr' f z r) l
      Tip k x     -> f k x z
      Nil         -> z

-- | /O(n)/. Return all keys of the map in ascending order.
--
-- > keys (fromList [(5,"a"), (3,"b")]) == [3,5]
-- > keys empty == []

keys  :: IntMap a -> [Key]
keys m
  = foldWithKey (\k _ ks -> k:ks) [] m

------------------------------------------------------------------------
-- Eq

instance Eq a => Eq (IntMap a) where
    t1 == t2 = equal t1 t2
    t1 /= t2 = nequal t1 t2

equal :: Eq a => IntMap a -> IntMap a -> Bool
equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
    = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
equal (Tip kx x) (Tip ky y)
    = (kx == ky) && (x==y)
equal Nil Nil = True
equal _   _   = False

nequal :: Eq a => IntMap a -> IntMap a -> Bool
nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
    = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
nequal (Tip kx x) (Tip ky y)
    = (kx /= ky) || (x/=y)
nequal Nil Nil = False
nequal _   _   = True

instance Show a => Show (IntMap a) where
  showsPrec d m   = showParen (d > 10) $
    showString "fromList " . shows (toList m)

------------------------------------------------------------------------
-- Utility functions

join :: Prefix -> IntMap a -> Prefix -> IntMap a -> IntMap a
join p1 t1 p2 t2
  | zero p1 m = Bin p m t1 t2
  | otherwise = Bin p m t2 t1
  where
    m = branchMask p1 p2
    p = mask p1 m

-- | @bin@ assures that we never have empty trees within a tree.
bin :: Prefix -> Mask -> IntMap a -> IntMap a -> IntMap a
bin _ _ l Nil = l
bin _ _ Nil r = r
bin p m l r   = Bin p m l r

------------------------------------------------------------------------
-- Endian independent bit twiddling

zero :: Key -> Mask -> Bool
zero i m = (natFromInt i) .&. (natFromInt m) == 0

nomatch :: Key -> Prefix -> Mask -> Bool
nomatch i p m = (mask i m) /= p

mask :: Key -> Mask -> Prefix
mask i m = maskW (natFromInt i) (natFromInt m)

zeroN :: Nat -> Nat -> Bool
zeroN i m = (i .&. m) == 0

------------------------------------------------------------------------
-- Big endian operations

maskW :: Nat -> Nat -> Prefix
maskW i m = intFromNat (i .&. (complement (m-1) `xor` m))

branchMask :: Prefix -> Prefix -> Mask
branchMask p1 p2
    = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))

{-
Finding the highest bit mask in a word [x] can be done efficiently in
three ways:

* convert to a floating point value and the mantissa tells us the
  [log2(x)] that corresponds with the highest bit position. The mantissa
  is retrieved either via the standard C function [frexp] or by some bit
  twiddling on IEEE compatible numbers (float). Note that one needs to
  use at least [double] precision for an accurate mantissa of 32 bit
  numbers.

* use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).

* use processor specific assembler instruction (asm).

The most portable way would be [bit], but is it efficient enough?
I have measured the cycle counts of the different methods on an AMD
Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:

highestBitMask: method  cycles
                --------------
                 frexp   200
                 float    33
                 bit      11
                 asm      12

Wow, the bit twiddling is on today's RISC like machines even faster
than a single CISC instruction (BSR)!
-}

-- | @highestBitMask@ returns a word where only the highest bit is
-- set.  It is found by first setting all bits in lower positions than
-- the highest bit and than taking an exclusive or with the original
-- value.  Allthough the function may look expensive, GHC compiles
-- this into excellent C code that subsequently compiled into highly
-- efficient machine code. The algorithm is derived from Jorg Arndt's
-- FXT library.
highestBitMask :: Nat -> Nat
highestBitMask x0
  = case (x0 .|. shiftRL x0 1) of
     x1 -> case (x1 .|. shiftRL x1 2) of
      x2 -> case (x2 .|. shiftRL x2 4) of
       x3 -> case (x3 .|. shiftRL x3 8) of
        x4 -> case (x4 .|. shiftRL x4 16) of
         x5 -> case (x5 .|. shiftRL x5 32) of   -- for 64 bit platforms
          x6 -> (x6 `xor` (shiftRL x6 1))