summaryrefslogtreecommitdiff
path: root/compiler/utils/Util.hs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/utils/Util.hs')
-rw-r--r--compiler/utils/Util.hs1097
1 files changed, 1097 insertions, 0 deletions
diff --git a/compiler/utils/Util.hs b/compiler/utils/Util.hs
new file mode 100644
index 0000000000..7d44a5004b
--- /dev/null
+++ b/compiler/utils/Util.hs
@@ -0,0 +1,1097 @@
+-- (c) The University of Glasgow 2006
+
+{-# LANGUAGE CPP #-}
+
+-- | Highly random utility functions
+--
+module Util (
+ -- * Flags dependent on the compiler build
+ ghciSupported, debugIsOn, ncgDebugIsOn,
+ ghciTablesNextToCode,
+ isWindowsHost, isDarwinHost,
+
+ -- * General list processing
+ zipEqual, zipWithEqual, zipWith3Equal, zipWith4Equal,
+ zipLazy, stretchZipWith, zipWithAndUnzip,
+
+ unzipWith,
+
+ mapFst, mapSnd, chkAppend,
+ mapAndUnzip, mapAndUnzip3, mapAccumL2,
+ nOfThem, filterOut, partitionWith, splitEithers,
+
+ dropWhileEndLE,
+
+ foldl1', foldl2, count, all2,
+
+ lengthExceeds, lengthIs, lengthAtLeast,
+ listLengthCmp, atLength,
+ equalLength, compareLength, leLength,
+
+ isSingleton, only, singleton,
+ notNull, snocView,
+
+ isIn, isn'tIn,
+
+ -- * Tuples
+ fstOf3, sndOf3, thirdOf3,
+ firstM, first3M,
+ third3,
+ uncurry3,
+
+ -- * List operations controlled by another list
+ takeList, dropList, splitAtList, split,
+ dropTail,
+
+ -- * For loop
+ nTimes,
+
+ -- * Sorting
+ sortWith, minWith, nubSort,
+
+ -- * Comparisons
+ isEqual, eqListBy, eqMaybeBy,
+ thenCmp, cmpList,
+ removeSpaces,
+
+ -- * Edit distance
+ fuzzyMatch, fuzzyLookup,
+
+ -- * Transitive closures
+ transitiveClosure,
+
+ -- * Strictness
+ seqList,
+
+ -- * Module names
+ looksLikeModuleName,
+
+ -- * Argument processing
+ getCmd, toCmdArgs, toArgs,
+
+ -- * Floating point
+ readRational,
+
+ -- * read helpers
+ maybeRead, maybeReadFuzzy,
+
+ -- * IO-ish utilities
+ doesDirNameExist,
+ getModificationUTCTime,
+ modificationTimeIfExists,
+
+ global, consIORef, globalM,
+
+ -- * Filenames and paths
+ Suffix,
+ splitLongestPrefix,
+ escapeSpaces,
+ Direction(..), reslash,
+ makeRelativeTo,
+
+ -- * Utils for defining Data instances
+ abstractConstr, abstractDataType, mkNoRepType,
+
+ -- * Utils for printing C code
+ charToC,
+
+ -- * Hashing
+ hashString,
+ ) where
+
+#include "HsVersions.h"
+
+import Exception
+import Panic
+
+import Data.Data
+import Data.IORef ( IORef, newIORef, atomicModifyIORef )
+import System.IO.Unsafe ( unsafePerformIO )
+import Data.List hiding (group)
+
+#ifdef DEBUG
+import FastTypes
+#endif
+
+import Control.Monad ( liftM )
+import System.IO.Error as IO ( isDoesNotExistError )
+import System.Directory ( doesDirectoryExist, getModificationTime )
+import System.FilePath
+
+import Data.Char ( isUpper, isAlphaNum, isSpace, chr, ord, isDigit )
+import Data.Int
+import Data.Ratio ( (%) )
+import Data.Ord ( comparing )
+import Data.Bits
+import Data.Word
+import qualified Data.IntMap as IM
+import qualified Data.Set as Set
+
+import Data.Time
+
+infixr 9 `thenCmp`
+
+{-
+************************************************************************
+* *
+\subsection{Is DEBUG on, are we on Windows, etc?}
+* *
+************************************************************************
+
+These booleans are global constants, set by CPP flags. They allow us to
+recompile a single module (this one) to change whether or not debug output
+appears. They sometimes let us avoid even running CPP elsewhere.
+
+It's important that the flags are literal constants (True/False). Then,
+with -O, tests of the flags in other modules will simplify to the correct
+branch of the conditional, thereby dropping debug code altogether when
+the flags are off.
+-}
+
+-- | 'True' iff the @GHCI@ CPP macro was defined when this module was compiled.
+ghciSupported :: Bool
+#ifdef GHCI
+ghciSupported = True
+#else
+ghciSupported = False
+#endif
+
+-- | 'True' iff the @DEBUG@ CPP macro was defined when this module was compiled.
+debugIsOn :: Bool
+#ifdef DEBUG
+debugIsOn = True
+#else
+debugIsOn = False
+#endif
+
+-- | 'True' iff the @NCG_DEBUG@ CPP macro was defined when this module was compiled.
+ncgDebugIsOn :: Bool
+#ifdef NCG_DEBUG
+ncgDebugIsOn = True
+#else
+ncgDebugIsOn = False
+#endif
+
+-- | 'True' iff the @GHCI_TABLES_NEXT_TO_CODE@ CPP macro was defined when this
+-- module was compiled.
+ghciTablesNextToCode :: Bool
+#ifdef GHCI_TABLES_NEXT_TO_CODE
+ghciTablesNextToCode = True
+#else
+ghciTablesNextToCode = False
+#endif
+
+-- | 'True' iff the @mingw32_HOST_OS@ CPP macro was defined when this module
+-- was compiled.
+isWindowsHost :: Bool
+#ifdef mingw32_HOST_OS
+isWindowsHost = True
+#else
+isWindowsHost = False
+#endif
+
+-- | 'True' iff the @darwin_HOST_OS@ CPP macro was defined when this module
+-- was compiled.
+isDarwinHost :: Bool
+#ifdef darwin_HOST_OS
+isDarwinHost = True
+#else
+isDarwinHost = False
+#endif
+
+{-
+************************************************************************
+* *
+\subsection{A for loop}
+* *
+************************************************************************
+-}
+
+-- | Compose a function with itself @n@ times. (nth rather than twice)
+--
+-- A count of zero or less yields 'id', in the same way that 'replicate'
+-- treats a non-positive count as zero.  The earlier definition only stopped
+-- at exactly 0 or 1, so a negative count recursed without end.
+nTimes :: Int -> (a -> a) -> (a -> a)
+nTimes n f
+  | n <= 0    = id
+  | n == 1    = f                     -- avoids building a redundant @f . id@
+  | otherwise = f . nTimes (n - 1) f
+
+-- Projections out of a triple: first, second and third component.
+fstOf3 :: (a,b,c) -> a
+fstOf3 (x,_,_) = x
+
+sndOf3 :: (a,b,c) -> b
+sndOf3 (_,y,_) = y
+
+thirdOf3 :: (a,b,c) -> c
+thirdOf3 (_,_,z) = z
+
+-- | Apply a function to the third component of a triple, leaving the
+-- first two components untouched.
+third3 :: (c -> d) -> (a, b, c) -> (a, b, d)
+third3 g triple = case triple of
+  (x, y, z) -> (x, y, g z)
+
+-- | Turn a curried three-argument function into one that takes a triple.
+uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
+uncurry3 g = \(x, y, z) -> g x y z
+
+-- | Run a monadic function on the first component of a pair, keeping the
+-- second component as it is.
+firstM :: Monad m => (a -> m c) -> (a, b) -> m (c, b)
+firstM f (x, y) = do
+  x' <- f x
+  return (x', y)
+
+-- | Run a monadic function on the first component of a triple, keeping the
+-- other two components as they are.
+first3M :: Monad m => (a -> m d) -> (a, b, c) -> m (d, b, c)
+first3M f (x, y, z) = do
+  x' <- f x
+  return (x', y, z)
+
+{-
+************************************************************************
+* *
+\subsection[Utils-lists]{General list processing}
+* *
+************************************************************************
+-}
+
+filterOut :: (a->Bool) -> [a] -> [a]
+-- ^ Like 'filter', only it reverses the sense of the test: the elements
+-- for which the predicate holds are the ones that get dropped
+filterOut p = filter (not . p)
+
+partitionWith :: (a -> Either b c) -> [a] -> ([b], [c])
+-- ^ Classifies every element with the given function: 'Left' results are
+-- collected in the first list, 'Right' results in the second, in input order
+partitionWith f = foldr step ([], [])
+  where
+    -- The lazy pattern keeps the rest of the list unevaluated until needed
+    step x ~(lefts, rights) = case f x of
+      Left l  -> (l : lefts, rights)
+      Right r -> (lefts, r : rights)
+
+splitEithers :: [Either a b] -> ([a], [b])
+-- ^ Separates a list of 'Either's into its 'Left' payloads and its 'Right'
+-- payloads, each in their original order
+splitEithers = foldr step ([], [])
+  where
+    step e ~(ls, rs) = either (\l -> (l : ls, rs)) (\r -> (ls, r : rs)) e
+
+chkAppend :: [a] -> [a] -> [a]
+-- Appends two lists, but first checks whether the second argument is
+-- empty and, if so, returns the first list as it is.
+-- Intended for call sites where an empty second list is the common case.
+chkAppend xs [] = xs
+chkAppend xs ys = xs ++ ys
+
+{-
+A paranoid @zip@ (and some @zipWith@ friends) that checks the lists
+are of equal length. Alastair Reid thinks this should only happen if
+DEBUGging on; hey, why not?
+-}
+
+-- Length-checking variants of 'zip', 'zipWith', 'zipWith3' and 'zipWith4'.
+-- The leading String is only used in the panic message of the DEBUG build.
+zipEqual :: String -> [a] -> [b] -> [(a,b)]
+zipWithEqual :: String -> (a->b->c) -> [a]->[b]->[c]
+zipWith3Equal :: String -> (a->b->c->d) -> [a]->[b]->[c]->[d]
+zipWith4Equal :: String -> (a->b->c->d->e) -> [a]->[b]->[c]->[d]->[e]
+
+-- Without DEBUG the message is ignored and the plain library functions are
+-- used, so lists of different lengths are silently truncated.
+#ifndef DEBUG
+zipEqual _ = zip
+zipWithEqual _ = zipWith
+zipWith3Equal _ = zipWith3
+zipWith4Equal _ = zipWith4
+#else
+-- With DEBUG every function walks the lists in lock step and panics, quoting
+-- the caller's message, as soon as one list ends before the others.
+zipEqual _ [] [] = []
+zipEqual msg (a:as) (b:bs) = (a,b) : zipEqual msg as bs
+zipEqual msg _ _ = panic ("zipEqual: unequal lists:"++msg)
+
+zipWithEqual msg z (a:as) (b:bs)= z a b : zipWithEqual msg z as bs
+zipWithEqual _ _ [] [] = []
+zipWithEqual msg _ _ _ = panic ("zipWithEqual: unequal lists:"++msg)
+
+zipWith3Equal msg z (a:as) (b:bs) (c:cs)
+ = z a b c : zipWith3Equal msg z as bs cs
+zipWith3Equal _ _ [] [] [] = []
+zipWith3Equal msg _ _ _ _ = panic ("zipWith3Equal: unequal lists:"++msg)
+
+zipWith4Equal msg z (a:as) (b:bs) (c:cs) (d:ds)
+ = z a b c d : zipWith4Equal msg z as bs cs ds
+zipWith4Equal _ _ [] [] [] [] = []
+zipWith4Equal msg _ _ _ _ _ = panic ("zipWith4Equal: unequal lists:"++msg)
+#endif
+
+-- | 'zipLazy' is a 'zip' driven entirely by the first list: the second list
+-- is taken apart with a lazy (let-bound) pattern, so it is not inspected
+-- until a paired-up element is actually demanded
+zipLazy :: [a] -> [b] -> [(a,b)]
+zipLazy as bs = case as of
+  []         -> []
+  (a : rest) -> let (b : bs') = bs
+                in (a, b) : zipLazy rest bs'
+
+stretchZipWith :: (a -> Bool) -> b -> (a->b->c) -> [a] -> [b] -> [c]
+-- ^ @stretchZipWith p z f xs ys@ zips @xs@ with @ys@ using @f@, except that
+-- an @x@ satisfying @p@ is paired with the filler @z@ and consumes nothing
+-- from @ys@.  The result ends when @xs@ ends, or when @ys@ is exhausted at
+-- an @x@ that does not satisfy @p@.
+
+stretchZipWith p z f = go
+  where
+    go [] _ = []
+    go (x:xs) ys
+      | p x = f x z : go xs ys
+    -- Reached only when @p x@ was False, so an element of @ys@ is needed
+    go _ [] = []
+    go (x:xs) (y:ys) = f x y : go xs ys
+
+-- | Apply a function to the first component of every pair in a list.
+mapFst :: (a->c) -> [(a,b)] -> [(c,b)]
+mapFst f = map (\(x, y) -> (f x, y))
+
+-- | Apply a function to the second component of every pair in a list.
+mapSnd :: (b->c) -> [(a,b)] -> [(a,c)]
+mapSnd f = map (\(x, y) -> (x, f y))
+
+-- | Map a pair-producing function over a list and collect the first and
+-- second components into two separate lists.
+mapAndUnzip :: (a -> (b, c)) -> [a] -> ([b], [c])
+mapAndUnzip f = foldr step ([], [])
+  where
+    -- Both pairs are matched lazily, as with a let-bound pattern
+    step x ~(bs, cs) = (b : bs, c : cs)
+      where (b, c) = f x
+
+-- | Map a triple-producing function over a list and collect the three
+-- components into three separate lists.
+mapAndUnzip3 :: (a -> (b, c, d)) -> [a] -> ([b], [c], [d])
+mapAndUnzip3 f = foldr step ([], [], [])
+  where
+    -- Both triples are matched lazily, as with a let-bound pattern
+    step x ~(bs, cs, ds) = (b : bs, c : cs, d : ds)
+      where (b, c, d) = f x
+
+-- | Combine two lists element-wise with a pair-producing function and
+-- collect the two components into separate lists.  Stops at the end of
+-- the shorter input.
+zipWithAndUnzip :: (a -> b -> (c,d)) -> [a] -> [b] -> ([c],[d])
+zipWithAndUnzip f (a:as) (b:bs) = (c : cs, d : ds)
+  where
+    (c, d)   = f a b
+    (cs, ds) = zipWithAndUnzip f as bs
+zipWithAndUnzip _ _ _ = ([],[])
+
+-- | 'mapAccumL' with two accumulators threaded left to right: returns both
+-- final accumulators together with the list of per-element results.
+mapAccumL2 :: (s1 -> s2 -> a -> (s1, s2, b)) -> s1 -> s2 -> [a] -> (s1, s2, [b])
+mapAccumL2 f s1 s2 xs = (final1, final2, ys)
+  where
+    ((final1, final2), ys) = mapAccumL step (s1, s2) xs
+
+    -- Pack the two accumulators into the single state 'mapAccumL' expects
+    step (acc1, acc2) x = case f acc1 acc2 x of
+      (acc1', acc2', y) -> ((acc1', acc2'), y)
+
+-- | A list holding @n@ copies of the given value; a synonym for 'replicate'.
+nOfThem :: Int -> a -> [a]
+nOfThem = replicate
+
+-- | @atLength atLenPred atEndPred ls n@ unravels list @ls@ to position @n@. Precisely:
+--
+-- @
+--  atLength atLenPred atEndPred ls n
+--   | n < 0          = atEndPred n
+--   | length ls <= n = atEndPred (n - length ls)
+--   | otherwise      = atLenPred (drop n ls)
+-- @
+--
+-- Note that the list is never traversed beyond position @n@, and that
+-- @atLenPred@ is only ever applied to a non-empty list: a list of exactly
+-- @n@ elements is reported through @atEndPred 0@.
+atLength :: ([a] -> b)
+         -> (Int -> b)
+         -> [a]
+         -> Int
+         -> b
+atLength atLenPred atEndPred ls n
+  | n < 0     = atEndPred n
+  | otherwise = go n ls
+  where
+    go i []     = atEndPred i    -- list ran out with @i@ positions still to go
+    go 0 rest   = atLenPred rest -- reached position @n@ with elements left over
+    go i (_:xs) = go (i-1) xs
+
+-- Some special cases of atLength:
+
+lengthExceeds :: [a] -> Int -> Bool
+-- ^ > (lengthExceeds xs n) = (length xs > n)
+--
+-- A negative @n@ is answered directly: 'atLength' would pass it to the
+-- end-of-list predicate, which says False although every length exceeds it.
+lengthExceeds xs n
+  | n < 0     = True
+  | otherwise = atLength (not . null) (const False) xs n
+
+lengthAtLeast :: [a] -> Int -> Bool
+-- ^ > (lengthAtLeast xs n) = (length xs >= n)
+--
+-- A negative @n@ is answered directly, for the same reason as in
+-- 'lengthExceeds'.
+lengthAtLeast xs n
+  | n < 0     = True
+  | otherwise = atLength (not . null) (== 0) xs n
+
+lengthIs :: [a] -> Int -> Bool
+-- ^ > (lengthIs xs n) = (length xs == n)
+lengthIs = atLength null (==0)
+
+listLengthCmp :: [a] -> Int -> Ordering
+-- ^ > (listLengthCmp xs n) = (length xs `compare` n)
+listLengthCmp = atLength atLen atEnd
+  where
+    atEnd 0 = EQ
+    atEnd x
+      | x > 0     = LT -- not yet seen 'n' elts, so list length is < n.
+      | otherwise = GT -- negative n: any length is greater
+
+    atLen [] = EQ
+    atLen _  = GT
+
+-- | True iff the two lists have the same number of elements.  Walks both
+-- lists in lock step and stops as soon as either one ends.
+equalLength :: [a] -> [b] -> Bool
+equalLength xs ys = case (xs, ys) of
+  ([], [])         -> True
+  (_:xs', _:ys')   -> equalLength xs' ys'
+  _                -> False
+
+-- | Compare the lengths of two lists without computing either length:
+-- the result is known as soon as the shorter list runs out.
+compareLength :: [a] -> [b] -> Ordering
+compareLength [] ys = case ys of
+  [] -> EQ
+  _  -> LT
+compareLength (_:xs) ys = case ys of
+  []      -> GT
+  (_:ys') -> compareLength xs ys'
+
+leLength :: [a] -> [b] -> Bool
+-- ^ True if length xs <= length ys, i.e. unless 'compareLength' says GT
+leLength xs ys = compareLength xs ys /= GT
+
+----------------------------
+-- | Wrap a value in a one-element list.
+singleton :: a -> [a]
+singleton = (: [])
+
+-- | True iff the list has exactly one element.
+isSingleton :: [a] -> Bool
+isSingleton xs = case xs of
+  [_] -> True
+  _   -> False
+
+-- | True iff the list has at least one element; the negation of 'null'.
+notNull :: [a] -> Bool
+notNull = not . null
+
+-- | Extract the element of a list that is expected to hold exactly one.
+-- With @DEBUG@ defined only a one-element list is accepted; otherwise the
+-- head of any non-empty list is returned without checking the tail.
+-- Every input not matched by the selected equation calls 'panic'.
+only :: [a] -> a
+#ifdef DEBUG
+only [a] = a
+#else
+only (a:_) = a
+#endif
+only _ = panic "Util: only"
+
+-- Debugging/specialising versions of \tr{elem} and \tr{notElem}
+
+-- 'isIn' and 'isn'tIn' behave as 'elem' and 'notElem'.  The String argument
+-- is only used by the DEBUG build, which names it in a trace message when
+-- the search goes past roughly 100 elements.
+isIn, isn'tIn :: Eq a => String -> a -> [a] -> Bool
+
+# ifndef DEBUG
+isIn _msg x ys = x `elem` ys
+isn'tIn _msg x ys = x `notElem` ys
+
+# else /* DEBUG */
+-- The counter uses the _ILIT, ># and +# names
+-- (NOTE(review): presumably from FastTypes, imported under DEBUG - confirm).
+isIn msg x ys
+ = elem100 (_ILIT(0)) x ys
+ where
+ elem100 _ _ [] = False
+ elem100 i x (y:ys)
+ -- Counter passed 100: emit the trace, then finish with plain 'elem'
+ | i ># _ILIT(100) = trace ("Over-long elem in " ++ msg)
+ (x `elem` (y:ys))
+ | otherwise = x == y || elem100 (i +# _ILIT(1)) x ys
+
+isn'tIn msg x ys
+ = notElem100 (_ILIT(0)) x ys
+ where
+ notElem100 _ _ [] = True
+ notElem100 i x (y:ys)
+ -- Counter passed 100: emit the trace, then finish with plain 'notElem'
+ | i ># _ILIT(100) = trace ("Over-long notElem in " ++ msg)
+ (x `notElem` (y:ys))
+ | otherwise = x /= y && notElem100 (i +# _ILIT(1)) x ys
+# endif /* DEBUG */
+
+{-
+************************************************************************
+* *
+\subsubsection{Sort utils}
+* *
+************************************************************************
+-}
+
+-- | Sort a list by comparing the keys that @get_key@ extracts from the
+-- elements.  Uses 'sortBy', so elements with equal keys keep their order.
+sortWith :: Ord b => (a->b) -> [a] -> [a]
+sortWith get_key = sortBy by_key
+  where
+    by_key l r = compare (get_key l) (get_key r)
+
+-- | The element of @xs@ with the smallest key under @get_key@, obtained as
+-- the head of the key-sorted list.  The list must be non-empty: 'head'
+-- fails otherwise, and the ASSERT states that precondition
+-- (NOTE(review): ASSERT presumably comes from HsVersions.h and is only
+-- checked in DEBUG builds - confirm).
+minWith :: Ord b => (a -> b) -> [a] -> a
+minWith get_key xs = ASSERT( not (null xs) )
+ head (sortWith get_key xs)
+
+-- | Sort a list in ascending order and drop duplicate elements, by
+-- round-tripping it through a 'Set.Set'.
+nubSort :: Ord a => [a] -> [a]
+nubSort xs = Set.toAscList (Set.fromList xs)
+
+{-
+************************************************************************
+* *
+\subsection[Utils-transitive-closure]{Transitive closure}
+* *
+************************************************************************
+
+This algorithm for transitive closure is straightforward, albeit quadratic.
+-}
+
+-- | Everything reachable from the given start elements via the successor
+-- function, using the supplied equality to recognise elements that have
+-- already been visited.  Each newly found element is put at the front of
+-- the result, so later discoveries come first.
+transitiveClosure :: (a -> [a]) -- Successor function
+                  -> (a -> a -> Bool) -- Equality predicate
+                  -> [a]
+                  -> [a] -- The transitive closure
+
+transitiveClosure succ eq = go []
+  where
+    go done [] = done
+    go done (x:pending)
+      | any (eq x) done = go done pending            -- already visited
+      | otherwise       = go (x : done) (succ x ++ pending)
+
+{-
+************************************************************************
+* *
+\subsection[Utils-accum]{Accumulating}
+* *
+************************************************************************
+
+A combination of foldl with zip. It works with equal length lists.
+-}
+
+-- | Left fold over two lists in lock step, feeding one element of each to
+-- the combining function at every step.  Panics if one list ends before
+-- the other.
+foldl2 :: (acc -> a -> b -> acc) -> acc -> [a] -> [b] -> acc
+foldl2 k = go
+  where
+    go z []     []     = z
+    go z (a:as) (b:bs) = go (k z a b) as bs
+    go _ _      _      = panic "Util: foldl2"
+
+all2 :: (a -> b -> Bool) -> [a] -> [b] -> Bool
+-- True iff both lists have the same length and the predicate holds for
+-- every pair of elements at the same position
+all2 p xs ys = case (xs, ys) of
+  ([], [])         -> True
+  (x:xs', y:ys')   -> p x y && all2 p xs' ys'
+  _                -> False
+
+-- Count the number of times a predicate is true
+--
+-- Implemented as a strict left fold so the running total is kept evaluated
+-- and stack use stays constant; the earlier @1 + count p xs@ recursion
+-- needed stack proportional to the length of the list.
+
+count :: (a -> Bool) -> [a] -> Int
+count p = foldl' step 0
+  where
+    step n x
+      | p x       = n + 1
+      | otherwise = n
+
+{-
+@splitAt@, @take@, and @drop@ but with length of another
+list giving the break-off point:
+-}
+
+-- | Like 'take', with the length of the first list as the count: keeps one
+-- element of the second list per element of the first.
+takeList :: [b] -> [a] -> [a]
+takeList = zipWith (\_ x -> x)
+
+-- | Like 'drop', with the length of the first list as the count: removes
+-- one element of the second list per element of the first.
+dropList :: [b] -> [a] -> [a]
+dropList (_:ns) (_:xs) = dropList ns xs
+dropList _      xs     = xs   -- either list exhausted: nothing more to drop
+
+
+
+-- | Like 'splitAt', with the length of the first list as the split point.
+splitAtList :: [b] -> [a] -> ([a], [a])
+splitAtList (_:ns) (x:xs) = (x : front, back)
+  where
+    (front, back) = splitAtList ns xs
+splitAtList _ xs = ([], xs)   -- either list exhausted: the rest is the suffix
+
+-- drop from the end of a list
+dropTail :: Int -> [a] -> [a]
+-- Specification: dropTail n = reverse . drop n . reverse
+-- Implementation idea due to Joachim Breitner
+-- http://www.joachim-breitner.de/blog/archives/600-On-taking-the-last-n-elements-of-a-list.html
+-- The list is zipped against itself with its first n elements removed.
+-- That shortened copy runs out first, which ends the result exactly
+-- n elements before the end of the list.
+dropTail n xs = zipWith (\_ x -> x) (drop n xs) xs
+
+-- dropWhile from the end of a list.
+-- Specification: dropWhileEndLE p = reverse . dropWhile p . reverse
+--
+-- This is similar to Data.List.dropWhileEnd, but it is lazy in the elements
+-- and strict in the spine.  For reasonably short lists, such as path names
+-- and typical lines of text, it is generally faster than dropWhileEnd, and
+-- the more so the more expensive the predicate is (for example, stripping
+-- trailing space off a line of text with isSpace).
+--
+-- The argument order of the short-circuit (&&) matters: the rest of the
+-- result is inspected first, and the predicate is only applied to an
+-- element when nothing after it has been kept.  That order is the only
+-- difference between dropWhileEnd and dropWhileEndLE.
+dropWhileEndLE :: (a -> Bool) -> [a] -> [a]
+dropWhileEndLE p = foldr keep []
+  where
+    keep x kept
+      | null kept && p x = []
+      | otherwise        = x : kept
+
+-- | Split a list into everything before its last element and the last
+-- element itself; 'Nothing' for the empty list.  The whole list is
+-- traversed before the 'Just' is returned.
+snocView :: [a] -> Maybe ([a],a)
+ -- Split off the last element
+snocView [] = Nothing
+snocView xs = go [] xs
+ where
+ -- Invariant: second arg is non-empty
+ -- acc holds the elements seen so far in reverse order, hence the 'reverse'
+ go acc [x] = Just (reverse acc, x)
+ go acc (x:xs) = go (x:acc) xs
+ -- Excluded by the invariant above: the empty list is handled by snocView
+ go _ [] = panic "Util: snocView"
+
+-- | Cut a string at every occurrence of the separator character.  The
+-- separators are removed; adjacent separators (and a leading or trailing
+-- one) produce empty pieces, and the empty string yields a single empty
+-- piece.
+split :: Char -> String -> [String]
+split c s
+  | (_ : after) <- remainder = piece : split c after
+  | otherwise                = [piece]
+  where
+    (piece, remainder) = span (/= c) s
+
+{-
+************************************************************************
+* *
+\subsection[Utils-comparison]{Comparisons}
+* *
+************************************************************************
+-}
+
+isEqual :: Ordering -> Bool
+-- True exactly for EQ.
+-- Often used in (isEqual (a `compare` b))
+isEqual ordering = case ordering of
+  EQ -> True
+  _  -> False
+
+-- | Chain two comparison results: the second is consulted only when the
+-- first is 'EQ'.
+thenCmp :: Ordering -> Ordering -> Ordering
+{-# INLINE thenCmp #-}
+thenCmp primary secondary = case primary of
+  EQ -> secondary
+  _  -> primary
+
+-- | List equality under a caller-supplied element equality: the lists must
+-- have the same length and agree element by element.
+eqListBy :: (a->a->Bool) -> [a] -> [a] -> Bool
+eqListBy eq = go
+  where
+    go []     []     = True
+    go (x:xs) (y:ys) = eq x y && go xs ys
+    go _      _      = False
+
+-- | 'Maybe' equality under a caller-supplied equality for the payload:
+-- two 'Nothing's are equal, two 'Just's are compared with @eq@, and a
+-- 'Nothing' never equals a 'Just'.
+eqMaybeBy :: (a ->a->Bool) -> Maybe a -> Maybe a -> Bool
+eqMaybeBy eq mx my = case (mx, my) of
+  (Nothing, Nothing) -> True
+  (Just x,  Just y)  -> eq x y
+  _                  -> False
+
+cmpList :: (a -> a -> Ordering) -> [a] -> [a] -> Ordering
+    -- `cmpList' compares two lists lexicographically with a user-specified
+    -- comparer; a list that is a proper prefix of the other is the smaller
+
+cmpList cmp = go
+  where
+    go []     []     = EQ
+    go []     _      = LT
+    go _      []     = GT
+    go (a:as) (b:bs)
+      | EQ <- outcome = go as bs
+      | otherwise     = outcome
+      where outcome = cmp a b
+
+-- | Strip leading and trailing white space (as judged by 'isSpace') from a
+-- string; white space in the interior is kept.
+removeSpaces :: String -> String
+removeSpaces str = dropWhileEndLE isSpace (dropWhile isSpace str)
+
+{-
+************************************************************************
+* *
+\subsection{Edit distance}
+* *
+************************************************************************
+-}
+
+-- | Find the "restricted" Damerau-Levenshtein edit distance between two strings.
+-- See: <http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance>.
+-- Based on the algorithm presented in "A Bit-Vector Algorithm for Computing
+-- Levenshtein and Damerau Edit Distances" in PSC'02 (Heikki Hyyro).
+-- See http://www.cs.uta.fi/~helmu/pubs/psc02.pdf and
+-- http://www.cs.uta.fi/~helmu/pubs/PSCerr.html for an explanation
+--
+-- This entry point only measures the two strings and hands over to
+-- 'restrictedDamerauLevenshteinDistanceWithLengths'.
+restrictedDamerauLevenshteinDistance :: String -> String -> Int
+restrictedDamerauLevenshteinDistance str1 str2
+  = restrictedDamerauLevenshteinDistanceWithLengths
+      (length str1) (length str2) str1 str2
+
+-- Arranges the arguments so that the shorter string (with its length) comes
+-- first, and picks the bit-vector type: 'Word32' when the longer string has
+-- at most 32 characters, 'Integer' otherwise.  Checking only the longer
+-- length is sufficient because the other one cannot be larger.
+restrictedDamerauLevenshteinDistanceWithLengths
+  :: Int -> Int -> String -> String -> Int
+restrictedDamerauLevenshteinDistanceWithLengths m n str1 str2
+  | m <= n    = dispatch n m n str1 str2
+  | otherwise = dispatch m n m str2 str1
+  where
+    -- @longer@ is the larger of the two lengths; the remaining arguments are
+    -- forwarded in the order given
+    dispatch longer len_a len_b str_a str_b
+      | longer <= 32
+      = restrictedDamerauLevenshteinDistance' (undefined :: Word32) len_a len_b str_a str_b
+      | otherwise
+      = restrictedDamerauLevenshteinDistance' (undefined :: Integer) len_a len_b str_a str_b
+
+-- Runs the bit-vector computation for one choice of vector type.
+-- The first argument is never evaluated; it only fixes the type @bv@ (via
+-- 'asTypeOf' below).  @m@ and @n@ are passed in by the caller as the lengths
+-- of @str1@ and @str2@.  The match vectors are built from @str1@, and the
+-- worker is folded over the characters of @str2@.
+restrictedDamerauLevenshteinDistance'
+ :: (Bits bv, Num bv) => bv -> Int -> Int -> String -> String -> Int
+restrictedDamerauLevenshteinDistance' _bv_dummy m n str1 str2
+ -- Empty str1: the answer is n, and the masks below are never computed
+ | [] <- str1 = n
+ | otherwise = extractAnswer $
+ foldl' (restrictedDamerauLevenshteinDistanceWorker
+ (matchVectors str1) top_bit_mask vector_mask)
+ (0, 0, m_ones, 0, m) str2
+ where
+ -- The low m bits set; used both as the initial third state component and
+ -- as the mask that keeps vectors m bits wide
+ m_ones@vector_mask = (2 ^ m) - 1
+ -- Only bit (m - 1) set
+ top_bit_mask = (1 `shiftL` (m - 1)) `asTypeOf` _bv_dummy
+ -- The running distance is the last component of the fold state
+ extractAnswer (_, _, _, _, distance) = distance
+
+-- One step of the fold: consumes one character of the second string and
+-- updates the state @(pm, d0, vp, vn, distance)@.
+-- NOTE(review): the names presumably follow the cited Hyyro paper (match
+-- vector, diagonal, vertical/horizontal positive/negative deltas) - confirm
+-- against the paper before relying on that reading.
+restrictedDamerauLevenshteinDistanceWorker
+ :: (Bits bv, Num bv) => IM.IntMap bv -> bv -> bv
+ -> (bv, bv, bv, bv, Int) -> Char -> (bv, bv, bv, bv, Int)
+restrictedDamerauLevenshteinDistanceWorker str1_mvs top_bit_mask vector_mask
+ (pm, d0, vp, vn, distance) char2
+ -- Force the inputs and every new state component before building the
+ -- result tuple, so the fold does not accumulate unevaluated thunks
+ = seq str1_mvs $ seq top_bit_mask $ seq vector_mask $
+ seq pm' $ seq d0' $ seq vp' $ seq vn' $
+ seq distance'' $ seq char2 $
+ (pm', d0', vp', vn', distance'')
+ where
+ -- Match vector of the current character; 0 if it never occurs in str1
+ pm' = IM.findWithDefault 0 (ord char2) str1_mvs
+
+ d0' = ((((sizedComplement vector_mask d0) .&. pm') `shiftL` 1) .&. pm)
+ .|. ((((pm' .&. vp) + vp) .&. vector_mask) `xor` vp) .|. pm' .|. vn
+ -- No need to mask the shiftL because of the restricted range of pm
+
+ hp' = vn .|. sizedComplement vector_mask (d0' .|. vp)
+ hn' = d0' .&. vp
+
+ -- Shift by one position (shifting a 1 into hp'), then cut back to the
+ -- vector width
+ hp'_shift = ((hp' `shiftL` 1) .|. 1) .&. vector_mask
+ hn'_shift = (hn' `shiftL` 1) .&. vector_mask
+ vp' = hn'_shift .|. sizedComplement vector_mask (d0' .|. hp'_shift)
+ vn' = d0' .&. hp'_shift
+
+ -- The top bit of hp' adds one to the distance; the top bit of hn' then
+ -- takes one away
+ distance' = if hp' .&. top_bit_mask /= 0 then distance + 1 else distance
+ distance'' = if hn' .&. top_bit_mask /= 0 then distance' - 1 else distance'
+
+-- | Complement of @vect@ restricted to the width given by @vector_mask@,
+-- computed as @vector_mask `xor` vect@.
+sizedComplement :: Bits bv => bv -> bv -> bv
+sizedComplement = xor
+
+-- | For every character of the string, a bit vector with bit @i@ set
+-- exactly when the character occurs at (0-based) position @i@.  The map is
+-- keyed by the character's code point.
+matchVectors :: (Bits bv, Num bv) => String -> IM.IntMap bv
+matchVectors = snd . foldl' step (0 :: Int, IM.empty)
+  where
+    -- Both the next index and the updated map are forced at every step
+    step (ix, im) char = next_ix `seq` next_im `seq` (next_ix, next_im)
+      where
+        next_ix = ix + 1
+        next_im = IM.insertWith (.|.) (ord char) (2 ^ ix) im
+
+-- Specialisations of the edit-distance functions at the two bit-vector
+-- types they are called with in this module, 'Word32' and 'Integer'.
+{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
+ :: Word32 -> Int -> Int -> String -> String -> Int #-}
+{-# SPECIALIZE INLINE restrictedDamerauLevenshteinDistance'
+ :: Integer -> Int -> Int -> String -> String -> Int #-}
+
+{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
+ :: IM.IntMap Word32 -> Word32 -> Word32
+ -> (Word32, Word32, Word32, Word32, Int)
+ -> Char -> (Word32, Word32, Word32, Word32, Int) #-}
+{-# SPECIALIZE restrictedDamerauLevenshteinDistanceWorker
+ :: IM.IntMap Integer -> Integer -> Integer
+ -> (Integer, Integer, Integer, Integer, Int)
+ -> Char -> (Integer, Integer, Integer, Integer, Int) #-}
+
+{-# SPECIALIZE INLINE sizedComplement :: Word32 -> Word32 -> Word32 #-}
+{-# SPECIALIZE INLINE sizedComplement :: Integer -> Integer -> Integer #-}
+
+{-# SPECIALIZE matchVectors :: String -> IM.IntMap Word32 #-}
+{-# SPECIALIZE matchVectors :: String -> IM.IntMap Integer #-}
+
+-- | The candidates that are close to the key, as chosen and ranked by
+-- 'fuzzyLookup' with every candidate standing for itself.
+fuzzyMatch :: String -> [String] -> [String]
+fuzzyMatch key vals = fuzzyLookup key (zip vals vals)
+
+-- | Search for possible matches to the user's input in the given list,
+-- returning a small number of results ranked by increasing edit distance
+fuzzyLookup :: String -> [(String,a)] -> [a]
+fuzzyLookup user_entered candidates
+  = map fst (take max_results ranked)
+  where
+    -- 'sortBy' is stable, so equally distant candidates keep their order
+    ranked = sortBy (comparing snd) scored
+
+    scored = [ (val, dist)
+             | (str, val) <- candidates
+             , let dist = restrictedDamerauLevenshteinDistance str user_entered
+             , dist <= threshold ]
+
+    -- Work out an appropriate match threshold:
+    -- We report a candidate if its edit distance is <= the threshold,
+    -- The threshold is set to about a quarter of the # of characters the user entered
+    --   Length    Threshold
+    --     1         0          -- Don't suggest *any* candidates
+    --     2         1          -- for single-char identifiers
+    --     3         1
+    --     4         1
+    --     5         1
+    --     6         2
+    --
+    -- (length + 2) is never negative, so rounding down with `div` agrees
+    -- with truncating the exact quotient
+    threshold = (length user_entered + 2) `div` 4
+    max_results = 3
+
+{-
+************************************************************************
+* *
+\subsection[Utils-pairs]{Pairs}
+* *
+************************************************************************
+-}
+
+-- | Apply a two-argument function to the components of every pair in a list.
+unzipWith :: (a -> b -> c) -> [(a, b)] -> [c]
+unzipWith f pairs = [ f a b | (a, b) <- pairs ]
+
+-- | Evaluate every element of the list to weak head normal form, front to
+-- back, and then return the second argument.
+seqList :: [a] -> b -> b
+seqList xs b = foldr seq b xs
+
+-- Global variables:
+
+-- | Create an 'IORef' holding the given initial value, outside of the IO
+-- monad, by running 'newIORef' under 'unsafePerformIO'.
+-- NOTE(review): for the result to act as one shared variable, each use is
+-- presumably a top-level binding that is protected from inlining (e.g. with
+-- a NOINLINE pragma) - nothing in this function enforces that; confirm at
+-- the call sites.
+global :: a -> IORef a
+global a = unsafePerformIO (newIORef a)
+
+-- | Atomically put an element at the front of the list held in an 'IORef'.
+consIORef :: IORef [a] -> a -> IO ()
+consIORef var x = atomicModifyIORef var prepend
+  where
+    prepend xs = (x : xs, ())
+
+-- | Like 'global', but the initial value is produced by an IO action, which
+-- is run (together with 'newIORef') under 'unsafePerformIO' when the
+-- resulting 'IORef' is first evaluated.
+-- NOTE(review): the same caveat about sharing and inlining as for any
+-- 'unsafePerformIO'-created reference presumably applies - confirm at the
+-- call sites.
+globalM :: IO a -> IORef a
+globalM ma = unsafePerformIO (ma >>= newIORef)
+
+-- Module names:
+
+-- | Does the string have the shape of a (possibly dotted) module name?
+-- Every dot-separated component must be non-empty, start with an upper-case
+-- character and continue with alphanumeric characters, underscores or
+-- single quotes.
+looksLikeModuleName :: String -> Bool
+looksLikeModuleName str = case str of
+  []           -> False
+  (initial:cs) -> isUpper initial && restOk cs
+  where
+    restOk []          = True
+    restOk ('.':next)  = looksLikeModuleName next   -- start of a new component
+    restOk (ch:more)   = componentChar ch && restOk more
+
+    componentChar ch = isAlphaNum ch || ch == '_' || ch == '\''
+
+{-
+Akin to @Prelude.words@, but acts like the Bourne shell, treating
+quoted strings as Haskell Strings, and also parses Haskell [String]
+syntax.
+-}
+
+-- | Split off the first white-space-delimited word of a string as the
+-- command; the remainder (including the white space that ended the word)
+-- is returned untouched.  Fails if no word is found.
+getCmd :: String -> Either String             -- Error
+                           (String, String)   -- (Cmd, Rest)
+getCmd s
+  | null cmd  = Left ("Couldn't find command in " ++ show s)
+  | otherwise = Right (cmd, rest)
+  where
+    (cmd, rest) = break isSpace (dropWhile isSpace s)
+
+-- | Split a string into a command ('getCmd') and its arguments ('toArgs').
+-- The first failure of either step is returned as the error.
+toCmdArgs :: String -> Either String             -- Error
+                              (String, [String]) -- (Cmd, Args)
+toCmdArgs s = do
+  (cmd, rest) <- getCmd s
+  args        <- toArgs rest
+  return (cmd, args)
+
+-- | Split a string into arguments.
+--
+-- If the string (after leading white space) starts with @[@, it must parse
+-- as a Haskell @[String]@ literal, followed by nothing but white space.
+-- Otherwise it is split at white space, except that an argument starting
+-- with a double quote is read as a Haskell 'String' literal, which must be
+-- followed by white space or the end of the input.
+--
+-- Returns 'Left' with a message when a literal cannot be read.  The
+-- messages put a space between the quoted input and the word "as"; it
+-- was missing before, which ran the two together.
+toArgs :: String -> Either String   -- Error
+                           [String] -- Args
+toArgs str
+  = case dropWhile isSpace str of
+      s@('[':_) -> case reads s of
+                     [(args, spaces)]
+                       | all isSpace spaces ->
+                         Right args
+                     _ ->
+                         Left ("Couldn't read " ++ show str ++ " as [String]")
+      s -> toArgs' s
+  where
+    toArgs' s = case dropWhile isSpace s of
+      [] -> Right []
+      -- 'reads' skips leading white space itself, so @s@ is passed as is
+      ('"' : _) -> case reads s of
+        [(arg, rest)]
+          -- rest must either be [] or start with a space
+          | all isSpace (take 1 rest) ->
+            case toArgs' rest of
+              Left err   -> Left err
+              Right args -> Right (arg : args)
+        _ ->
+            Left ("Couldn't read " ++ show s ++ " as String")
+      s' -> case break isSpace s' of
+        (arg, s'') -> case toArgs' s'' of
+          Left err   -> Left err
+          Right args -> Right (arg : args)
+
+{-
+-- -----------------------------------------------------------------------------
+-- Floats
+-}
+
+-- Parses an unsigned decimal number: one or more digits, an optional
+-- fractional part introduced by '.', and an optional exponent introduced by
+-- 'e' or 'E'.  Runs in the list monad, so a failed parse is the empty list.
+readRational__ :: ReadS Rational -- NB: doesn't handle leading "-"
+readRational__ r = do
+ (n,d,s) <- readFix r
+ (k,t) <- readExp s
+ -- n is all digits read as one integer and d the number of fractional
+ -- digits, so the value is n * 10^(k-d)
+ return ((n%1)*10^^(k-d), t)
+ where
+ -- Integer and fractional digits, returned as (all digits as a number,
+ -- number of fractional digits, remaining input)
+ readFix r = do
+ (ds,s) <- lexDecDigits r
+ (ds',t) <- lexDotDigits s
+ return (read (ds++ds'), length ds', t)
+
+ -- Optional exponent; without one the exponent is 0
+ readExp (e:s) | e `elem` "eE" = readExp' s
+ readExp s = return (0,s)
+
+ -- Exponent with an optional sign
+ readExp' ('+':s) = readDec s
+ readExp' ('-':s) = do (k,t) <- readDec s
+ return (-k,t)
+ readExp' s = readDec s
+
+ -- One or more digits as an Int; foldl1 is safe because nonnull
+ -- guarantees at least one digit
+ readDec s = do
+ (ds,r) <- nonnull isDigit s
+ return (foldl1 (\n d -> n * 10 + d) [ ord d - ord '0' | d <- ds ],
+ r)
+
+ lexDecDigits = nonnull isDigit
+
+ -- The digits after a '.', possibly none; "" when there is no '.'
+ lexDotDigits ('.':s) = return (span isDigit s)
+ lexDotDigits s = return ("",s)
+
+ -- The longest non-empty prefix satisfying p; the parse fails if that
+ -- prefix is empty (the pattern match fails in the list monad)
+ nonnull p s = do (cs@(_:_),t) <- return (span p s)
+ return (cs,t)
+
+-- | Read a decimal number (optionally with a fraction and an exponent) as
+-- a 'Rational'.  The whole string must be consumed; otherwise, or if the
+-- parse is ambiguous, 'error' is called with the full input in the message.
+readRational :: String -> Rational -- NB: *does* handle a leading "-"
+readRational top_s = case top_s of
+    '-' : digits -> negate (parse digits)
+    digits       -> parse digits
+  where
+    -- Keep only the parses that leave no input behind
+    parse s = case [ x | (x, "") <- readRational__ s ] of
+      [x] -> x
+      []  -> error ("readRational: no parse:" ++ top_s)
+      _   -> error ("readRational: ambiguous parse:" ++ top_s)
+
+
+
+-----------------------------------------------------------------------------
+-- read helpers
+
+-- | Parse a value with 'reads'; succeeds only when there is exactly one
+-- parse and it consumes the entire string.
+maybeRead :: Read a => String -> Maybe a
+maybeRead str
+  | [(x, "")] <- reads str = Just x
+  | otherwise              = Nothing
+
+-- | Like 'maybeRead', but the single parse may be followed by white space.
+maybeReadFuzzy :: Read a => String -> Maybe a
+maybeReadFuzzy str
+  | [(x, leftover)] <- reads str
+  , all isSpace leftover = Just x
+  | otherwise            = Nothing
+
+-----------------------------------------------------------------------------
+-- Check that the directory part of a FilePath (as given by 'takeDirectory')
+-- exists.
+--
+doesDirNameExist :: FilePath -> IO Bool
+doesDirNameExist = doesDirectoryExist . takeDirectory
+
+-----------------------------------------------------------------------------
+-- Backwards compatibility name for 'getModificationTime', fixed to the
+-- 'UTCTime' result type
+
+getModificationUTCTime :: FilePath -> IO UTCTime
+getModificationUTCTime path = getModificationTime path
+
+-- --------------------------------------------------------------
+-- Check existence and modification time in one go: 'Nothing' when the
+-- file does not exist, 'Just' its modification time otherwise.  Any other
+-- IO error is passed on.
+
+modificationTimeIfExists :: FilePath -> IO (Maybe UTCTime)
+modificationTimeIfExists f =
+  liftM Just (getModificationUTCTime f) `catchIO` missingIsNothing
+  where
+    missingIsNothing e
+      | isDoesNotExistError e = return Nothing
+      | otherwise             = ioError e
+
+-- Split a string at the last character for which 'pred' is True.  The
+-- first component of the result is everything before that character, the
+-- second everything after it; the character itself appears in neither.
+--
+-- If 'pred' is False for every character, the first component is the
+-- original string and the second is empty.
+splitLongestPrefix :: String -> (Char -> Bool) -> (String,String)
+splitLongestPrefix str pred = case rev_prefix of
+    []               -> (str, [])
+    -- the head is the character satisfying 'pred'; drop it
+    (_ : rev_before) -> (reverse rev_before, reverse rev_suffix)
+  where
+    -- Searching from the end finds the *last* matching character
+    (rev_suffix, rev_prefix) = break pred (reverse str)
+
+-- | Put a backslash in front of every white-space character (as judged by
+-- 'isSpace'); all other characters are copied unchanged.
+escapeSpaces :: String -> String
+escapeSpaces = concatMap escape
+  where
+    escape c
+      | isSpace c = ['\\', c]
+      | otherwise = [c]
+
+-- | A plain 'String' synonym.
+-- NOTE(review): presumably a file-name suffix/extension; it is not used in
+-- this module, so confirm against the importing modules.
+type Suffix = String
+
+--------------------------------------------------------------
+-- * Search path
+--------------------------------------------------------------
+
+-- | Which slash 'reslash' should produce: 'Forwards' is @/@, 'Backwards'
+-- is @\\@.
+data Direction = Forwards | Backwards
+
+-- | Replace every path separator in the string, forward slash or
+-- backslash, by the slash of the requested direction.
+reslash :: Direction -> FilePath -> FilePath
+reslash d = map convert
+  where
+    convert c
+      | c == '/' || c == '\\' = slash
+      | otherwise             = c
+
+    slash = case d of
+      Forwards  -> '/'
+      Backwards -> '\\'
+
+-- | @this `makeRelativeTo` that@ rewrites the path @this@ so that it is
+-- relative to the directory part of @that@: path components shared by both
+-- directories are dropped, and one @..@ is inserted for every remaining
+-- component of @that@'s directory.
+makeRelativeTo :: FilePath -> FilePath -> FilePath
+this `makeRelativeTo` that = relDir </> fileName
+  where
+    (thisDir, fileName) = splitFileName this
+    relDir = joinPath (climb (splitPath thisDir)
+                             (splitPath (dropFileName that)))
+
+    -- Skip the common leading components, then go up once per component
+    -- left over on the reference side
+    climb (p : ps) (q : qs)
+      | p == q = climb ps qs
+    climb ps qs = map (const "..") qs ++ ps
+
+
+{-
+************************************************************************
+* *
+\subsection[Utils-Data]{Utils for defining Data instances}
+* *
+************************************************************************
+
+These functions help us to define Data instances for abstract types.
+-}
+
+-- | A placeholder constructor for an abstract type with the given name.
+-- It is shown as @{abstract:NAME}@, has no fields, and belongs to
+-- 'abstractDataType' of the same name.
+abstractConstr :: String -> Constr
+abstractConstr n = mkConstr (abstractDataType n) label [] Prefix
+  where
+    label = concat ["{abstract:", n, "}"]
+
+-- | A 'DataType' with the given name whose only constructor is the
+-- matching 'abstractConstr'.
+abstractDataType :: String -> DataType
+abstractDataType n = mkDataType n [abstractConstr n]
+
+{-
+************************************************************************
+* *
+\subsection[Utils-C]{Utils for printing C code}
+* *
+************************************************************************
+-}
+
+-- | Render one byte as it should appear inside a C character or string
+-- literal: double quote, single quote and backslash get a backslash in
+-- front, the characters from space to tilde stand for themselves, and
+-- every other byte becomes a backslash followed by three octal digits.
+charToC :: Word8 -> String
+charToC w
+  | c == '\"'            = "\\\""
+  | c == '\''            = "\\\'"
+  | c == '\\'            = "\\\\"
+  | c >= ' ' && c <= '~' = [c]
+  | otherwise            = '\\' : map octalDigit
+                                    [n `div` 64, n `div` 8 `mod` 8, n `mod` 8]
+  where
+    c = chr (fromIntegral w)
+    n = ord c
+    octalDigit d = chr (ord '0' + d)
+
+{-
+************************************************************************
+* *
+\subsection[Utils-Hashing]{Utils for hashing}
+* *
+************************************************************************
+-}
+
+-- | A sample hash function for Strings. We keep multiplying by the
+-- golden ratio and adding. The implementation is:
+--
+-- > hashString = foldl' f golden
+-- > where f m c = fromIntegral (ord c) * magic + hashInt32 m
+-- > magic = 0xdeadbeef
+--
+-- Where hashInt32 is the hash function for Int32 defined below.
+--
+-- Knuth argues that repeated multiplication by the golden ratio
+-- will minimize gaps in the hash space, and thus it's a good choice
+-- for combining together multiple keys to form one.
+--
+-- Here we know that individual characters c are often small, and this
+-- produces frequent collisions if we use ord c alone. A
+-- particular problem are the shorter low ASCII and ISO-8859-1
+-- character strings. We pre-multiply by a magic twiddle factor to
+-- obtain a good distribution. In fact, given the following test:
+--
+-- > testp :: Int32 -> Int
+-- > testp k = (n - ) . length . group . sort . map hs . take n $ ls
+-- > where ls = [] : [c : l | l <- ls, c <- ['\0'..'\xff']]
+-- > hs = foldl' f golden
+-- > f m c = fromIntegral (ord c) * k + hashInt32 m
+-- > n = 100000
+--
+-- We discover that testp magic = 0.
+hashString :: String -> Int32
+hashString = foldl' combine golden
+  where
+    -- Scale the character code by the magic factor, then add the hash of
+    -- the value accumulated so far
+    combine acc c = fromIntegral (ord c) * magic + hashInt32 acc
+    -- 0xdeadbeef does not fit a positive Int32, so it is converted from Word32
+    magic = fromIntegral (0xdeadbeef :: Word32)
+
+-- | The multiplier used by 'hashInt32' and the initial value of the fold in
+-- 'hashString'.
+golden :: Int32
+golden = 1013904242 -- = round ((sqrt 5 - 1) * 2^32) :: Int32
+-- was -1640531527 = round ((sqrt 5 - 1) * 2^31) :: Int32
+-- but that has bad mulHi properties (even adding 2^32 to get its inverse)
+-- Whereas the above works well and contains no hash duplications for
+-- [-32767..65536]
+
+-- | A sample (and useful) hash function for Int32,
+-- implemented by extracting the uppermost 32 bits of the 64-bit
+-- result of multiplying by a 33-bit constant.  The constant is from
+-- Knuth, derived from the golden ratio:
+--
+-- > golden = round ((sqrt 5 - 1) * 2^32)
+--
+-- We get good key uniqueness on small inputs
+-- (a problem with previous versions):
+--  (length $ group $ sort $ map hashInt32 [-32767..65536]) == 65536 + 32768
+--
+-- In the code, the input is added to the high word of its product with
+-- 'golden'.
+hashInt32 :: Int32 -> Int32
+hashInt32 x = x + mulHi x golden
+
+-- The high 32 bits of the 64-bit product of two 32-bit values: both
+-- arguments are widened to Int64, multiplied, and the product is shifted
+-- right by 32 bits before being narrowed back to Int32.
+mulHi :: Int32 -> Int32 -> Int32
+mulHi a b = fromIntegral (wide_product `shiftR` 32)
+  where
+    wide_product = (fromIntegral a * fromIntegral b) :: Int64