haskell-hvr · andreasabel · Jul 18, 2022 · Jul 18, 2022 · Jul 18, 2022 · Jul 18, 2022
diff --git a/lib/Text/Regex/TDFA/Common.hs b/lib/Text/Regex/TDFA/Common.hs
@@ -21,6 +21,7 @@ import Data.Sequence as S(Seq)
 --import Debug.Trace
 
 import Text.Regex.TDFA.IntArrTrieSet(TrieSet)
+import Utils
 
 {-# INLINE look #-}
 look :: Int -> IntMap a -> a
@@ -30,43 +31,6 @@ common_error :: String -> String -> a
 common_error moduleName message =
   error ("Explict error in module "++moduleName++" : "++message)
 
-on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2
-f `on` g = (\x y -> (g x) `f` (g y))
-
--- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'.
-norep :: (Eq a) => [a]->[a]
-norep [] = []
-norep x@[_] = x
-norep (a:bs@(c:cs)) | a==c = norep (a:cs)
-                    | otherwise = a:norep bs
-
--- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'.
-norepBy :: (a -> a -> Bool) -> [a] -> [a]
-norepBy _ [] = []
-norepBy _ x@[_] = x
-norepBy eqF (a:bs@(c:cs)) | a `eqF` c = norepBy eqF (a:cs)
-                          | otherwise = a:norepBy eqF bs
-
-mapFst :: (Functor f) => (t -> t2) -> f (t, t1) -> f (t2, t1)
-mapFst f = fmap (\ (a,b) -> (f a,b))
-
-mapSnd :: (Functor f) => (t1 -> t2) -> f (t, t1) -> f (t, t2)
-mapSnd f = fmap (\ (a,b) -> (a,f b))
-
-fst3 :: (a,b,c) -> a
-fst3 (x,_,_) = x
-
-snd3 :: (a,b,c) -> b
-snd3 (_,x,_) = x
-
-thd3 :: (a,b,c) -> c
-thd3 (_,_,x) = x
-
-flipOrder :: Ordering -> Ordering
-flipOrder GT = LT
-flipOrder LT = GT
-flipOrder EQ = EQ
-
 noWin :: WinTags -> Bool
 noWin = null
 

diff --git a/lib/Text/Regex/TDFA/CorePattern.hs b/lib/Text/Regex/TDFA/CorePattern.hs
@@ -49,6 +49,7 @@ import Data.Semigroup as Sem
 
 import Text.Regex.TDFA.Common {- all -}
 import Text.Regex.TDFA.Pattern(Pattern(..),starTrans)
+import Utils
 -- import Debug.Trace
 
 {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

diff --git a/lib/Text/Regex/TDFA/ReadRegex.hs b/lib/Text/Regex/TDFA/ReadRegex.hs
@@ -41,12 +41,12 @@ parseRegex x = runParser (do pat <- p_regex
 type P = CharParser (GroupIndex, Int)
 
 p_regex :: P Pattern
-p_regex = liftM POr $ sepBy1 p_branch (char '|')
+p_regex = POr <$> sepBy1 p_branch (char '|')
 
 -- man re_format helps a lot, it says one-or-more pieces so this is
 -- many1 not many.  Use "()" to indicate an empty piece.
 p_branch :: P Pattern
-p_branch = liftM PConcat $ many1 p_piece
+p_branch = PConcat <$> many1 p_piece
 
 p_piece :: P Pattern
 p_piece = (p_anchor <|> p_atom) >>= p_post_atom -- correct specification
@@ -62,35 +62,36 @@ group_index = do
   return (Just index)
 
 p_group :: P Pattern
-p_group = lookAhead (char '(') >> do
-  index <- group_index
-  liftM (PGroup index) $ between (char '(') (char ')') p_regex
+p_group = do
+  _ <- lookAhead (char '(')
+  PGroup <$> group_index <*> between (char '(') (char ')') p_regex
 
 -- p_post_atom takes the previous atom as a parameter
 p_post_atom :: Pattern -> P Pattern
-p_post_atom atom = (char '?' >> return (PQuest atom))
-               <|> (char '+' >> return (PPlus atom))
-               <|> (char '*' >> return (PStar True atom))
+p_post_atom atom = (char '?' $> PQuest atom)
+               <|> (char '+' $> PPlus atom)
+               <|> (char '*' $> PStar True atom)
                <|> p_bound atom
                <|> return atom
 
 p_bound :: Pattern -> P Pattern
 p_bound atom = try $ between (char '{') (char '}') (p_bound_spec atom)
 
 p_bound_spec :: Pattern -> P Pattern
-p_bound_spec atom = do lowS <- many1 digit
-                       let lowI = read lowS
-                       highMI <- option (Just lowI) $ try $ do
-                                   _ <- char ','
-  -- parsec note: if 'many digits' fails below then the 'try' ensures
-  -- that the ',' will not match the closing '}' in p_bound, same goes
-  -- for any non '}' garbage after the 'many digits'.
-                                   highS <- many digit
-                                   if null highS then return Nothing -- no upper bound
-                                     else do let highI = read highS
-                                             guard (lowI <= highI)
-                                             return (Just (read highS))
-                       return (PBound lowI highMI atom)
+p_bound_spec atom = do
+  lowI <- read <$> many1 digit
+  highMI <- option (Just lowI) $ try $ do
+    _ <- char ','
+    -- parsec note: if 'many digits' fails below then the 'try' ensures
+    -- that the ',' will not match the closing '}' in p_bound, same goes
+    -- for any non '}' garbage after the 'many digits'.
+    highS <- many digit
+    if null highS then return Nothing -- no upper bound
+    else do
+      let highI = read highS
+      guard (lowI <= highI)
+      return $ Just highI
+  return $ PBound lowI highMI atom
 
 -- An anchor cannot be modified by a repetition specifier
 p_anchor :: P Pattern
@@ -102,18 +103,29 @@ p_anchor = (char '^' >> liftM PCarat char_index)
        <?> "empty () or anchor ^ or $"
 
 char_index :: P DoPa
-char_index = do (gi,ci) <- getState
-                let ci' = succ ci
-                setState (gi,ci')
-                return (DoPa ci')
+char_index = do
+  (gi, ci) <- getState
+  let ci' = succ ci
+  setState (gi, ci')
+  return $ DoPa ci'
 
 p_char :: P Pattern
-p_char = p_dot <|> p_left_brace <|> p_escaped <|> p_other_char where
-  p_dot = char '.' >> char_index >>= return . PDot
-  p_left_brace = try $ (char '{' >> notFollowedBy digit >> char_index >>= return . (`PChar` '{'))
-  p_escaped = char '\\' >> anyChar >>= \c -> char_index >>= return . (`PEscape` c)
-  p_other_char = noneOf specials >>= \c -> char_index >>= return . (`PChar` c)
-    where specials  = "^.[$()|*+?{\\"
+p_char = p_dot <|> p_left_brace <|> p_escaped <|> p_other_char
+  where
+  p_dot = do
+    _ <- char '.'
+    PDot <$> char_index
+
+  p_left_brace = try $ do
+    _ <- char '{'
+    _ <- notFollowedBy digit
+    flip PChar '{' <$> char_index
+
+  p_escaped = do
+    _ <- char '\\'
+    flip PEscape <$> anyChar <*> char_index
+
+  p_other_char = flip PChar <$> noneOf "^.[$()|*+?{\\" <*> char_index
 
 -- parse [bar] and [^bar] sets of characters
 p_bracket :: P Pattern
@@ -161,15 +173,13 @@ p_set_elem_coll =  liftM BEColl $
 
 p_set_elem_range :: P BracketElement
 p_set_elem_range = try $ do
-  start <- noneOf "]"
-  _  <- char '-'
-  end <- noneOf "]"
+  start <- noneOf "]-"
+  _     <- char '-'
+  end   <- noneOf "]"
   return $ BERange start end
 
 p_set_elem_char :: P BracketElement
-p_set_elem_char = do
-  c <- noneOf "]"
-  return (BEChar c)
+p_set_elem_char = BEChar <$> noneOf "]"
 
 -- | Fail when 'BracketElement' is invalid, e.g. empty range @1-0@.
 -- This failure should not be caught.

diff --git a/lib/Text/Regex/TDFA/TDFA.hs b/lib/Text/Regex/TDFA/TDFA.hs
@@ -28,6 +28,7 @@ import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc,fromSinglesMerg
 import Text.Regex.TDFA.Pattern(Pattern)
 --import Text.Regex.TDFA.RunMutState(toInstructions)
 import Text.Regex.TDFA.TNFA(patternToNFA)
+import Utils
 --import Debug.Trace
 
 {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

diff --git a/lib/Text/Regex/TDFA/TNFA.hs b/lib/Text/Regex/TDFA/TNFA.hs
@@ -29,8 +29,11 @@
 --
 -- Uses recursive do notation.
 
-module Text.Regex.TDFA.TNFA(patternToNFA
-                            ,QNFA(..),QT(..),QTrans,TagUpdate(..)) where
+module Text.Regex.TDFA.TNFA
+  ( patternToNFA
+  , decodeCharacterClass, decodePatternSet
+  , QNFA(..), QT(..), QTrans, TagUpdate(..)
+  ) where
 
 {- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}
 
@@ -53,11 +56,12 @@ import qualified Data.Set as S(Set,insert,toAscList,empty)
 import Text.Regex.TDFA.Common(QT(..),QNFA(..),QTrans,TagTask(..),TagUpdate(..),DoPa(..)
                              ,CompOption(..)
                              ,Tag,TagTasks,TagList,Index,WinTags,GroupIndex,GroupInfo(..)
-                             ,common_error,noWin,snd3,mapSnd)
+                             ,common_error,noWin)
 import Text.Regex.TDFA.CorePattern(Q(..),P(..),OP(..),WhichTest,cleanNullView,NullView
                                   ,SetTestInfo(..),Wanted(..),TestInfo
                                   ,mustAccept,cannotAccept,patternToQ)
 import Text.Regex.TDFA.Pattern(Pattern(..),PatternSet(..),unSEC,PatternSetCharacterClass(..))
+import Utils
 --import Debug.Trace
 
 ecart :: String -> a -> a
@@ -786,7 +790,7 @@ ADD ORPHAN ID check and make this a fatal error while testing
 
 -}
 
--- | decodePatternSet cannot handle collating element and treats
+-- | @decodePatternSet@ cannot handle collating element and treats
 -- equivalence classes as just their definition and nothing more.
 decodePatternSet :: PatternSet -> S.Set Char
 decodePatternSet (PatternSet msc mscc _ msec) =
@@ -795,9 +799,9 @@ decodePatternSet (PatternSet msc mscc _ msec) =
       withMSEC = foldl (flip S.insert) withMSCC (maybe [] (concatMap unSEC . S.toAscList) msec)
   in withMSEC
 
--- | This returns the distinct ascending list of characters
--- represented by [: :] values in legalCharacterClasses; unrecognized
--- class names return an empty string
+-- | This returns the strictly ascending list of characters
+-- represented by @[: :]@ POSIX character classes.
+-- Unrecognized class names return an empty string.
 decodeCharacterClass :: PatternSetCharacterClass -> String
 decodeCharacterClass (PatternSetCharacterClass s) =
   case s of

diff --git a/lib/Utils.hs b/lib/Utils.hs
@@ -0,0 +1,75 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE RankNTypes #-}
+
+{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
+
+-- | Internal module for utilities used in the implementation.
+
+module Utils (module Utils, module X) where
+
+import Control.Applicative (Const(..))
+import Control.Applicative as X ((<*>))
+import Data.Functor as X
+import Data.Functor.Identity
+
+-- * Lenses
+---------------------------------------------------------------------------
+
+type Lens' o i = forall f. Functor f => (i -> f i) -> (o -> f o)
+
+type LensGet o i = o -> i
+type LensSet o i = i -> o -> o
+type LensMap o i = (i -> i) -> o -> o
+
+infixl 8 ^.
+-- | Get inner part @i@ of structure @o@ as designated by @Lens' o i@.
+(^.) :: o -> Lens' o i -> i
+o ^. l = getConst $ l Const o
+
+-- | Set inner part @i@ of structure @o@ as designated by @Lens' o i@.
+set :: Lens' o i -> LensSet o i
+set l = over l . const
+
+-- | Modify inner part @i@ of structure @o@ using a function @i -> i@.
+over :: Lens' o i -> LensMap o i
+over l f o = runIdentity $ l (Identity . f) o
+
+-- * Misc
+---------------------------------------------------------------------------
+
+#if !MIN_VERSION_base(4,11,0)
+(<&>) :: Functor f => f a -> (a -> b) -> f b
+(<&>) = flip fmap
+#endif
+
+-- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'.
+norep :: Eq a => [a] -> [a]
+norep = norepBy (==)
+
+-- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'.
+norepBy :: (a -> a -> Bool) -> [a] -> [a]
+norepBy _  []     = []
+norepBy eq (a:as) = loop a as
+  where
+  loop a []     = [a]
+  loop a (b:bs) = (if a `eq` b then id else (a:)) $ loop b bs
+
+mapFst :: Functor f => (t1 -> t2) -> f (t1, t) -> f (t2, t)
+mapFst f = fmap $ \ (a, b) -> (f a, b)
+
+mapSnd :: Functor f => (t1 -> t2) -> f (t, t1) -> f (t, t2)
+mapSnd f = fmap $ \ (a, b) -> (a, f b)
+
+fst3 :: (a,b,c) -> a
+fst3 (x,_,_) = x
+
+snd3 :: (a,b,c) -> b
+snd3 (_,x,_) = x
+
+thd3 :: (a,b,c) -> c
+thd3 (_,_,x) = x
+
+flipOrder :: Ordering -> Ordering
+flipOrder GT = LT
+flipOrder LT = GT
+flipOrder EQ = EQ
diff --git a/regex-tdfa.cabal b/regex-tdfa.cabal
@@ -84,7 +84,8 @@ library
                         Text.Regex.TDFA.Text
                         Text.Regex.TDFA.Text.Lazy
 
-  other-modules:        Paths_regex_tdfa
+  other-modules:        Utils
+                        Paths_regex_tdfa
 
   -- Support Semigroup instances uniformly
   --
@@ -123,6 +124,7 @@ library
                         UnboxedTuples
                         UnliftedFFITypes
   other-extensions:     CPP
+                        RankNTypes
 
   ghc-options:          -Wall -funbox-strict-fields -fspec-constr-count=10 -fno-warn-orphans