Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 1 addition & 37 deletions lib/Text/Regex/TDFA/Common.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import Data.Sequence as S(Seq)
--import Debug.Trace

import Text.Regex.TDFA.IntArrTrieSet(TrieSet)
import Utils

{-# INLINE look #-}
look :: Int -> IntMap a -> a
Expand All @@ -30,43 +31,6 @@ common_error :: String -> String -> a
common_error moduleName message =
error ("Explict error in module "++moduleName++" : "++message)

on :: (t1 -> t1 -> t2) -> (t -> t1) -> t -> t -> t2
f `on` g = (\x y -> (g x) `f` (g y))

-- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'.
norep :: (Eq a) => [a]->[a]
norep [] = []
norep x@[_] = x
norep (a:bs@(c:cs)) | a==c = norep (a:cs)
| otherwise = a:norep bs

-- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'.
norepBy :: (a -> a -> Bool) -> [a] -> [a]
norepBy _ [] = []
norepBy _ x@[_] = x
norepBy eqF (a:bs@(c:cs)) | a `eqF` c = norepBy eqF (a:cs)
| otherwise = a:norepBy eqF bs

mapFst :: (Functor f) => (t -> t2) -> f (t, t1) -> f (t2, t1)
mapFst f = fmap (\ (a,b) -> (f a,b))

mapSnd :: (Functor f) => (t1 -> t2) -> f (t, t1) -> f (t, t2)
mapSnd f = fmap (\ (a,b) -> (a,f b))

fst3 :: (a,b,c) -> a
fst3 (x,_,_) = x

snd3 :: (a,b,c) -> b
snd3 (_,x,_) = x

thd3 :: (a,b,c) -> c
thd3 (_,_,x) = x

flipOrder :: Ordering -> Ordering
flipOrder GT = LT
flipOrder LT = GT
flipOrder EQ = EQ

noWin :: WinTags -> Bool
noWin = null

Expand Down
1 change: 1 addition & 0 deletions lib/Text/Regex/TDFA/CorePattern.hs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import Data.Semigroup as Sem

import Text.Regex.TDFA.Common {- all -}
import Text.Regex.TDFA.Pattern(Pattern(..),starTrans)
import Utils
-- import Debug.Trace

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}
Expand Down
84 changes: 47 additions & 37 deletions lib/Text/Regex/TDFA/ReadRegex.hs
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ parseRegex x = runParser (do pat <- p_regex
type P = CharParser (GroupIndex, Int)

p_regex :: P Pattern
p_regex = liftM POr $ sepBy1 p_branch (char '|')
p_regex = POr <$> sepBy1 p_branch (char '|')

-- man re_format helps a lot, it says one-or-more pieces so this is
-- many1 not many. Use "()" to indicate an empty piece.
p_branch :: P Pattern
p_branch = liftM PConcat $ many1 p_piece
p_branch = PConcat <$> many1 p_piece

p_piece :: P Pattern
p_piece = (p_anchor <|> p_atom) >>= p_post_atom -- correct specification
Expand All @@ -62,35 +62,36 @@ group_index = do
return (Just index)

p_group :: P Pattern
p_group = lookAhead (char '(') >> do
index <- group_index
liftM (PGroup index) $ between (char '(') (char ')') p_regex
p_group = do
_ <- lookAhead (char '(')
PGroup <$> group_index <*> between (char '(') (char ')') p_regex

-- p_post_atom takes the previous atom as a parameter
p_post_atom :: Pattern -> P Pattern
p_post_atom atom = (char '?' >> return (PQuest atom))
<|> (char '+' >> return (PPlus atom))
<|> (char '*' >> return (PStar True atom))
p_post_atom atom = (char '?' $> PQuest atom)
<|> (char '+' $> PPlus atom)
<|> (char '*' $> PStar True atom)
<|> p_bound atom
<|> return atom

p_bound :: Pattern -> P Pattern
p_bound atom = try $ between (char '{') (char '}') (p_bound_spec atom)

p_bound_spec :: Pattern -> P Pattern
p_bound_spec atom = do lowS <- many1 digit
let lowI = read lowS
highMI <- option (Just lowI) $ try $ do
_ <- char ','
-- parsec note: if 'many digits' fails below then the 'try' ensures
-- that the ',' will not match the closing '}' in p_bound, same goes
-- for any non '}' garbage after the 'many digits'.
highS <- many digit
if null highS then return Nothing -- no upper bound
else do let highI = read highS
guard (lowI <= highI)
return (Just (read highS))
return (PBound lowI highMI atom)
p_bound_spec atom = do
lowI <- read <$> many1 digit
highMI <- option (Just lowI) $ try $ do
_ <- char ','
-- parsec note: if 'many digits' fails below then the 'try' ensures
-- that the ',' will not match the closing '}' in p_bound, same goes
-- for any non '}' garbage after the 'many digits'.
highS <- many digit
if null highS then return Nothing -- no upper bound
else do
let highI = read highS
guard (lowI <= highI)
return $ Just highI
return $ PBound lowI highMI atom

-- An anchor cannot be modified by a repetition specifier
p_anchor :: P Pattern
Expand All @@ -102,18 +103,29 @@ p_anchor = (char '^' >> liftM PCarat char_index)
<?> "empty () or anchor ^ or $"

char_index :: P DoPa
char_index = do (gi,ci) <- getState
let ci' = succ ci
setState (gi,ci')
return (DoPa ci')
char_index = do
(gi, ci) <- getState
let ci' = succ ci
setState (gi, ci')
return $ DoPa ci'

p_char :: P Pattern
p_char = p_dot <|> p_left_brace <|> p_escaped <|> p_other_char where
p_dot = char '.' >> char_index >>= return . PDot
p_left_brace = try $ (char '{' >> notFollowedBy digit >> char_index >>= return . (`PChar` '{'))
p_escaped = char '\\' >> anyChar >>= \c -> char_index >>= return . (`PEscape` c)
p_other_char = noneOf specials >>= \c -> char_index >>= return . (`PChar` c)
where specials = "^.[$()|*+?{\\"
p_char = p_dot <|> p_left_brace <|> p_escaped <|> p_other_char
where
p_dot = do
_ <- char '.'
PDot <$> char_index

p_left_brace = try $ do
_ <- char '{'
_ <- notFollowedBy digit
flip PChar '{' <$> char_index

p_escaped = do
_ <- char '\\'
flip PEscape <$> anyChar <*> char_index

p_other_char = flip PChar <$> noneOf "^.[$()|*+?{\\" <*> char_index

-- parse [bar] and [^bar] sets of characters
p_bracket :: P Pattern
Expand Down Expand Up @@ -161,15 +173,13 @@ p_set_elem_coll = liftM BEColl $

p_set_elem_range :: P BracketElement
p_set_elem_range = try $ do
start <- noneOf "]"
_ <- char '-'
end <- noneOf "]"
start <- noneOf "]-"
Copy link
Member Author

@andreasabel andreasabel Jul 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug here introduced by rebase.

_ <- char '-'
end <- noneOf "]"
return $ BERange start end

p_set_elem_char :: P BracketElement
p_set_elem_char = do
c <- noneOf "]"
return (BEChar c)
p_set_elem_char = BEChar <$> noneOf "]"

-- | Fail when 'BracketElement' is invalid, e.g. empty range @1-0@.
-- This failure should not be caught.
Expand Down
1 change: 1 addition & 0 deletions lib/Text/Regex/TDFA/TDFA.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc,fromSinglesMerg
import Text.Regex.TDFA.Pattern(Pattern)
--import Text.Regex.TDFA.RunMutState(toInstructions)
import Text.Regex.TDFA.TNFA(patternToNFA)
import Utils
--import Debug.Trace

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}
Expand Down
18 changes: 11 additions & 7 deletions lib/Text/Regex/TDFA/TNFA.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@
--
-- Uses recursive do notation.

module Text.Regex.TDFA.TNFA(patternToNFA
,QNFA(..),QT(..),QTrans,TagUpdate(..)) where
module Text.Regex.TDFA.TNFA
( patternToNFA
, decodeCharacterClass, decodePatternSet
, QNFA(..), QT(..), QTrans, TagUpdate(..)
) where

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

Expand All @@ -53,11 +56,12 @@ import qualified Data.Set as S(Set,insert,toAscList,empty)
import Text.Regex.TDFA.Common(QT(..),QNFA(..),QTrans,TagTask(..),TagUpdate(..),DoPa(..)
,CompOption(..)
,Tag,TagTasks,TagList,Index,WinTags,GroupIndex,GroupInfo(..)
,common_error,noWin,snd3,mapSnd)
,common_error,noWin)
import Text.Regex.TDFA.CorePattern(Q(..),P(..),OP(..),WhichTest,cleanNullView,NullView
,SetTestInfo(..),Wanted(..),TestInfo
,mustAccept,cannotAccept,patternToQ)
import Text.Regex.TDFA.Pattern(Pattern(..),PatternSet(..),unSEC,PatternSetCharacterClass(..))
import Utils
--import Debug.Trace

ecart :: String -> a -> a
Expand Down Expand Up @@ -786,7 +790,7 @@ ADD ORPHAN ID check and make this a fatal error while testing

-}

-- | decodePatternSet cannot handle collating element and treats
-- | @decodePatternSet@ cannot handle collating element and treats
-- equivalence classes as just their definition and nothing more.
decodePatternSet :: PatternSet -> S.Set Char
decodePatternSet (PatternSet msc mscc _ msec) =
Expand All @@ -795,9 +799,9 @@ decodePatternSet (PatternSet msc mscc _ msec) =
withMSEC = foldl (flip S.insert) withMSCC (maybe [] (concatMap unSEC . S.toAscList) msec)
in withMSEC

-- | This returns the distinct ascending list of characters
-- represented by [: :] values in legalCharacterClasses; unrecognized
-- class names return an empty string
-- | This returns the strictly ascending list of characters
-- represented by @[: :]@ POSIX character classes.
-- Unrecognized class names return an empty string.
decodeCharacterClass :: PatternSetCharacterClass -> String
decodeCharacterClass (PatternSetCharacterClass s) =
case s of
Expand Down
75 changes: 75 additions & 0 deletions lib/Utils.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE RankNTypes #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing #-}

-- | Internal module for utilities used in the implementation.

module Utils (module Utils, module X) where

import Control.Applicative (Const(..))
import Control.Applicative as X ((<*>))
import Data.Functor as X
import Data.Functor.Identity

-- * Lenses
---------------------------------------------------------------------------

type Lens' o i = forall f. Functor f => (i -> f i) -> (o -> f o)

type LensGet o i = o -> i
type LensSet o i = i -> o -> o
type LensMap o i = (i -> i) -> o -> o

infixl 8 ^.
-- | Get inner part @i@ of structure @o@ as designated by @Lens' o i@.
(^.) :: o -> Lens' o i -> i
o ^. l = getConst $ l Const o

-- | Set inner part @i@ of structure @o@ as designated by @Lens' o i@.
set :: Lens' o i -> LensSet o i
set l = over l . const

-- | Modify inner part @i@ of structure @o@ using a function @i -> i@.
over :: Lens' o i -> LensMap o i
over l f o = runIdentity $ l (Identity . f) o

-- * Misc
---------------------------------------------------------------------------

#if !MIN_VERSION_base(4,11,0)
(<&>) :: Functor f => f a -> (a -> b) -> f b
(<&>) = flip fmap
#endif

-- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'.
norep :: Eq a => [a] -> [a]
norep = norepBy (==)

-- | After 'sort' or 'sortBy' the use of 'nub' or 'nubBy' can be replaced by 'norep' or 'norepBy'.
norepBy :: (a -> a -> Bool) -> [a] -> [a]
norepBy _ [] = []
norepBy eq (a:as) = loop a as
where
loop a [] = [a]
loop a (b:bs) = (if a `eq` b then id else (a:)) $ loop b bs

mapFst :: Functor f => (t1 -> t2) -> f (t1, t) -> f (t2, t)
mapFst f = fmap $ \ (a, b) -> (f a, b)

mapSnd :: Functor f => (t1 -> t2) -> f (t, t1) -> f (t, t2)
mapSnd f = fmap $ \ (a, b) -> (a, f b)

fst3 :: (a,b,c) -> a
fst3 (x,_,_) = x

snd3 :: (a,b,c) -> b
snd3 (_,x,_) = x

thd3 :: (a,b,c) -> c
thd3 (_,_,x) = x

flipOrder :: Ordering -> Ordering
flipOrder GT = LT
flipOrder LT = GT
flipOrder EQ = EQ
4 changes: 3 additions & 1 deletion regex-tdfa.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ library
Text.Regex.TDFA.Text
Text.Regex.TDFA.Text.Lazy

other-modules: Paths_regex_tdfa
other-modules: Utils
Paths_regex_tdfa

-- Support Semigroup instances uniformly
--
Expand Down Expand Up @@ -123,6 +124,7 @@ library
UnboxedTuples
UnliftedFFITypes
other-extensions: CPP
RankNTypes

ghc-options: -Wall -funbox-strict-fields -fspec-constr-count=10 -fno-warn-orphans

Expand Down