diff --git a/Strata/Languages/Boogie/Boogie.lean b/Strata/Languages/Boogie/Boogie.lean index ce36f468..26b4ace8 100644 --- a/Strata/Languages/Boogie/Boogie.lean +++ b/Strata/Languages/Boogie/Boogie.lean @@ -34,19 +34,28 @@ namespace Boogie types. -/ -def typeCheck (options : Options) (program : Program) : Except Std.Format Program := do +def typeCheck (options : Options) (program : Program) + (moreFns : @Lambda.Factory BoogieLParams := Lambda.Factory.default) : + Except Std.Format Program := do let T := Lambda.TEnv.default - let C := { Lambda.LContext.default with functions := Boogie.Factory, knownTypes := Boogie.KnownTypes } + let factory ← Boogie.Factory.addFactory moreFns + let C := { Lambda.LContext.default with + functions := factory, + knownTypes := Boogie.KnownTypes } let (program, _T) ← Program.typeCheck C T program -- dbg_trace f!"[Strata.Boogie] Type variables:\n{T.state.substInfo.subst.length}" -- dbg_trace f!"[Strata.Boogie] Annotated program:\n{program}" if options.verbose then dbg_trace f!"[Strata.Boogie] Type checking succeeded.\n" return program -def typeCheckAndPartialEval (options : Options) (program : Program) : - Except Std.Format (List (Program × Env)) := do - let program ← typeCheck options program - let E := { Env.init with program := program } +def typeCheckAndPartialEval (options : Options) (program : Program) + (moreFns : @Lambda.Factory BoogieLParams := Lambda.Factory.default) : + Except Std.Format (List (Program × Env)) := do + let program ← typeCheck options program moreFns + let σ ← (Lambda.LState.init).addFactory Boogie.Factory + let σ ← σ.addFactory moreFns + let E := { Env.init with exprEnv := σ, + program := program } let pEs := Program.eval E if options.verbose then do dbg_trace f!"{Std.Format.line}VCs:" diff --git a/Strata/Languages/Boogie/Examples/FailingAssertion.lean b/Strata/Languages/Boogie/Examples/FailingAssertion.lean index 1f2815f4..b555fdda 100644 --- a/Strata/Languages/Boogie/Examples/FailingAssertion.lean +++ b/Strata/Languages/Boogie/Examples/FailingAssertion.lean @@ -63,9 +63,10 @@ Proof Obligation: Wrote problem to vcs/assert_0.smt2. -Obligation assert_0: solver error! +Obligation assert_0: could not be proved! -Error: Cannot find model for id: f1 +Result: failed +CEx: ⏎ Evaluated program: type MapII := (Map int int) @@ -80,7 +81,8 @@ assert [assert_0] (((~select $__a0) #0) == #1) --- info: Obligation: assert_0 -Result: err Cannot find model for id: f1 +Result: failed +CEx: -/ #guard_msgs in #eval verify "cvc5" failing diff --git a/Strata/Languages/Boogie/Verifier.lean b/Strata/Languages/Boogie/Verifier.lean index ed39c6d1..8fd465e8 100644 --- a/Strata/Languages/Boogie/Verifier.lean +++ b/Strata/Languages/Boogie/Verifier.lean @@ -127,8 +127,13 @@ def solverResult (vars : List (IdentT LMonoTy Visibility)) (ans : String) match verdict with | "sat" => let rawModel ← getModel rest - let model ← processModel vars rawModel ctx E - .ok (.sat model) + -- We suppress any counterexample processing errors. + -- Likely, these would be because of the suboptimal implementation + -- of the counterexample parser, which shouldn't hold back useful + -- feedback (i.e., problem was `sat`) from the user. + match (processModel vars rawModel ctx E) with + | .ok model => .ok (.sat model) + | .error _model_err => (.ok (.sat [])) | "unsat" => .ok .unsat | "unknown" => .ok .unknown | _ => .error ans @@ -297,8 +302,11 @@ def verifySingleEnv (smtsolver : String) (pE : Program × Env) (options : Option if options.stopOnFirstError then break return results -def verify (smtsolver : String) (program : Program) (options : Options := Options.default) : EIO Format VCResults := do - match Boogie.typeCheckAndPartialEval options program with +def verify (smtsolver : String) (program : Program) + (options : Options := Options.default) + (moreFns : @Lambda.Factory BoogieLParams := Lambda.Factory.default) : + EIO Format VCResults := do + match Boogie.typeCheckAndPartialEval options program moreFns with | .error err => .error f!"[Strata.Boogie] Type checking error: {format err}" | .ok pEs => @@ -316,12 +324,13 @@ namespace Strata open Lean.Parser -def typeCheck (ictx : InputContext) (env : Program) (options : Options := Options.default) : +def typeCheck (ictx : InputContext) (env : Program) (options : Options := Options.default) + (moreFns : @Lambda.Factory Boogie.BoogieLParams := Lambda.Factory.default) : Except Std.Format Boogie.Program := do let (program, errors) := TransM.run ictx (translateProgram env) if errors.isEmpty then -- dbg_trace f!"AST: {program}" - Boogie.typeCheck options program + Boogie.typeCheck options program moreFns else .error s!"DDM Transform Error: {repr errors}" @@ -333,12 +342,14 @@ def Boogie.getProgram def verify (smtsolver : String) (env : Program) (ictx : InputContext := Inhabited.default) - (options : Options := Options.default) : IO Boogie.VCResults := do + (options : Options := Options.default) + (moreFns : @Lambda.Factory Boogie.BoogieLParams := Lambda.Factory.default) + : IO Boogie.VCResults := do let (program, errors) := Boogie.getProgram env ictx if errors.isEmpty then -- dbg_trace f!"AST: {program}" EIO.toIO (fun f => IO.Error.userError (toString f)) - (Boogie.verify smtsolver program options) + (Boogie.verify smtsolver program options moreFns) else panic! s!"DDM Transform Error: {repr errors}" diff --git a/Strata/Languages/Python/BoogiePrelude.lean b/Strata/Languages/Python/BoogiePrelude.lean index 7f3f631a..558c9540 100644 --- a/Strata/Languages/Python/BoogiePrelude.lean +++ b/Strata/Languages/Python/BoogiePrelude.lean @@ -14,142 +14,308 @@ namespace Strata def boogiePrelude := #strata program Boogie; -type StrataHeap; -type StrataRef; -type StrataField (t: Type); -// Type constructors -type ListStr; type None; +const None_none : None; + type Object; +function Object_len(x : Object) : int; +axiom [Object_len_ge_zero]: (forall x : Object :: Object_len(x) >= 0); + +function inheritsFrom(child : string, parent : string) : (bool); +axiom [inheritsFrom_refl]: (forall s: string :: {inheritsFrom(s, s)} inheritsFrom(s, s)); + +///////////////////////////////////////////////////////////////////////////////////// + +// Exceptions +// TODO: Formalize the exception hierarchy here: +// https://docs.python.org/3/library/exceptions.html#exception-hierarchy +// We use the name "Error" to stand for Python's Exceptions + +// our own special indicator, Unimplemented which is an artifact of +// Strata that indicates that our models is partial. +type Error; + +// Constructors +function Error_TypeError (msg : string) : Error; +function Error_AttributeError (msg : string) : Error; +function Error_RePatternError (msg : string) : Error; +function Error_Unimplemented (msg : string) : Error; + +// Testers +function Error_isTypeError (e : Error) : bool; +function Error_isAttributeError (e : Error) : bool; +function Error_isRePatternError (e : Error) : bool; +function Error_isUnimplemented (e : Error) : bool; + +// Destructors +function Error_getTypeError (e : Error) : string; +function Error_getAttributeError (e : Error) : string; +function Error_getRePatternError (e : Error) : string; +function Error_getUnimplemented (e : Error) : string; + +// Axioms +// Testers of Constructors +axiom [Error_isTypeError_TypeError]: + (forall msg : string :: {(Error_TypeError(msg))} + Error_isTypeError(Error_TypeError(msg))); +axiom [Error_isAttributeError_AttributeError]: + (forall msg : string :: {(Error_AttributeError(msg))} + Error_isAttributeError(Error_AttributeError(msg))); +axiom [Error_isRePatternError_RePatternError]: + (forall msg : string :: + Error_isRePatternError(Error_RePatternError(msg))); +axiom [Error_isUnimplemented_Unimplemented]: + (forall msg : string :: + Error_isUnimplemented(Error_Unimplemented(msg))); +// Destructors of Constructors +axiom [Error_getTypeError_TypeError]: + (forall msg : string :: + Error_getTypeError(Error_TypeError(msg)) == msg); +axiom [Error_getAttributeError_AttributeError]: + (forall msg : string :: + Error_getAttributeError(Error_AttributeError(msg)) == msg); +axiom [Error_getUnimplemented_Unimplemented]: + (forall msg : string :: + Error_getUnimplemented(Error_Unimplemented(msg)) == msg); + +// ///////////////////////////////////////////////////////////////////////////////////// +// ///////////////////////////////////////////////////////////////////////////////////// +// Regular Expressions + +type Except (err : Type, ok : Type); + +// FIXME: +// Once DDM support polymorphic functions (and not just type declarations), +// we will be able to define the following generic functions and axioms. For now, +// we manually define appropriate instantiations. +// Also: when ADT support is lifted up to Boogie, all these +// constructors, testers, destructors, and axioms will be auto-generated. +// How will the DDM keep track of them? + +// // Constructors +// function Except_mkOK(err : Type, ok : Type, val : ok) : Except err ok; +// function Except_mkErr(err : Type, ok : Type, val : err) : Except err ok; +// // Testers +// function Except_isOK(err : Type, ok : Type, x : Except err ok) : bool; +// function Except_isErr(err : Type, ok : Type, x : Except err ok) : bool; +// // Destructors +// function Except_getOK(err : Type, ok : Type, x : Except err ok) : ok; +// function Except_getErr(err : Type, ok : Type, x : Except err ok) : err; +// // Axioms +// // Testers of Constructors +// axiom [Except_isOK_mkOK]: (forall x : ok :: Except_isOK(Except_mkOK x)); +// axiom [Except_isErr_mkErr]: (forall x : err :: Except_isErr(Except_mkErr x)); +// // Destructors of Constructors +// axiom [Except_getOK_mkOK]: (forall x : ok :: Except_getOK(Except_mkOK x) == x); +// axiom [Except_getErr_mkErr]: (forall x : err :: Except_isErr(Except_mkErr x)); + +type ExceptErrorRegex := Except Error regex; + +// Constructors +function ExceptErrorRegex_mkOK(x : regex) : ExceptErrorRegex; +function ExceptErrorRegex_mkErr(x : Error) : ExceptErrorRegex; +// Testers +function ExceptErrorRegex_isOK(x : ExceptErrorRegex) : bool; +function ExceptErrorRegex_isErr(x : ExceptErrorRegex) : bool; +// Destructors +function ExceptErrorRegex_getOK(x : ExceptErrorRegex) : regex; +function ExceptErrorRegex_getErr(x : ExceptErrorRegex) : Error; +// Axioms +// Testers of Constructors +axiom [ExceptErrorRegex_isOK_mkOK]: + (forall x : regex :: {(ExceptErrorRegex_mkOK(x))} + ExceptErrorRegex_isOK(ExceptErrorRegex_mkOK(x))); +axiom [ExceptErrorRegex_isError_mkErr]: + (forall e : Error :: {(ExceptErrorRegex_mkErr(e))} + ExceptErrorRegex_isErr(ExceptErrorRegex_mkErr(e))); +// Destructors of Constructors +axiom [ExceptErrorRegex_getOK_mkOK]: + (forall x : regex :: {(ExceptErrorRegex_mkOK(x))} + ExceptErrorRegex_getOK(ExceptErrorRegex_mkOK(x)) == x); +axiom [ExceptErrorRegex_getError_mkError]: + (forall e : Error :: {(ExceptErrorRegex_mkErr(e))} + ExceptErrorRegex_getErr(ExceptErrorRegex_mkErr(e)) == e); + +// NOTE: `re.match` returns a `Re.Match` object, but for now, we are interested +// only in match/nomatch, which is why we return `bool` here. +function PyReMatchRegex(pattern : regex, str : string, flags : int) : bool; +// We only support Re.Match when flags == 0. +axiom [PyReMatchRegex_def_noFlg]: + (forall pattern : regex, str : string :: {PyReMatchRegex(pattern, str, 0)} + PyReMatchRegex(pattern, str, 0) == str.in.re(str, pattern)); + +// Unsupported/uninterpreted: eventually, this would first call PyReCompile and if there's +// no exception, call PyReMatchRegex. +function PyReMatchStr(pattern : string, str : string, flags : int) : Except Error bool; + +///////////////////////////////////////////////////////////////////////////////////// + +// List of strings +type ListStr; +function ListStr_nil() : (ListStr); +function ListStr_cons(x0 : string, x1 : ListStr) : (ListStr); + +///////////////////////////////////////////////////////////////////////////////////// + +// Uninterpreted procedures +procedure importFrom(module : string, names : ListStr, level : int) returns (); +procedure import(names : ListStr) returns (); +procedure print(msg : string) returns (); + +///////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////// + +// Temporary Types + type ExceptOrNone; +type ExceptCode := string; type ExceptNone; +const Except_none : ExceptNone; type ExceptOrNoneTag; -type StrOrNone; -type StrOrNoneTag; -type AnyOrNone; -type AnyOrNoneTag; -type BoolOrNone; -type BoolOrNoneTag; -type BoolOrStrOrNone; -type BoolOrStrOrNoneTag; +const EN_STR_TAG : ExceptOrNoneTag; +const EN_NONE_TAG : ExceptOrNoneTag; +function ExceptOrNone_tag(v : ExceptOrNone) : ExceptOrNoneTag; +function ExceptOrNone_code_val(v : ExceptOrNone) : ExceptCode; +function ExceptOrNone_none_val(v : ExceptOrNone) : ExceptNone; +function ExceptOrNone_mk_code(s : ExceptCode) : ExceptOrNone; +function ExceptOrNone_mk_none(v : ExceptNone) : ExceptOrNone; +axiom [ExceptOrNone_mk_code_axiom]: (forall s : ExceptCode :: {(ExceptOrNone_mk_code(s))} + ExceptOrNone_tag(ExceptOrNone_mk_code(s)) == EN_STR_TAG && + ExceptOrNone_code_val(ExceptOrNone_mk_code(s)) == s); +axiom [ExceptOrNone_mk_none_axiom]: (forall n : ExceptNone :: {(ExceptOrNone_mk_none(n))} + ExceptOrNone_tag(ExceptOrNone_mk_none(n)) == EN_NONE_TAG && + ExceptOrNone_none_val(ExceptOrNone_mk_none(n)) == n); +axiom [ExceptOrNone_tag_axiom]: (forall v : ExceptOrNone :: {ExceptOrNone_tag(v)} + ExceptOrNone_tag(v) == EN_STR_TAG || + ExceptOrNone_tag(v) == EN_NONE_TAG); +axiom [unique_ExceptOrNoneTag]: EN_STR_TAG != EN_NONE_TAG; + +// IntOrNone type IntOrNone; type IntOrNoneTag; -type BytesOrStrOrNone; -type BytesOrStrOrNoneTag; -type MappingStrStrOrNone; -type MappingStrStrOrNoneTag; -type DictStrAny; -type S3Client; -type CloudWatchClient; -type Client; -type ClientTag; - -// Type synonyms -type ExceptCode := string; +const IN_INT_TAG : IntOrNoneTag; +const IN_NONE_TAG : IntOrNoneTag; +function IntOrNone_tag(v : IntOrNone) : IntOrNoneTag; +function IntOrNone_int_val(v : IntOrNone) : int; +function IntOrNone_none_val(v : IntOrNone) : None; +function IntOrNone_mk_int(i : int) : IntOrNone; +function IntOrNone_mk_none(v : None) : IntOrNone; +axiom (forall i : int :: {(IntOrNone_mk_int(i))} + IntOrNone_tag(IntOrNone_mk_int(i)) == IN_INT_TAG && + IntOrNone_int_val(IntOrNone_mk_int(i)) == i); +axiom (forall n : None :: {(IntOrNone_mk_none(n))} + IntOrNone_tag(IntOrNone_mk_none(n)) == IN_NONE_TAG && + IntOrNone_none_val(IntOrNone_mk_none(n)) == n); +axiom (forall v : IntOrNone :: {IntOrNone_tag(v)} + IntOrNone_tag(v) == IN_INT_TAG || + IntOrNone_tag(v) == IN_NONE_TAG); +axiom [unique_IntOrNoneTag]: IN_INT_TAG != IN_NONE_TAG; -// Constants -const None_none : None; -const Except_none : ExceptNone; -const EN_STR_TAG : ExceptOrNoneTag; -const EN_NONE_TAG : ExceptOrNoneTag; +// StrOrNone +type StrOrNone; +type StrOrNoneTag; const SN_STR_TAG : StrOrNoneTag; const SN_NONE_TAG : StrOrNoneTag; +function StrOrNone_tag(v : StrOrNone) : StrOrNoneTag; +function StrOrNone_str_val(v : StrOrNone) : string; +function StrOrNone_none_val(v : StrOrNone) : None; +function StrOrNone_mk_str(s : string) : StrOrNone; +function StrOrNone_mk_none(v : None) : StrOrNone; + +axiom [StrOrNone_tag_of_mk_str_axiom]: (forall s : string :: {StrOrNone_tag(StrOrNone_mk_str(s)), (StrOrNone_mk_str(s))} + StrOrNone_tag(StrOrNone_mk_str(s)) == SN_STR_TAG); +axiom [StrOrNone_val_of_mk_str_axiom]: (forall s : string :: {StrOrNone_str_val(StrOrNone_mk_str(s)), (StrOrNone_mk_str(s))} + StrOrNone_str_val(StrOrNone_mk_str(s)) == s); +axiom [StrOrNone_mk_none_axiom]: (forall n : None :: {(StrOrNone_mk_none(n))} + StrOrNone_tag(StrOrNone_mk_none(n)) == SN_NONE_TAG && + StrOrNone_none_val(StrOrNone_mk_none(n)) == n); +axiom [StrOrNone_tag_axiom]: (forall v : StrOrNone :: {StrOrNone_tag(v)} + StrOrNone_tag(v) == SN_STR_TAG || + StrOrNone_tag(v) == SN_NONE_TAG); +axiom [unique_StrOrNoneTag]: SN_STR_TAG != SN_NONE_TAG; + +function strOrNone_toObject(v : StrOrNone) : Object; +// Injectivity axiom: different StrOrNone map to different objects. +axiom (forall s1:StrOrNone, s2: StrOrNone :: {strOrNone_toObject(s1), strOrNone_toObject(s2)} + s1 != s2 ==> + strOrNone_toObject(s1) != strOrNone_toObject(s2)); +axiom (forall s : StrOrNone :: {StrOrNone_tag(s)} + StrOrNone_tag(s) == SN_STR_TAG ==> + Object_len(strOrNone_toObject(s)) == str.len(StrOrNone_str_val(s))); + +// AnyOrNone +type AnyOrNone; +type AnyOrNoneTag; const AN_ANY_TAG : AnyOrNoneTag; const AN_NONE_TAG : AnyOrNoneTag; +function AnyOrNone_tag(v : AnyOrNone) : AnyOrNoneTag; +function AnyOrNone_str_val(v : AnyOrNone) : string; +function AnyOrNone_none_val(v : AnyOrNone) : None; +function AnyOrNone_mk_str(s : string) : AnyOrNone; +function AnyOrNone_mk_none(v : None) : AnyOrNone; +axiom (forall s : string :: {(AnyOrNone_mk_str(s))} + AnyOrNone_tag(AnyOrNone_mk_str(s)) == AN_ANY_TAG && + AnyOrNone_str_val(AnyOrNone_mk_str(s)) == s); +axiom (forall n : None :: {(AnyOrNone_mk_none(n))} + AnyOrNone_tag(AnyOrNone_mk_none(n)) == AN_NONE_TAG && + AnyOrNone_none_val(AnyOrNone_mk_none(n)) == n); +axiom (forall v : AnyOrNone :: {AnyOrNone_tag(v)} + AnyOrNone_tag(v) == AN_ANY_TAG || + AnyOrNone_tag(v) == AN_NONE_TAG); +axiom [unique_AnyOrNoneTag]: AN_ANY_TAG != AN_NONE_TAG; + +// BoolOrNone +type BoolOrNone; +type BoolOrNoneTag; const BN_BOOL_TAG : BoolOrNoneTag; const BN_NONE_TAG : BoolOrNoneTag; +function BoolOrNone_tag(v : BoolOrNone) : BoolOrNoneTag; +function BoolOrNone_str_val(v : BoolOrNone) : string; +function BoolOrNone_none_val(v : BoolOrNone) : None; +function BoolOrNone_mk_str(s : string) : BoolOrNone; +function BoolOrNone_mk_none(v : None) : BoolOrNone; +axiom (forall s : string :: {BoolOrNone_mk_str(s)} + BoolOrNone_tag(BoolOrNone_mk_str(s)) == BN_BOOL_TAG && + BoolOrNone_str_val(BoolOrNone_mk_str(s)) == s); +axiom (forall n : None :: {BoolOrNone_mk_none(n)} + BoolOrNone_tag(BoolOrNone_mk_none(n)) == BN_NONE_TAG && + BoolOrNone_none_val(BoolOrNone_mk_none(n)) == n); +axiom (forall v : BoolOrNone :: {BoolOrNone_tag(v)} + BoolOrNone_tag(v) == BN_BOOL_TAG || + BoolOrNone_tag(v) == BN_NONE_TAG); +axiom [unique_BoolOrNoneTag]: BN_BOOL_TAG != BN_NONE_TAG; + +// BoolOrStrOrNone +type BoolOrStrOrNone; +type BoolOrStrOrNoneTag; const BSN_BOOL_TAG : BoolOrStrOrNoneTag; const BSN_STR_TAG : BoolOrStrOrNoneTag; const BSN_NONE_TAG : BoolOrStrOrNoneTag; -const C_S3_TAG : ClientTag; -const C_CW_TAG : ClientTag; - - -function ListStr_nil() : (ListStr); -function ListStr_cons(x0 : string, x1 : ListStr) : (ListStr); -function Object_len(x : Object) : (int); -function inheritsFrom(child : string, parent : string) : (bool); -function ExceptOrNone_tag(v : ExceptOrNone) : (ExceptOrNoneTag); -function ExceptOrNone_code_val(v : ExceptOrNone) : (ExceptCode); -function ExceptOrNone_none_val(v : ExceptOrNone) : (ExceptNone); -function ExceptOrNone_mk_code(s : ExceptCode) : (ExceptOrNone); -function ExceptOrNone_mk_none(v : ExceptNone) : (ExceptOrNone); -function StrOrNone_tag(v : StrOrNone) : (StrOrNoneTag); -function StrOrNone_str_val(v : StrOrNone) : (string); -function StrOrNone_none_val(v : StrOrNone) : (None); -function StrOrNone_mk_str(s : string) : (StrOrNone); -function StrOrNone_mk_none(v : None) : (StrOrNone); -function strOrNone_toObject(x0 : StrOrNone) : (Object); -function AnyOrNone_tag(v : AnyOrNone) : (AnyOrNoneTag); -function AnyOrNone_str_val(v : AnyOrNone) : (string); -function AnyOrNone_none_val(v : AnyOrNone) : (None); -function AnyOrNone_mk_str(s : string) : (AnyOrNone); -function AnyOrNone_mk_none(v : None) : (AnyOrNone); -function IntOrNone_mk_none(v : None) : (IntOrNone); -function BytesOrStrOrNone_mk_none(v : None) : (BytesOrStrOrNone); -function BytesOrStrOrNone_mk_str(s : string) : (BytesOrStrOrNone); -function MappingStrStrOrNone_mk_none(v : None) : (MappingStrStrOrNone); -function BoolOrNone_tag(v : BoolOrNone) : (BoolOrNoneTag); -function BoolOrNone_str_val(v : BoolOrNone) : (string); -function BoolOrNone_none_val(v : BoolOrNone) : (None); -function BoolOrNone_mk_str(s : string) : (BoolOrNone); -function BoolOrNone_mk_none(v : None) : (BoolOrNone); -function BoolOrStrOrNone_tag(v : BoolOrStrOrNone) : (BoolOrStrOrNoneTag); -function BoolOrStrOrNone_bool_val(v : BoolOrStrOrNone) : (bool); -function BoolOrStrOrNone_str_val(v : BoolOrStrOrNone) : (string); -function BoolOrStrOrNone_none_val(v : BoolOrStrOrNone) : (None); -function BoolOrStrOrNone_mk_bool(b : bool) : (BoolOrStrOrNone); -function BoolOrStrOrNone_mk_str(s : string) : (BoolOrStrOrNone); -function BoolOrStrOrNone_mk_none(v : None) : (BoolOrStrOrNone); -function Client_tag(v : Client) : (ClientTag); - -// Unique const axioms -axiom [unique_ExceptOrNoneTag]: EN_STR_TAG != EN_NONE_TAG; -axiom [unique_StrOrNoneTag]: SN_STR_TAG != SN_NONE_TAG; -axiom [unique_AnyOrNoneTag]: AN_ANY_TAG != AN_NONE_TAG; -axiom [unique_BoolOrNoneTag]: BN_BOOL_TAG != BN_NONE_TAG; +function BoolOrStrOrNone_tag(v : BoolOrStrOrNone) : BoolOrStrOrNoneTag; +function BoolOrStrOrNone_bool_val(v : BoolOrStrOrNone) : bool; +function BoolOrStrOrNone_str_val(v : BoolOrStrOrNone) : string; +function BoolOrStrOrNone_none_val(v : BoolOrStrOrNone) : None; +function BoolOrStrOrNone_mk_bool(b : bool) : BoolOrStrOrNone; +function BoolOrStrOrNone_mk_str(s : string) : BoolOrStrOrNone; +function BoolOrStrOrNone_mk_none(v : None) : BoolOrStrOrNone; +axiom (forall b : bool :: {BoolOrStrOrNone_mk_bool(b)} + BoolOrStrOrNone_tag(BoolOrStrOrNone_mk_bool(b)) == BSN_BOOL_TAG && + BoolOrStrOrNone_bool_val(BoolOrStrOrNone_mk_bool(b)) == b); +axiom (forall s : string :: {BoolOrStrOrNone_mk_str(s)} + BoolOrStrOrNone_tag(BoolOrStrOrNone_mk_str(s)) == BSN_STR_TAG && + BoolOrStrOrNone_str_val(BoolOrStrOrNone_mk_str(s)) == s); +axiom (forall n : None :: {BoolOrStrOrNone_mk_none(n)} + BoolOrStrOrNone_tag(BoolOrStrOrNone_mk_none(n)) == BSN_NONE_TAG && + BoolOrStrOrNone_none_val(BoolOrStrOrNone_mk_none(n)) == n); +axiom (forall v : BoolOrStrOrNone :: {BoolOrStrOrNone_tag(v)} + BoolOrStrOrNone_tag(v) == BSN_BOOL_TAG || + BoolOrStrOrNone_tag(v) == BSN_STR_TAG || + BoolOrStrOrNone_tag(v) == BSN_NONE_TAG); axiom [unique_BoolOrStrOrNoneTag]: BSN_BOOL_TAG != BSN_STR_TAG && BSN_BOOL_TAG != BSN_NONE_TAG && BSN_STR_TAG != BSN_NONE_TAG; -axiom [unique_ClientTag]: C_S3_TAG != C_CW_TAG; - -// Axioms -axiom [ax_l61c1]: (forall x: Object :: {Object_len(x)} (Object_len(x) >= 0)); -axiom [ax_l93c1]: (forall s: string :: {inheritsFrom(s, s)} inheritsFrom(s, s)); -axiom [ax_l114c1]: (forall s: ExceptCode :: {ExceptOrNone_mk_code(s)} ((ExceptOrNone_tag(ExceptOrNone_mk_code(s)) == EN_STR_TAG) && (ExceptOrNone_code_val(ExceptOrNone_mk_code(s)) == s))); -axiom [ax_l117c1]: (forall n: ExceptNone :: {ExceptOrNone_mk_none(n)} ((ExceptOrNone_tag(ExceptOrNone_mk_none(n)) == EN_NONE_TAG) && (ExceptOrNone_none_val(ExceptOrNone_mk_none(n)) == n))); -axiom [ax_l120c1]: (forall v: ExceptOrNone :: {ExceptOrNone_tag(v)} ((ExceptOrNone_tag(v) == EN_STR_TAG) || (ExceptOrNone_tag(v) == EN_NONE_TAG))); -axiom [ax_l141c1]: (forall s: string :: {StrOrNone_mk_str(s)} ((StrOrNone_tag(StrOrNone_mk_str(s)) == SN_STR_TAG) && (StrOrNone_str_val(StrOrNone_mk_str(s)) == s))); -axiom [ax_l144c1]: (forall n: None :: {StrOrNone_mk_none(n)} ((StrOrNone_tag(StrOrNone_mk_none(n)) == SN_NONE_TAG) && (StrOrNone_none_val(StrOrNone_mk_none(n)) == n))); -axiom [ax_l147c1]: (forall v: StrOrNone :: {StrOrNone_tag(v)} ((StrOrNone_tag(v) == SN_STR_TAG) || (StrOrNone_tag(v) == SN_NONE_TAG))); -axiom [ax_l153c1]: (forall s1: StrOrNone, s2: StrOrNone :: {strOrNone_toObject(s1), strOrNone_toObject(s2)} ((s1 != s2) ==> (strOrNone_toObject(s1) != strOrNone_toObject(s2)))); -axiom [ax_l155c1]: (forall s: StrOrNone :: {StrOrNone_tag(s)} ((StrOrNone_tag(s) == SN_STR_TAG) ==> (Object_len(strOrNone_toObject(s)) == str.len(StrOrNone_str_val(s))))); -axiom [ax_l170c1]: (forall s: string :: {AnyOrNone_mk_str(s)} ((AnyOrNone_tag(AnyOrNone_mk_str(s)) == AN_ANY_TAG) && (AnyOrNone_str_val(AnyOrNone_mk_str(s)) == s))); -axiom [ax_l173c1]: (forall n: None :: {AnyOrNone_mk_none(n)} ((AnyOrNone_tag(AnyOrNone_mk_none(n)) == AN_NONE_TAG) && (AnyOrNone_none_val(AnyOrNone_mk_none(n)) == n))); -axiom [ax_l176c1]: (forall v: AnyOrNone :: {AnyOrNone_tag(v)} ((AnyOrNone_tag(v) == AN_ANY_TAG) || (AnyOrNone_tag(v) == AN_NONE_TAG))); -axiom [ax_l191c1]: (forall s: string :: {BoolOrNone_mk_str(s)} ((BoolOrNone_tag(BoolOrNone_mk_str(s)) == BN_BOOL_TAG) && (BoolOrNone_str_val(BoolOrNone_mk_str(s)) == s))); -axiom [ax_l194c1]: (forall n: None :: {BoolOrNone_mk_none(n)} ((BoolOrNone_tag(BoolOrNone_mk_none(n)) == BN_NONE_TAG) && (BoolOrNone_none_val(BoolOrNone_mk_none(n)) == n))); -axiom [ax_l197c1]: (forall v: BoolOrNone :: {BoolOrNone_tag(v)} ((BoolOrNone_tag(v) == BN_BOOL_TAG) || (BoolOrNone_tag(v) == BN_NONE_TAG))); -axiom [ax_l215c1]: (forall b: bool :: {BoolOrStrOrNone_mk_bool(b)} ((BoolOrStrOrNone_tag(BoolOrStrOrNone_mk_bool(b)) == BSN_BOOL_TAG) && (BoolOrStrOrNone_bool_val(BoolOrStrOrNone_mk_bool(b)) <==> b))); -axiom [ax_l218c1]: (forall s: string :: {BoolOrStrOrNone_mk_str(s)} ((BoolOrStrOrNone_tag(BoolOrStrOrNone_mk_str(s)) == BSN_STR_TAG) && (BoolOrStrOrNone_str_val(BoolOrStrOrNone_mk_str(s)) == s))); -axiom [ax_l221c1]: (forall n: None :: {BoolOrStrOrNone_mk_none(n)} ((BoolOrStrOrNone_tag(BoolOrStrOrNone_mk_none(n)) == BSN_NONE_TAG) && (BoolOrStrOrNone_none_val(BoolOrStrOrNone_mk_none(n)) == n))); -axiom [ax_l224c1]: (forall v: BoolOrStrOrNone :: {BoolOrStrOrNone_tag(v)} (((BoolOrStrOrNone_tag(v) == BSN_BOOL_TAG) || (BoolOrStrOrNone_tag(v) == BSN_STR_TAG)) || (BoolOrStrOrNone_tag(v) == BSN_NONE_TAG))); - -// Uninterpreted procedures -procedure importFrom(module : string, names : ListStr, level : int) returns () -; - -procedure import(names : ListStr) returns () -; - -procedure print(msg : string) returns () -; - -function str_len(s : string) : int; - procedure test_helper_procedure(req_name : string, opt_name : StrOrNone) returns (maybe_except: ExceptOrNone) spec { requires [req_name_is_foo]: req_name == "foo"; - requires [opt_name_none_or_bar]: (if (StrOrNone_tag(opt_name) != SN_NONE_TAG) then (StrOrNone_tag(opt_name) == SN_STR_TAG) else true); + requires [opt_name_none_or_str]: (if (StrOrNone_tag(opt_name) != SN_NONE_TAG) then (StrOrNone_tag(opt_name) == SN_STR_TAG) else true); requires [opt_name_none_or_bar]: (if (StrOrNone_tag(opt_name) == SN_STR_TAG) then (StrOrNone_str_val(opt_name) == "bar") else true); free ensures [ensures_maybe_except_none]: (ExceptOrNone_tag(maybe_except) == EN_NONE_TAG); } diff --git a/Strata/Languages/Python/PyFactory.lean b/Strata/Languages/Python/PyFactory.lean new file mode 100644 index 00000000..0d182188 --- /dev/null +++ b/Strata/Languages/Python/PyFactory.lean @@ -0,0 +1,102 @@ +/- + Copyright Strata Contributors + + SPDX-License-Identifier: Apache-2.0 OR MIT +-/ + +import Strata.Languages.Boogie.Verifier +import Strata.Languages.Python.Regex.ReToBoogie + +namespace Strata +namespace Python + +------------------------------------------------------------------------------- + +/- +Candidate translation pass for Python `re` code: + +## Python Code: + +``` +... +PATTERN = r"^[a-z0-9][a-z0-9.-]{1,3}[a-z0-9]$" +REGEX = re.compile(PATTERN) # default flags == 0 +... +if not re.match(REGEX, name) then # default flags == 0 + return False +... +``` + +## Corresponding Strata.Boogie: + +``` +procedure _main () { + +var PATTERN : string = "^[a-z0-9][a-z0-9.-]{1,3}[a-z0-9]$"; + +var REGEX : regex; +var $__REGEX : Except Error regex := PyReCompile(PATTERN, 0) + +if ExceptErrorRegex_isOK($__REGEX) then { + REGEX := ExceptErrorRegex_getOK($__REGEX); +} else if (Error_isUnimplemented(ExceptErrorRegex_getError($__REGEX)) then { + // Unsupported by Strata. + havoc REGEX; +} else { + // + // TODO: Implement a version of `assert` that takes an expression to be + // evaluated when the assertion fails. In this case, we'd display the + // (computed) error message in `ExceptErrorRegex_getError($__REGEX)`. + // + // E.g., `assert false (printOnFailure := ExceptErrorRegex_getError($__REGEX));` + // + assert false; +} +... + +if not PyReMatch(REGEX, name, 0) then + return false +} +``` + +-/ + +open Boogie +open Lambda LTy.Syntax LExpr.SyntaxMono + +def reCompileFunc : LFunc Boogie.BoogieLParams := + { name := "PyReCompile", + typeArgs := [], + inputs := [("string", mty[string]), + ("flags", mty[int])] + output := mty[ExceptErrorRegex], + concreteEval := some + (fun orig_e args => match args with + | [LExpr.strConst () s, LExpr.intConst () 0] => + -- This function has a concrete evaluation implementation only when + -- flags == 0. + -- (FIXME): We use `.match` mode below because we support only + -- `re.match` for now. However, `re.compile` isn't mode-specific in + -- general. + let (expr, maybe_err) := pythonRegexToBoogie s .match + match maybe_err with + | none => + -- Note: Do not use `eb` (in Boogie.Syntax) here (e.g., see below) + -- eb[(~ExceptErrorRegex_mkOK expr)] + -- that captures `expr` as an `.fvar`. + LExpr.mkApp () (.op () "ExceptErrorRegex_mkOK" none) [expr] + | some (ParseError.unimplemented msg _pattern _pos) => + LExpr.mkApp () (.op () "ExceptErrorRegex_mkErr" none) + [LExpr.mkApp () (.op () "Error_Unimplemented" none) [.strConst () (toString msg)]] + | some (ParseError.patternError msg _pattern _pos) => + LExpr.mkApp () (.op () "ExceptErrorRegex_mkErr" none) + [LExpr.mkApp () (.op () "Error_RePatternErr" none) [.strConst () (toString msg)]] + | _ => orig_e) + } + +def ReFactory : @Factory Boogie.BoogieLParams := + #[ + reCompileFunc + ] + +------------------------------------------------------------------------------- diff --git a/Strata/Languages/Python/PythonToBoogie.lean b/Strata/Languages/Python/PythonToBoogie.lean index 1aed3db7..40485fe7 100644 --- a/Strata/Languages/Python/PythonToBoogie.lean +++ b/Strata/Languages/Python/PythonToBoogie.lean @@ -13,6 +13,7 @@ import Strata.Languages.Boogie.Boogie import Strata.Languages.Python.PythonDialect import Strata.Languages.Python.FunctionSignatures import Strata.Languages.Python.Regex.ReToBoogie +import Strata.Languages.Python.PyFactory import StrataTest.Internal.InternalFunctionSignatures namespace Strata diff --git a/Strata/Languages/Python/Regex/ReParser.lean b/Strata/Languages/Python/Regex/ReParser.lean index 8d3a3a83..cc70bff2 100644 --- a/Strata/Languages/Python/Regex/ReParser.lean +++ b/Strata/Languages/Python/Regex/ReParser.lean @@ -83,7 +83,8 @@ inductive RegexAST where ------------------------------------------------------------------------------- -/-- Parse character class like [a-z], [0-9], etc. into union of ranges and chars. -/ +/-- Parse character class like [a-z], [0-9], etc. into union of ranges and + chars. Note that this parses `|` as a character. -/ def parseCharClass (s : String) (pos : String.Pos) : Except ParseError (RegexAST × String.Pos) := do if s.get? pos != some '[' then throw (.patternError "Expected '[' at start of character class" s pos) let mut i := s.next pos @@ -117,13 +118,13 @@ def parseCharClass (s : String) (pos : String.Pos) : Except ParseError (RegexAST result := some (match result with | none => r | some prev => RegexAST.union prev r) i := s.next i - let some ast := result | throw (.patternError "Empty character class" s pos) + let some ast := result | throw (.patternError "Unterminated character set" s pos) let finalAst := if isComplement then RegexAST.complement ast else ast pure (finalAst, s.next i) ------------------------------------------------------------------------------- -/-- Parse numeric repeats like `{10}` or `{1,10}` into min and max bounds -/ +/-- Parse numeric repeats like `{10}` or `{1,10}` into min and max bounds. -/ def parseBounds (s : String) (pos : String.Pos) : Except ParseError (Nat × Nat × String.Pos) := do if s.get? pos != some '{' then throw (.patternError "Expected '{' at start of bounds" s pos) let mut i := s.next pos @@ -158,56 +159,11 @@ def parseBounds (s : String) (pos : String.Pos) : Except ParseError (Nat × Nat ------------------------------------------------------------------------------- mutual -/-- Parse group (content between parentheses) with alternation (`|`) support. -/ -partial def parseGroup (s : String) (pos : String.Pos) : Except ParseError (RegexAST × String.Pos) := do - if s.get? pos != some '(' then throw (.patternError "Expected '(' at start of group" s pos) - let mut i := s.next pos - - -- Check for extension notation (?... - if !s.atEnd i && s.get? i == some '?' then - let i1 := s.next i - if !s.atEnd i1 then - match s.get? i1 with - | some '=' => throw (.unimplemented "Positive lookahead (?=...) is not supported" s pos) - | some '!' => throw (.unimplemented "Negative lookahead (?!...) is not supported" s pos) - | _ => throw (.unimplemented "Extension notation (?...) is not supported" s pos) - - let mut alternatives : List (List RegexAST) := [[]] - - -- Parse elements until we hit ')'. - while !s.atEnd i && s.get? i != some ')' do - if s.get? i == some '|' then - -- Start new alternative. - alternatives := [] :: alternatives - i := s.next i - else - let (ast, nextPos) ← parseRegex s i - -- Add to current alternative. - alternatives := match alternatives with - | [] => [[ast]] - | head :: tail => (ast :: head) :: tail - i := nextPos - - if s.get? i != some ')' then throw (.patternError "Unclosed group: missing ')'" s i) - - -- Build result: concatenate each alternative, then union them. - let concatAlternatives := alternatives.reverse.filterMap fun alt => - match alt.reverse with - | [] => none - | [single] => some single - | head :: tail => some (tail.foldl RegexAST.concat head) - - match concatAlternatives with - | [] => - -- Empty group matches empty string. - pure (.group .empty, s.next i) - | [single] => pure (RegexAST.group single, s.next i) - | head :: tail => - let grouped := tail.foldl RegexAST.union head - pure (.group grouped, s.next i) - -/-- Parse single regex element with optional numeric repeat bounds. -/ -partial def parseRegex (s : String) (pos : String.Pos) : Except ParseError (RegexAST × String.Pos) := do +/-- +Parse atom: single element (char, class, anchor, group) with optional +quantifier. Stops at the first `|`. +-/ +partial def parseAtom (s : String) (pos : String.Pos) : Except ParseError (RegexAST × String.Pos) := do if s.atEnd pos then throw (.patternError "Unexpected end of regex" s pos) let some c := s.get? pos | throw (.patternError "Invalid position" s pos) @@ -216,12 +172,16 @@ partial def parseRegex (s : String) (pos : String.Pos) : Except ParseError (Rege if c == '*' || c == '+' || c == '{' || c == '?' then throw (.patternError s!"Quantifier '{c}' at position {pos} has nothing to quantify" s pos) + -- Detect unbalanced closing parenthesis + if c == ')' then + throw (.patternError "Unbalanced parenthesis" s pos) + -- Parse base element (anchor, char class, group, anychar, escape, or single char). let (base, nextPos) ← match c with | '^' => pure (RegexAST.anchor_start, s.next pos) | '$' => pure (RegexAST.anchor_end, s.next pos) | '[' => parseCharClass s pos - | '(' => parseGroup s pos + | '(' => parseExplicitGroup s pos | '.' => pure (RegexAST.anychar, s.next pos) | '\\' => -- Handle escape sequence. @@ -280,27 +240,66 @@ partial def parseRegex (s : String) (pos : String.Pos) : Except ParseError (Rege | _ => pure (base, nextPos) else pure (base, nextPos) -end -/-- -Parse entire regex string into list of AST nodes. --/ -partial def parseAll (s : String) (pos : String.Pos) (acc : List RegexAST) : - Except ParseError (List RegexAST) := - if s.atEnd pos then pure acc.reverse - else do - let (ast, nextPos) ← parseRegex s pos - parseAll s nextPos (ast :: acc) +/-- Parse explicit group with parentheses. -/ +partial def parseExplicitGroup (s : String) (pos : String.Pos) : Except ParseError (RegexAST × String.Pos) := do + if s.get? pos != some '(' then throw (.patternError "Expected '(' at start of group" s pos) + let mut i := s.next pos -/-- -Parse entire regex string into a single concatenated RegexAST node --/ -def parseTop (s : String) : Except ParseError RegexAST := do - let asts ← parseAll s 0 [] - match asts with - | [] => pure (.group .empty) - | [single] => pure single - | head :: tail => pure (tail.foldl RegexAST.concat head) + -- Check for extension notation (?... + if !s.atEnd i && s.get? i == some '?' then + let i1 := s.next i + if !s.atEnd i1 then + match s.get? i1 with + | some '=' => throw (.unimplemented "Positive lookahead (?=...) is not supported" s pos) + | some '!' => throw (.unimplemented "Negative lookahead (?!...) is not supported" s pos) + | _ => throw (.unimplemented "Extension notation (?...) is not supported" s pos) + + let (inner, finalPos) ← parseGroup s i (some ')') + pure (.group inner, finalPos) + +/-- Parse group: handles alternation and concatenation at current scope. -/ +partial def parseGroup (s : String) (pos : String.Pos) (endChar : Option Char) : + Except ParseError (RegexAST × String.Pos) := do + let mut alternatives : List (List RegexAST) := [[]] + let mut i := pos + + -- Parse until end of string or `endChar`. + while !s.atEnd i && (endChar.isNone || s.get? i != endChar) do + if s.get? i == some '|' then + -- Push a new scope to `alternatives`. + alternatives := [] :: alternatives + i := s.next i + else + let (ast, nextPos) ← parseAtom s i + alternatives := match alternatives with + | [] => [[ast]] + | head :: tail => (ast :: head) :: tail + i := nextPos + + -- Check for expected end character. + if let some ec := endChar then + if s.get? i != some ec then + throw (.patternError s!"Expected '{ec}'" s i) + i := s.next i + + -- Build result: concatenate each alternative, then union them. + let concatAlts := alternatives.reverse.filterMap fun alt => + match alt.reverse with + | [] => -- Empty regex. + some (.empty) + | [single] => some single + | head :: tail => some (tail.foldl RegexAST.concat head) + + match concatAlts with + | [] => pure (.empty, i) + | [single] => pure (single, i) + | head :: tail => pure (tail.foldl RegexAST.union head, i) +end + +/-- Parse entire regex string (implicit top-level group). -/ +def parseTop (s : String) : Except ParseError RegexAST := + parseGroup s 0 none |>.map (fun (r, _) => r) ------------------------------------------------------------------------------- @@ -379,9 +378,9 @@ end Test.parseBounds section Test.parseTop /-- -info: Except.ok [Strata.Python.RegexAST.union - (Strata.Python.RegexAST.union (Strata.Python.RegexAST.char '1') (Strata.Python.RegexAST.range '0' '1')) - (Strata.Python.RegexAST.char '5')] +info: Except.ok (Strata.Python.RegexAST.union + (Strata.Python.RegexAST.union (Strata.Python.RegexAST.char '1') (Strata.Python.RegexAST.range '0' '1')) + (Strata.Python.RegexAST.char '5')) -/ #guard_msgs in /- @@ -389,7 +388,7 @@ Cross-checked with: >>> re._parser.parse('[10-15]') [(IN, [(LITERAL, 49), (RANGE, (48, 49)), (LITERAL, 53)])] -/ -#eval parseAll "[10-15]" 0 [] +#eval parseTop "[10-15]" /-- info: Except.ok (Strata.Python.RegexAST.concat @@ -426,11 +425,11 @@ info: Except.error (Strata.Python.ParseError.patternError { byteIdx := 2 }) -/ #guard_msgs in -#eval parseAll ".*{1,10}" 0 [] +#eval parseTop ".*{1,10}" -/-- info: Except.ok [Strata.Python.RegexAST.star (Strata.Python.RegexAST.anychar)] -/ +/-- info: Except.ok (Strata.Python.RegexAST.star (Strata.Python.RegexAST.anychar)) -/ #guard_msgs in -#eval parseAll ".*" 0 [] +#eval parseTop ".*" /-- info: Except.error (Strata.Python.ParseError.patternError @@ -439,7 +438,7 @@ info: Except.error (Strata.Python.ParseError.patternError { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "*abc" 0 [] +#eval parseTop "*abc" /-- info: Except.error (Strata.Python.ParseError.patternError @@ -448,55 +447,63 @@ info: Except.error (Strata.Python.ParseError.patternError { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "+abc" 0 [] +#eval parseTop "+abc" -/-- info: Except.ok [Strata.Python.RegexAST.loop (Strata.Python.RegexAST.range 'a' 'z') 1 10] -/ +/-- info: Except.ok (Strata.Python.RegexAST.loop (Strata.Python.RegexAST.range 'a' 'z') 1 10) -/ #guard_msgs in -#eval parseAll "[a-z]{1,10}" 0 [] +#eval parseTop "[a-z]{1,10}" -/-- -info: Except.ok [Strata.Python.RegexAST.loop (Strata.Python.RegexAST.range 'a' 'z') 10 10] --/ +/-- info: Except.ok (Strata.Python.RegexAST.loop (Strata.Python.RegexAST.range 'a' 'z') 10 10) -/ #guard_msgs in -#eval parseAll "[a-z]{10}" 0 [] +#eval parseTop "[a-z]{10}" /-- -info: Except.ok [Strata.Python.RegexAST.anchor_start, - Strata.Python.RegexAST.union (Strata.Python.RegexAST.range 'a' 'z') (Strata.Python.RegexAST.range '0' '9'), - Strata.Python.RegexAST.loop - (Strata.Python.RegexAST.union - (Strata.Python.RegexAST.union - (Strata.Python.RegexAST.union (Strata.Python.RegexAST.range 'a' 'z') (Strata.Python.RegexAST.range '0' '9')) - (Strata.Python.RegexAST.char '.')) - (Strata.Python.RegexAST.char '-')) - 1 - 10, - Strata.Python.RegexAST.anchor_end] +info: Except.ok (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.anchor_start) + (Strata.Python.RegexAST.union (Strata.Python.RegexAST.range 'a' 'z') (Strata.Python.RegexAST.range '0' '9'))) + (Strata.Python.RegexAST.loop + (Strata.Python.RegexAST.union + (Strata.Python.RegexAST.union + (Strata.Python.RegexAST.union (Strata.Python.RegexAST.range 'a' 'z') (Strata.Python.RegexAST.range '0' '9')) + (Strata.Python.RegexAST.char '.')) + (Strata.Python.RegexAST.char '-')) + 1 + 10)) + (Strata.Python.RegexAST.anchor_end)) -/ #guard_msgs in -#eval parseAll "^[a-z0-9][a-z0-9.-]{1,10}$" 0 [] +#eval parseTop "^[a-z0-9][a-z0-9.-]{1,10}$" -- Test escape sequences (need \\ in Lean strings to get single \) /-- -info: Except.ok [Strata.Python.RegexAST.star (Strata.Python.RegexAST.anychar), - Strata.Python.RegexAST.char '.', - Strata.Python.RegexAST.char '.', - Strata.Python.RegexAST.anychar, - Strata.Python.RegexAST.star (Strata.Python.RegexAST.anychar)] +info: Except.ok (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.star (Strata.Python.RegexAST.anychar)) + (Strata.Python.RegexAST.char '.')) + (Strata.Python.RegexAST.char '.')) + (Strata.Python.RegexAST.anychar)) + (Strata.Python.RegexAST.star (Strata.Python.RegexAST.anychar))) -/ #guard_msgs in -#eval parseAll ".*\\.\\...*" 0 [] +#eval parseTop ".*\\.\\...*" /-- -info: Except.ok [Strata.Python.RegexAST.anchor_start, - Strata.Python.RegexAST.char 'x', - Strata.Python.RegexAST.char 'n', - Strata.Python.RegexAST.char '-', - Strata.Python.RegexAST.char '-', - Strata.Python.RegexAST.star (Strata.Python.RegexAST.anychar)] +info: Except.ok (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.anchor_start) (Strata.Python.RegexAST.char 'x')) + (Strata.Python.RegexAST.char 'n')) + (Strata.Python.RegexAST.char '-')) + (Strata.Python.RegexAST.char '-')) + (Strata.Python.RegexAST.star (Strata.Python.RegexAST.anychar))) -/ #guard_msgs in -#eval parseAll "^xn--.*" 0 [] +#eval parseTop "^xn--.*" /-- info: Except.error (Strata.Python.ParseError.patternError @@ -505,7 +512,7 @@ info: Except.error (Strata.Python.ParseError.patternError { byteIdx := 1 }) -/ #guard_msgs in -#eval parseAll "[x-c]" 0 [] +#eval parseTop "[x-c]" /-- info: Except.error (Strata.Python.ParseError.patternError @@ -514,45 +521,71 @@ info: Except.error (Strata.Python.ParseError.patternError { byteIdx := 2 }) -/ #guard_msgs in -#eval parseAll "[51-08]" 0 [] +#eval parseTop "[51-08]" /-- -info: Except.ok [Strata.Python.RegexAST.group - (Strata.Python.RegexAST.concat - (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.char 'a') (Strata.Python.RegexAST.char 'b')) - (Strata.Python.RegexAST.char 'c'))] +info: Except.ok (Strata.Python.RegexAST.group + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.char 'a') (Strata.Python.RegexAST.char 'b')) + (Strata.Python.RegexAST.char 'c'))) -/ #guard_msgs in -#eval parseAll "(abc)" 0 [] +#eval parseTop "(abc)" /-- -info: Except.ok [Strata.Python.RegexAST.group - (Strata.Python.RegexAST.union (Strata.Python.RegexAST.char 'a') (Strata.Python.RegexAST.char 'b'))] +info: Except.ok (Strata.Python.RegexAST.group + (Strata.Python.RegexAST.union (Strata.Python.RegexAST.char 'a') (Strata.Python.RegexAST.char 'b'))) -/ #guard_msgs in -#eval parseAll "(a|b)" 0 [] +#eval parseTop "(a|b)" /-- -info: Except.ok [Strata.Python.RegexAST.star - (Strata.Python.RegexAST.group - (Strata.Python.RegexAST.union - (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.char 'a') (Strata.Python.RegexAST.char 'b')) - (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.char 'c') (Strata.Python.RegexAST.char 'd'))))] +info: Except.ok (Strata.Python.RegexAST.union + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.anchor_start) (Strata.Python.RegexAST.char 'a')) + (Strata.Python.RegexAST.anchor_end)) + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.anchor_start) (Strata.Python.RegexAST.char 'b')) + (Strata.Python.RegexAST.anchor_end))) -/ #guard_msgs in -#eval parseAll "(ab|cd)*" 0 [] +#eval parseTop "^a$|^b$" /-- -info: Except.ok [Strata.Python.RegexAST.char 'a', Strata.Python.RegexAST.optional (Strata.Python.RegexAST.char 'b')] +info: Except.ok (Strata.Python.RegexAST.union + (Strata.Python.RegexAST.group + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.anchor_start) (Strata.Python.RegexAST.char 'a')) + (Strata.Python.RegexAST.anchor_end))) + (Strata.Python.RegexAST.group + (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.anchor_start) (Strata.Python.RegexAST.char 'b')) + (Strata.Python.RegexAST.anchor_end)))) -/ #guard_msgs in -#eval parseAll "ab?" 0 [] +#eval parseTop "(^a$)|(^b$)" /-- -info: Except.ok [Strata.Python.RegexAST.optional (Strata.Python.RegexAST.range 'a' 'z')] +info: Except.ok (Strata.Python.RegexAST.star + (Strata.Python.RegexAST.group + (Strata.Python.RegexAST.union + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.char 'a') (Strata.Python.RegexAST.char 'b')) + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.char 'c') (Strata.Python.RegexAST.char 'd'))))) -/ #guard_msgs in -#eval parseAll "[a-z]?" 0 [] +#eval parseTop "(ab|cd)*" + +/-- +info: Except.ok (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.char 'a') + (Strata.Python.RegexAST.optional (Strata.Python.RegexAST.char 'b'))) +-/ +#guard_msgs in +#eval parseTop "ab?" + +/-- info: Except.ok (Strata.Python.RegexAST.optional (Strata.Python.RegexAST.range 'a' 'z')) -/ +#guard_msgs in +#eval parseTop "[a-z]?" /-- info: Except.error (Strata.Python.ParseError.unimplemented @@ -561,7 +594,7 @@ info: Except.error (Strata.Python.ParseError.unimplemented { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "(?=test)" 0 [] +#eval parseTop "(?=test)" /-- info: Except.error (Strata.Python.ParseError.unimplemented @@ -570,7 +603,7 @@ info: Except.error (Strata.Python.ParseError.unimplemented { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "(?!silly-)" 0 [] +#eval parseTop "(?!silly-)" /-- info: Except.error (Strata.Python.ParseError.unimplemented @@ -579,7 +612,7 @@ info: Except.error (Strata.Python.ParseError.unimplemented { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "(?:abc)" 0 [] +#eval parseTop "(?:abc)" /-- info: Except.error (Strata.Python.ParseError.unimplemented @@ -588,73 +621,102 @@ info: Except.error (Strata.Python.ParseError.unimplemented { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "(?Ptest)" 0 [] +#eval parseTop "(?Ptest)" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Special sequence \\d is not supported" "\\d+" { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "\\d+" 0 [] +#eval parseTop "\\d+" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Special sequence \\w is not supported" "\\w*" { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "\\w*" 0 [] +#eval parseTop "\\w*" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Special sequence \\s is not supported" "\\s+" { byteIdx := 0 }) -/ #guard_msgs in -#eval parseAll "\\s+" 0 [] +#eval parseTop "\\s+" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Escape sequence \\n is not supported" "test\\n" { byteIdx := 4 }) -/ #guard_msgs in -#eval parseAll "test\\n" 0 [] +#eval parseTop "test\\n" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Backreference \\1 is not supported" "(a)\\1" { byteIdx := 3 }) -/ #guard_msgs in -#eval parseAll "(a)\\1" 0 [] +#eval parseTop "(a)\\1" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Non-greedy quantifier *? is not supported" "a*?" { byteIdx := 1 }) -/ #guard_msgs in -#eval parseAll "a*?" 0 [] +#eval parseTop "a*?" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Non-greedy quantifier +? is not supported" "a+?" { byteIdx := 1 }) -/ #guard_msgs in -#eval parseAll "a+?" 0 [] +#eval parseTop "a+?" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Non-greedy quantifier ?? is not supported" "a??" { byteIdx := 1 }) -/ #guard_msgs in -#eval parseAll "a??" 0 [] +#eval parseTop "a??" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Possessive quantifier *+ is not supported" "a*+" { byteIdx := 1 }) -/ #guard_msgs in -#eval parseAll "a*+" 0 [] +#eval parseTop "a*+" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Possessive quantifier ++ is not supported" "a++" { byteIdx := 1 }) -/ #guard_msgs in -#eval parseAll "a++" 0 [] +#eval parseTop "a++" /-- info: Except.error (Strata.Python.ParseError.unimplemented "Possessive quantifier ?+ is not supported" "a?+" { byteIdx := 1 }) -/ #guard_msgs in -#eval parseAll "a?+" 0 [] +#eval parseTop "a?+" + +/-- +info: Except.ok (Strata.Python.RegexAST.union + (Strata.Python.RegexAST.empty) + (Strata.Python.RegexAST.concat (Strata.Python.RegexAST.char 'x') (Strata.Python.RegexAST.char 'y'))) +-/ +#guard_msgs in +#eval parseTop "|xy" + +/-- +info: Except.ok (Strata.Python.RegexAST.concat + (Strata.Python.RegexAST.char 'a') + (Strata.Python.RegexAST.group + (Strata.Python.RegexAST.union (Strata.Python.RegexAST.empty) (Strata.Python.RegexAST.char 'b')))) +-/ +#guard_msgs in +#eval parseTop "a(|b)" + +/-- +info: Except.error (Strata.Python.ParseError.patternError "Unbalanced parenthesis" "x)" { byteIdx := 1 }) +-/ +#guard_msgs in +#eval parseTop "x)" + +/-- +info: Except.error (Strata.Python.ParseError.patternError "Unbalanced parenthesis" "())" { byteIdx := 2 }) +-/ +#guard_msgs in +#eval parseTop "())" end Test.parseTop diff --git a/Strata/Languages/Python/Regex/ReToBoogie.lean b/Strata/Languages/Python/Regex/ReToBoogie.lean index e36c90e1..4ea29793 100644 --- a/Strata/Languages/Python/Regex/ReToBoogie.lean +++ b/Strata/Languages/Python/Regex/ReToBoogie.lean @@ -15,52 +15,26 @@ namespace Python open Lambda.LExpr open Boogie -/-- -Map `RegexAST` nodes to Boogie expressions. Note that anchor nodes are not -handled here. See `pythonRegexToBoogie` for a preprocessing pass. --/ -def RegexAST.toBoogie (ast : RegexAST) : Except ParseError Boogie.Expression.Expr := do - match ast with - | .char c => - return (mkApp () (.op () strToRegexFunc.name none) [strConst () (toString c)]) - | .range c1 c2 => - return mkApp () (.op () reRangeFunc.name none) [strConst () (toString c1), strConst () (toString c2)] - | .union r1 r2 => - let r1b ← toBoogie r1 - let r2b ← toBoogie r2 - return mkApp () (.op () reUnionFunc.name none) [r1b, r2b] - | .concat r1 r2 => - let r1b ← toBoogie r1 - let r2b ← toBoogie r2 - return mkApp () (.op () reConcatFunc.name none) [r1b, r2b] - | .star r => - let rb ← toBoogie r - return mkApp () (.op () reStarFunc.name none) [rb] - | .plus r => - let rb ← toBoogie r - return mkApp () (.op () rePlusFunc.name none) [rb] - | .optional r => - let rb ← toBoogie r - return mkApp () (.op () reLoopFunc.name none) [rb, intConst () 0, intConst () 1] - | .loop r min max => - let rb ← toBoogie r - return mkApp () (.op () reLoopFunc.name none) [rb, intConst () min, intConst () max] - | .anychar => - return mkApp () (.op () reAllCharFunc.name none) [] - | .group r => toBoogie r - | .empty => return mkApp () (.op () strToRegexFunc.name none) [strConst () ""] - | .complement r => - let rb ← toBoogie r - return mkApp () (.op () reCompFunc.name none) [rb] - | .anchor_start => throw (.patternError "Anchor should not appear in AST conversion" "" 0) - | .anchor_end => throw (.patternError "Anchor should not appear in AST conversion" "" 0) - /-- Python regexes can be interpreted differently based on the matching mode. -Consider the regex pattern `x`. + +Consider the regex pattern that does not contain any anchors: `x`. For search, this is equivalent to `.*x.*`. For match, this is equivalent to `x.*`. -For full match, this is exactly `x`. +For fullmatch, this is exactly `x`. + +Consider the regex pattern: `^x`. +For search, this is equivalent to `x.*`. +For match, this is equivalent to `x.*`. +Again for fullmatch, this is exactly `x`. + +Consider the regex pattern: `x$`. +For search, this is equivalent to `.*x`. +For match, this is equivalent to `x`. +Again for fullmatch, this is exactly `x`. + +Consider the regex pattern: `^x$`. +For search, match, and fullmatch, this is equivalent to `x`. -/ inductive MatchMode where | search -- `re.search()` - match anywhere in string @@ -68,81 +42,295 @@ inductive MatchMode where | fullmatch -- `re.fullmatch()` - match entire string deriving Repr, BEq +/-- +When `r` is definitely consuming, this function returns `true`. +Returns `false` otherwise (i.e., when it _may_ not be consuming). +-/ +def RegexAST.alwaysConsume (r : RegexAST) : Bool := + match r with + | .char _ => true + | .range _ _ => true + | .union r1 r2 => alwaysConsume r1 && alwaysConsume r2 + | .concat r1 r2 => alwaysConsume r1 || alwaysConsume r2 + | .anychar => true + | .star _ => false + | .plus r1 => alwaysConsume r1 + | .optional _ => false + | .loop r1 n _ => alwaysConsume r1 && n ≠ 0 + | .anchor_start => false + | .anchor_end => false + | .group r1 => alwaysConsume r1 + | .empty => false + | .complement _ => true /-- -Map `pyRegex` -- a string indicating a regular expression pattern -- to a -corresponding Boogie expression, taking match mode semantics into account. -Returns a pair of (result, optional error). On error, returns `re.all` as -fallback. +Empty regex pattern; matches an empty string. -/ +def Boogie.emptyRegex : Boogie.Expression.Expr := + mkApp () (.op () strToRegexFunc.name none) [strConst () ""] + +/-- +Unmatchable regex pattern. +-/ +def Boogie.unmatchableRegex : Boogie.Expression.Expr := + mkApp () (.op () reNoneFunc.name none) [] + +partial def RegexAST.toBoogie (r : RegexAST) (atStart atEnd : Bool) : + Boogie.Expression.Expr := + match r with + | .char c => + (mkApp () (.op () strToRegexFunc.name none) [strConst () (toString c)]) + | .range c1 c2 => + mkApp () (.op () reRangeFunc.name none) [strConst () (toString c1), strConst () (toString c2)] + | .anychar => + mkApp () (.op () reAllCharFunc.name none) [] + | .empty => Boogie.emptyRegex + | .complement r => + let rb := toBoogie r atStart atEnd + mkApp () (.op () reCompFunc.name none) [rb] + | .anchor_start => + if atStart then Boogie.emptyRegex else Boogie.unmatchableRegex + | .anchor_end => + if atEnd then Boogie.emptyRegex else Boogie.unmatchableRegex + | .plus r1 => + toBoogie (.concat r1 (.star r1)) atStart atEnd + | .star r1 => + let r1b := toBoogie r1 atStart atEnd + let r2b := + match (alwaysConsume r1) with + | true => + let r1b := toBoogie r1 atStart false -- r1 at the beginning + let r2b := toBoogie r1 false false -- r1s in the middle + let r3b := toBoogie r1 false atEnd -- r1 at the end + let r2b := mkApp () (.op () reStarFunc.name none) [r2b] + mkApp () (.op () reConcatFunc.name none) + [mkApp () (.op () reConcatFunc.name none) [r1b, r2b], r3b] + | false => + mkApp () (.op () reStarFunc.name none) [r1b] + mkApp () (.op () reUnionFunc.name none) + [mkApp () (.op () reUnionFunc.name none) [Boogie.emptyRegex, r1b], r2b] + | .optional r1 => + toBoogie (.union .empty r1) atStart atEnd + | .loop r1 n m => + match n, m with + | 0, 0 => Boogie.emptyRegex + | 0, 1 => toBoogie (.union .empty r1) atStart atEnd + | 0, m => -- Note: m >= 2 + let r1b := toBoogie r1 atStart atEnd + let r2b := match (alwaysConsume r1) with + | true => + let r1b := toBoogie r1 atStart false -- r1 at the beginning + let r2b := toBoogie r1 false false -- r1s in the middle + let r3b := toBoogie r1 false atEnd -- r1 at the end + let r2b := mkApp () (.op () reLoopFunc.name none) [r2b, intConst () 0, intConst () (m-2)] + mkApp () (.op () reConcatFunc.name none) [mkApp () (.op () reConcatFunc.name none) [r1b, r2b], r3b] + | false => + mkApp () (.op () reLoopFunc.name none) [r1b, intConst () 0, intConst () m] + mkApp () (.op () reUnionFunc.name none) + [mkApp () (.op () reUnionFunc.name none) [Boogie.emptyRegex, r1b], + r2b] + | _, _ => + toBoogie (.concat r1 (.loop r1 (n - 1) (m - 1))) atStart atEnd + | .group r1 => toBoogie r1 atStart atEnd + | .concat r1 r2 => + match (alwaysConsume r1), (alwaysConsume r2) with + | true, true => + let r1b := toBoogie r1 atStart false + let r2b := toBoogie r2 false atEnd + mkApp () (.op () reConcatFunc.name none) [r1b, r2b] + | true, false => + let r1b := toBoogie r1 atStart atEnd + let r2b := toBoogie r2 false atEnd + mkApp () (.op () reConcatFunc.name none) [r1b, r2b] + | false, true => + let r1b := toBoogie r1 atStart false + let r2b := toBoogie r2 true atEnd + mkApp () (.op () reConcatFunc.name none) [r1b, r2b] + | false, false => + let r1b := toBoogie r1 atStart atEnd + let r2b := toBoogie r2 atStart atEnd + mkApp () (.op () reConcatFunc.name none) [r1b, r2b] + | .union r1 r2 => + let r1b := toBoogie r1 atStart atEnd + let r2b := toBoogie r2 atStart atEnd + mkApp () (.op () reUnionFunc.name none) [r1b, r2b] + def pythonRegexToBoogie (pyRegex : String) (mode : MatchMode := .fullmatch) : Boogie.Expression.Expr × Option ParseError := - let reAll := mkApp () (.op () reAllFunc.name none) [] - match parseAll pyRegex 0 [] with - | .error err => (reAll, some err) - | .ok asts => - - -- Detect start and end anchors, if any. - let hasStartAnchor := match asts.head? with | some .anchor_start => true | _ => false - let hasEndAnchor := match asts.getLast? with | some .anchor_end => true | _ => false - - -- Check for anchors in middle positions. - let middle := if hasStartAnchor then asts.tail else asts - let middle := if hasEndAnchor && !middle.isEmpty then middle.dropLast else middle - let hasMiddleAnchor := middle.any (fun ast => match ast with | .anchor_start | .anchor_end => true | _ => false) - - -- If anchors in middle, return `re.none` (unmatchable pattern). - -- NOTE: this is a heavy-ish semantic transform. - if hasMiddleAnchor then - let reNone := mkApp () (.op () reNoneFunc.name none) [] - (reNone, none) - else - - -- `filtered` does not have any anchors. - let filtered := middle - - -- Handle empty pattern. - if filtered.isEmpty then - (mkApp () (.op () strToRegexFunc.name none) [strConst () ""], none) - else - -- Concatenate filtered ASTs. - let core := match filtered with - | [single] => single - | head :: tail => tail.foldl RegexAST.concat head - | [] => unreachable! - - -- Convert core pattern. - match RegexAST.toBoogie core with - | .error err => (reAll, some err) - | .ok coreExpr => - -- Wrap with `Re.All` based on mode and anchors - let result := match mode, hasStartAnchor, hasEndAnchor with - -- Explicit anchors always override match mode. - | _, true, true => - -- ^pattern$ - exact match. - coreExpr - | _, true, false => - -- ^pattern - starts with. - mkApp () (.op () reConcatFunc.name none) [coreExpr, reAll] - | _, false, true => - -- pattern$ - ends with. - mkApp () (.op () reConcatFunc.name none) [reAll, coreExpr] - -- No anchors - apply match mode. - | .fullmatch, false, false => - -- exact match - coreExpr - | .match, false, false => - -- match at start - mkApp () (.op () reConcatFunc.name none) [coreExpr, reAll] - | .search, false, false => - -- match anywhere - mkApp () (.op () reConcatFunc.name none) [reAll, mkApp () (.op () reConcatFunc.name none) [coreExpr, reAll]] - (result, none) + match parseTop pyRegex with + | .error err => (mkApp () (.op () reAllFunc.name none) [], some err) + | .ok ast => + let dotStar := (RegexAST.star (.anychar)) + -- Wrap with `.*` based on mode. + let ast := match mode with + | .fullmatch => ast + | .match => .concat ast dotStar + | .search => .concat dotStar (.concat ast dotStar) + let result := RegexAST.toBoogie ast true true + (result, none) -------------------------------------------------------------------------------- +/-- +info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #a)) (~Str.ToRegEx #b))) ((~Re.Union ((~Re.Union (~Str.ToRegEx #)) ~Re.AllChar)) ((~Re.Concat ((~Re.Concat ~Re.AllChar) (~Re.Star ~Re.AllChar))) ~Re.AllChar))), + none) +-/ +#guard_msgs in +#eval Std.format$ pythonRegexToBoogie "ab.*" -- Encoded as `ab(|.|..*.)` -section Test.pythonRegexToBoogie +/-- +info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #a)) (~Str.ToRegEx #b))) ((~Re.Union ((~Re.Union (~Str.ToRegEx #)) ((~Re.Concat (~Str.ToRegEx #c)) (~Str.ToRegEx #)))) ((~Re.Concat ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #c)) ~Re.None)) (~Re.Star ((~Re.Concat (~Str.ToRegEx #c)) ~Re.None)))) ((~Re.Concat (~Str.ToRegEx #c)) (~Str.ToRegEx #))))), + none) +-/ +#guard_msgs in +#eval Std.format$ pythonRegexToBoogie "ab(c$)*" + +/-- +info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #a)) (~Str.ToRegEx #b))) ((~Re.Union ((~Re.Union (~Str.ToRegEx #)) ((~Re.Concat ((~Re.Concat ~Re.None) (~Str.ToRegEx #c))) (~Str.ToRegEx #)))) ((~Re.Concat ((~Re.Concat ((~Re.Concat ((~Re.Concat ~Re.None) (~Str.ToRegEx #c))) ~Re.None)) (~Re.Star ((~Re.Concat ((~Re.Concat ~Re.None) (~Str.ToRegEx #c))) ~Re.None)))) ((~Re.Concat ((~Re.Concat ~Re.None) (~Str.ToRegEx #c))) (~Str.ToRegEx #))))), + none) +-/ +#guard_msgs in +#eval Std.format$ pythonRegexToBoogie "ab(^c$)*" + +/-- info: (((~Re.Concat (~Str.ToRegEx #a)) (~Str.ToRegEx #b)), none) -/ +#guard_msgs in +#eval Std.format$ pythonRegexToBoogie "ab" + +/-- info: (((~Re.Union (~Str.ToRegEx #a)) (~Str.ToRegEx #b)), none) -/ +#guard_msgs in +#eval Std.format$ pythonRegexToBoogie "a|b" + +/-- +info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) (~Str.ToRegEx #b)), none) +-/ +#guard_msgs in +#eval Std.format$ pythonRegexToBoogie "^ab" + +/-- +info: (((~Re.Concat ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) (~Str.ToRegEx #b))) (~Str.ToRegEx #)), + none) +-/ +#guard_msgs in +#eval Std.format$ pythonRegexToBoogie "^ab$" + +/-- info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #a)) ~Re.None)) (~Str.ToRegEx #b)), none) -/ +#guard_msgs in +#eval Std.format$ pythonRegexToBoogie "(a$)b" + +/-- +info: (((~Re.Concat ((~Re.Concat ((~Re.Concat ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #))) (~Str.ToRegEx #))) (~Str.ToRegEx #a))) (~Str.ToRegEx #))) (~Str.ToRegEx #)), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "^^^a$$" + +/-- +info: (((~Re.Concat (~Str.ToRegEx #)) ((~Re.Concat ((~Re.Concat ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #))) (~Str.ToRegEx #a))) (~Str.ToRegEx #))) (~Str.ToRegEx #))), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "^(^^a$$)" +/-- +info: (((~Re.Union ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) (~Str.ToRegEx #))) ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #b))) (~Str.ToRegEx #))), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "(^a$)|(^b$)" + +/-- +info: (((~Re.Concat (~Str.ToRegEx #c)) ((~Re.Union ((~Re.Concat ~Re.None) (~Str.ToRegEx #a))) ((~Re.Concat ~Re.None) (~Str.ToRegEx #b)))), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "c((^a)|(^b))" + +/-- +info: (((~Re.Concat ((~Re.Union ((~Re.Concat (~Str.ToRegEx #a)) ~Re.None)) ((~Re.Concat (~Str.ToRegEx #b)) ~Re.None))) (~Str.ToRegEx #c)), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "((a$)|(b$))c" + +/-- +info: (((~Re.Concat ((~Re.Union ((~Re.Concat (~Str.ToRegEx #a)) ~Re.None)) (~Str.ToRegEx #b))) (~Str.ToRegEx #c)), none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "((a$)|(b))c" + +/-- +info: (((~Re.Concat (~Str.ToRegEx #c)) ((~Re.Union ((~Re.Concat (~Str.ToRegEx #a)) (~Str.ToRegEx #))) ((~Re.Concat ((~Re.Concat ~Re.None) (~Str.ToRegEx #b))) (~Str.ToRegEx #)))), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "c((a$)|(^b$))" + +/-- +info: (((~Re.Concat ((~Re.Union ((~Re.Concat (~Str.ToRegEx #a)) ~Re.None)) (~Str.ToRegEx #b))) (~Str.ToRegEx #c)), none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "((a$)|(b))c" + +/-- info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) ~Re.None)) (~Str.ToRegEx #b)), none) -/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "^$b" + +/-- +info: (((~Re.Union ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) (~Str.ToRegEx #))) ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) ~Re.None)) (~Str.ToRegEx #b))), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "^a$|^$b" + +/-- +info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #c)) ((~Re.Union ((~Re.Concat ~Re.None) (~Str.ToRegEx #a))) ((~Re.Concat (~Str.ToRegEx #b)) ~Re.None)))) (~Str.ToRegEx #d)), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "c(^a|b$)d" + +/-- +info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #c)) ((~Re.Union ((~Re.Concat ~Re.None) (~Str.ToRegEx #a))) ((~Re.Concat (~Str.ToRegEx #b)) ~Re.None)))) (~Str.ToRegEx #d)), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "(c(^a|b$))d" + +/-- +info: (((~Re.Concat ((~Re.Union ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) ((~Re.Concat (~Str.ToRegEx #b)) ~Re.None))) ((~Re.Union ((~Re.Concat ~Re.None) (~Str.ToRegEx #c))) ((~Re.Concat (~Str.ToRegEx #d)) (~Str.ToRegEx #)))), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "(^a|b$)(^c|d$)" + +/-- +info: (((~Re.Concat ((~Re.Concat ((~Re.Union ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) ((~Re.Concat (~Str.ToRegEx #b)) ~Re.None))) ~Re.None)) (~Str.ToRegEx #c)), + none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "((^a|b$)^)c" + +/-- info: (((~Re.Concat ((~Re.Union (~Str.ToRegEx #)) ~Re.None)) (~Str.ToRegEx #c)), none) -/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "(^|$)c" + +/-- info: (((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #)), none) -/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "^^" + +/-- +info: (((~Re.Concat ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #))) (~Str.ToRegEx #))) (~Str.ToRegEx #)), none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "^$$^" + +/-- info: (((~Re.Concat ((~Re.Union (~Str.ToRegEx #)) (~Str.ToRegEx #))) (~Str.ToRegEx #)), none) -/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "(^|$)^" + +/-- +info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) (~Str.ToRegEx #)), none) +-/ +#guard_msgs in +#eval Std.format $ pythonRegexToBoogie "^a$" .fullmatch /-- info: (~Re.All, @@ -152,15 +340,17 @@ info: (~Re.All, #eval Std.format $ pythonRegexToBoogie "x{100,2}" .fullmatch -- (unmatchable) -/-- info: (~Re.None, none) -/ +/-- info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #a)) ~Re.None)) (~Str.ToRegEx #b)), none) -/ #guard_msgs in #eval Std.format $ pythonRegexToBoogie "a^b" .fullmatch -/-- info: (~Re.None, none) -/ +/-- +info: (((~Re.Concat ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) ~Re.None)) (~Str.ToRegEx #b)), none) +-/ #guard_msgs in #eval Std.format $ pythonRegexToBoogie "^a^b" .fullmatch -/-- info: (~Re.None, none) -/ +/-- info: (((~Re.Concat ((~Re.Concat (~Str.ToRegEx #a)) ~Re.None)) (~Str.ToRegEx #b)), none) -/ #guard_msgs in #eval Std.format $ pythonRegexToBoogie "a$b" .fullmatch @@ -180,27 +370,37 @@ info: (~Re.All, #guard_msgs in #eval Std.format $ pythonRegexToBoogie "a" .fullmatch -/-- info: (((~Re.Concat (~Str.ToRegEx #a)) ~Re.All), none) -/ +/-- +info: (((~Re.Concat (~Str.ToRegEx #a)) ((~Re.Union ((~Re.Union (~Str.ToRegEx #)) ~Re.AllChar)) ((~Re.Concat ((~Re.Concat ~Re.AllChar) (~Re.Star ~Re.AllChar))) ~Re.AllChar))), + none) +-/ #guard_msgs in #eval Std.format $ pythonRegexToBoogie "a" .match -- search mode tests -/-- info: (((~Re.Concat ~Re.All) ((~Re.Concat (~Str.ToRegEx #a)) ~Re.All)), none) -/ +/-- +info: (((~Re.Concat ((~Re.Union ((~Re.Union (~Str.ToRegEx #)) ~Re.AllChar)) ((~Re.Concat ((~Re.Concat ~Re.AllChar) (~Re.Star ~Re.AllChar))) ~Re.AllChar))) ((~Re.Concat (~Str.ToRegEx #a)) ((~Re.Union ((~Re.Union (~Str.ToRegEx #)) ~Re.AllChar)) ((~Re.Concat ((~Re.Concat ~Re.AllChar) (~Re.Star ~Re.AllChar))) ~Re.AllChar)))), + none) +-/ #guard_msgs in #eval Std.format $ pythonRegexToBoogie "a" .search -/-- info: ((~Str.ToRegEx #a), none) -/ +/-- +info: (((~Re.Concat ((~Re.Union ((~Re.Union (~Str.ToRegEx #)) ~Re.AllChar)) ((~Re.Concat ((~Re.Concat ~Re.AllChar) (~Re.Star ~Re.AllChar))) ~Re.AllChar))) ((~Re.Concat ((~Re.Concat ((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a))) (~Str.ToRegEx #))) ((~Re.Union ((~Re.Union (~Str.ToRegEx #)) ~Re.AllChar)) ((~Re.Concat ((~Re.Concat ~Re.AllChar) (~Re.Star ~Re.AllChar))) ~Re.AllChar)))), + none) +-/ #guard_msgs in #eval Std.format $ pythonRegexToBoogie "^a$" .search -/-- info: (((~Re.Concat (~Str.ToRegEx #a)) ~Re.All), none) -/ +/-- info: (((~Re.Concat (~Str.ToRegEx #)) (~Str.ToRegEx #a)), none) -/ #guard_msgs in #eval Std.format $ pythonRegexToBoogie "^a" .fullmatch -/-- info: (((~Re.Concat ~Re.All) (~Str.ToRegEx #a)), none) -/ -#guard_msgs in -#eval Std.format $ pythonRegexToBoogie "a$" .match +-- -- BAD +-- #eval Std.format $ pythonRegexToBoogie "a$.*" .fullmatch +-- +-- -- BAD +-- #eval Std.format $ pythonRegexToBoogie "a$" .match -end Test.pythonRegexToBoogie ------------------------------------------------------------------------------- diff --git a/StrataMain.lean b/StrataMain.lean index 95b29f9d..85ac8262 100644 --- a/StrataMain.lean +++ b/StrataMain.lean @@ -193,7 +193,10 @@ def pyAnalyzeCommand : Command where if verbose then IO.print newPgm let vcResults ← EIO.toIO (fun f => IO.Error.userError (toString f)) - (Boogie.verify "z3" newPgm { Options.default with stopOnFirstError := false, verbose }) + (Boogie.verify "z3" newPgm { Options.default with stopOnFirstError := false, + verbose, + removeIrrelevantAxioms := true } + (moreFns := Strata.Python.ReFactory)) let mut s := "" for vcResult in vcResults do s := s ++ s!"\n{vcResult.obligation.label}: {Std.format vcResult.result}\n" diff --git a/StrataTest/Languages/Python/expected/test_0.expected b/StrataTest/Languages/Python/expected/test_0.expected index 84dccf1f..9e6ba26c 100644 --- a/StrataTest/Languages/Python/expected/test_0.expected +++ b/StrataTest/Languages/Python/expected/test_0.expected @@ -3,24 +3,25 @@ ensures_maybe_except_none: verified (Origin_test_helper_procedure_Requires)req_name_is_foo: verified -(Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified +(Origin_test_helper_procedure_Requires)opt_name_none_or_str: verified (Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified (Origin_test_helper_procedure_Requires)req_name_is_foo: verified -(Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified +(Origin_test_helper_procedure_Requires)opt_name_none_or_str: verified (Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified -(Origin_test_helper_procedure_Requires)req_name_is_foo: unknown +(Origin_test_helper_procedure_Requires)req_name_is_foo: failed +CEx: -(Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified +(Origin_test_helper_procedure_Requires)opt_name_none_or_str: verified (Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified (Origin_test_helper_procedure_Requires)req_name_is_foo: verified -(Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified +(Origin_test_helper_procedure_Requires)opt_name_none_or_str: verified (Origin_test_helper_procedure_Requires)opt_name_none_or_bar: unknown diff --git a/StrataTest/Languages/Python/expected/test_1.expected b/StrataTest/Languages/Python/expected/test_1.expected index c8d278c4..885cc9cf 100644 --- a/StrataTest/Languages/Python/expected/test_1.expected +++ b/StrataTest/Languages/Python/expected/test_1.expected @@ -1,8 +1,9 @@ ensures_maybe_except_none: verified -(Origin_test_helper_procedure_Requires)req_name_is_foo: unknown +(Origin_test_helper_procedure_Requires)req_name_is_foo: failed +CEx: ($__s8, "") -(Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified +(Origin_test_helper_procedure_Requires)opt_name_none_or_str: verified (Origin_test_helper_procedure_Requires)opt_name_none_or_bar: verified diff --git a/StrataTest/Languages/Python/run_py_analyze.sh b/StrataTest/Languages/Python/run_py_analyze.sh index 44c0218d..50726ffd 100755 --- a/StrataTest/Languages/Python/run_py_analyze.sh +++ b/StrataTest/Languages/Python/run_py_analyze.sh @@ -20,4 +20,4 @@ for test_file in test_[0-9]*.py; do exit 1 fi fi -done \ No newline at end of file +done