diff --git a/src/main/scala/ostrich/OstrichSolver.scala b/src/main/scala/ostrich/OstrichSolver.scala index e668f48749..8fc23e7497 100644 --- a/src/main/scala/ostrich/OstrichSolver.scala +++ b/src/main/scala/ostrich/OstrichSolver.scala @@ -35,7 +35,7 @@ class OstrichSolver(theory : OstrichStringTheory, flags : OFlags) { import theory.{str, str_len, str_empty, str_cons, str_++, str_in_re, - str_to_re, + str_to_re, re_from_str, str_replace, str_replacere, str_replaceall, str_replaceallre, re_none, re_all, re_allchar, re_charrange, re_++, re_union, re_inter, re_*, re_+, re_opt, re_comp, @@ -43,7 +43,7 @@ class OstrichSolver(theory : OstrichStringTheory, val rexOps : Set[IFunction] = Set(re_none, re_all, re_allchar, re_charrange, re_++, re_union, re_inter, - re_*, re_+, re_opt, re_comp, re_loop, str_to_re) + re_*, re_+, re_opt, re_comp, re_loop, str_to_re, re_from_str) private val p = theory.functionPredicateMap diff --git a/src/main/scala/ostrich/OstrichStringTheory.scala b/src/main/scala/ostrich/OstrichStringTheory.scala index 459631757e..5feafd99b1 100644 --- a/src/main/scala/ostrich/OstrichStringTheory.scala +++ b/src/main/scala/ostrich/OstrichStringTheory.scala @@ -132,7 +132,7 @@ class OstrichStringTheory(transducers : Seq[(String, Transducer)], str_replacere, str_replaceallre, str_to_re, re_none, re_eps, re_all, re_allchar, re_charrange, re_++, re_union, re_inter, re_*, re_+, re_opt, re_comp, - re_loop)) + re_loop, re_from_str)) yield functionPredicateMap(f)) ++ (for (f <- List(str_len); if flags.useLength != OFlags.LengthOptions.Off) yield functionPredicateMap(f)) ++ diff --git a/src/main/scala/ostrich/Regex2Aut.scala b/src/main/scala/ostrich/Regex2Aut.scala index 62688cdb21..127cd3b95e 100644 --- a/src/main/scala/ostrich/Regex2Aut.scala +++ b/src/main/scala/ostrich/Regex2Aut.scala @@ -29,7 +29,7 @@ class Regex2Aut(theory : OstrichStringTheory) { import theory.{re_none, re_all, re_eps, re_allchar, re_charrange, re_++, re_union, re_inter, re_*, re_+, re_opt, re_comp, - re_loop, str_to_re} + re_loop, str_to_re, re_from_str} def buildBricsRegex(t : ITerm) : String = t match { case IFunApp(`re_none`, _) => @@ -59,6 +59,7 @@ class Regex2Aut(theory : OstrichStringTheory) { "~(" + buildBricsRegex(a) + ")" case IFunApp(`re_loop`, Seq(IIntLit(n1), IIntLit(n2), a)) => "(" + buildBricsRegex(a) + "){" + n1 + "," + n2 + "}" + case IFunApp(`str_to_re`, Seq(a)) => StringTheory.term2List(a) match { case Seq() => @@ -68,8 +69,44 @@ class Regex2Aut(theory : OstrichStringTheory) { c <- "[\\" + numToUnicode(v) + "]") yield c).mkString } + + case IFunApp(`re_from_str`, Seq(a)) => { + // TODO: this translation has to be checked more carefully, there might + // be problems due to escaping + val str = StringTheory.term2String(a) + + // BRICS cannot handle anchors, so currently we are just removing them + // TODO: find a better solution for this + + var begin = 0 + while (begin < str.size && str(begin) == '^') { + Console.err.println("Warning: ignoring anchor ^") + begin = begin + 1 + } + + var end = str.size + while (end > 0 && str(end - 1) == '$') { + Console.err.println("Warning: ignoring anchor $") + end = end - 1 + } + + val str2 = str.slice(begin, end) + + // handle some of the PCRE sequences + // TODO: do this more systematically + val str3 = str2.replaceAll("""\\w""", "[A-Za-z0-9_]") + .replaceAll("""\\W""", "[^A-Za-z0-9_]") + .replaceAll("""\\s""", "[ ]") + .replaceAll("""\\S""", "[^ ]") + .replaceAll("""\\d""", "[0-9]") + .replaceAll("""\\D""", "[^0-9]") + + str3 + } + case _ => - throw new IllegalArgumentException + throw new IllegalArgumentException( + "could not translate " + t + " to an automaton") } def buildBricsAut(t : ITerm) : BAutomaton = diff --git a/src/test/scala/SMTLIBTests.scala b/src/test/scala/SMTLIBTests.scala index 0e19e9a696..696ca5d4da 100644 --- a/src/test/scala/SMTLIBTests.scala +++ b/src/test/scala/SMTLIBTests.scala @@ -161,4 +161,13 @@ object SMTLIBTests extends Properties("SMTLIBTests") { property("substring.smt2") = checkFile("tests/substring.smt2", "sat") + property("parse-regex.smt2") = + checkFile("tests/parse-regex.smt2", "sat") + property("parse-regex2.smt2") = + checkFile("tests/parse-regex2.smt2", "sat") + property("parse-regex2b.smt2") = + checkFile("tests/parse-regex2b.smt2", "unsat") + property("parse-regex4.smt2") = + checkFile("tests/parse-regex4.smt2", "sat") + } diff --git a/tests/parse-regex-incremental.smt2 b/tests/parse-regex-incremental.smt2 new file mode 100644 index 0000000000..0eca1bdccf --- /dev/null +++ b/tests/parse-regex-incremental.smt2 @@ -0,0 +1,20 @@ + + +(set-logic QF_S) + +(declare-const w String) + +(assert (str.in.re w (re.from.str "^[a-zA-Z][a-zA-Z0-9-_\.]{1,20}$"))) + +(check-sat) +(get-model) + +(assert (str.in.re w (re.* (re.range "a" "z")))) + +(check-sat) +(get-model) + +(assert (= (str.len w) 5)) + +(check-sat) +(get-model) diff --git a/tests/parse-regex.smt2 b/tests/parse-regex.smt2 new file mode 100644 index 0000000000..391ac7f110 --- /dev/null +++ b/tests/parse-regex.smt2 @@ -0,0 +1,10 @@ +(set-logic QF_S) + +(declare-const w String) + +(assert (str.in.re w (re.from.str "^[a-zA-Z][a-zA-Z0-9-_\.]{1,20}$"))) +(assert (str.in.re w (re.* (re.range "a" "z")))) +(assert (= (str.len w) 5)) + +(check-sat) +(get-model) diff --git a/tests/parse-regex2.smt2 b/tests/parse-regex2.smt2 new file mode 100644 index 0000000000..1059b99f28 --- /dev/null +++ b/tests/parse-regex2.smt2 @@ -0,0 +1,8 @@ +(set-logic QF_S) + +(declare-const w String) + +(assert (str.in.re w (re.from.str "[\+]\d{2}[\(]\d{2}[\)]\d{4}[\-]\d{4}"))) + +(check-sat) +(get-model) diff --git a/tests/parse-regex2b.smt2 b/tests/parse-regex2b.smt2 new file mode 100644 index 0000000000..56d756e943 --- /dev/null +++ b/tests/parse-regex2b.smt2 @@ -0,0 +1,9 @@ +(set-logic QF_S) + +(declare-const w String) + +(assert (str.in.re w (re.from.str "[\+]\d{2}[\(]\d{2}[\)]\d{4}[\-]\d{4}"))) +(assert (str.contains w "a")) + +(check-sat) +(get-model) diff --git a/tests/parse-regex3.smt2 b/tests/parse-regex3.smt2 new file mode 100644 index 0000000000..e3242027dd --- /dev/null +++ b/tests/parse-regex3.smt2 @@ -0,0 +1,9 @@ +(set-logic QF_S) + +(declare-const w String) + +; currently not working; the : is unexpected +(assert (str.in.re w (re.from.str "^(?:0|\(?\+33\)?\s?|0033\s?)[1-79](?:[\.\-\s]?\d\d){4}$"))) + +(check-sat) +(get-model) diff --git a/tests/parse-regex4.smt2 b/tests/parse-regex4.smt2 new file mode 100644 index 0000000000..f306b36360 --- /dev/null +++ b/tests/parse-regex4.smt2 @@ -0,0 +1,8 @@ +(set-logic QF_S) + +(declare-const w String) + +(assert (str.in.re w (re.from.str "^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$"))) + +(check-sat) +(get-model)