Skip to content

Commit

Permalink
support for a re.from.str function
Browse files Browse the repository at this point in the history
  • Loading branch information
pruemmer committed Jun 24, 2020
1 parent 69c2ae8 commit 199c223
Show file tree
Hide file tree
Showing 10 changed files with 115 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/main/scala/ostrich/OstrichSolver.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ class OstrichSolver(theory : OstrichStringTheory,
flags : OFlags) {

import theory.{str, str_len, str_empty, str_cons, str_++, str_in_re,
str_to_re,
str_to_re, re_from_str,
str_replace, str_replacere, str_replaceall, str_replaceallre,
re_none, re_all, re_allchar, re_charrange,
re_++, re_union, re_inter, re_*, re_+, re_opt, re_comp,
re_loop, FunPred}

val rexOps : Set[IFunction] =
Set(re_none, re_all, re_allchar, re_charrange, re_++, re_union, re_inter,
re_*, re_+, re_opt, re_comp, re_loop, str_to_re)
re_*, re_+, re_opt, re_comp, re_loop, str_to_re, re_from_str)

private val p = theory.functionPredicateMap

Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/ostrich/OstrichStringTheory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ class OstrichStringTheory(transducers : Seq[(String, Transducer)],
str_replacere, str_replaceallre, str_to_re,
re_none, re_eps, re_all, re_allchar, re_charrange,
re_++, re_union, re_inter, re_*, re_+, re_opt, re_comp,
re_loop))
re_loop, re_from_str))
yield functionPredicateMap(f)) ++
(for (f <- List(str_len); if flags.useLength != OFlags.LengthOptions.Off)
yield functionPredicateMap(f)) ++
Expand Down
41 changes: 39 additions & 2 deletions src/main/scala/ostrich/Regex2Aut.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Regex2Aut(theory : OstrichStringTheory) {

import theory.{re_none, re_all, re_eps, re_allchar, re_charrange,
re_++, re_union, re_inter, re_*, re_+, re_opt, re_comp,
re_loop, str_to_re}
re_loop, str_to_re, re_from_str}

def buildBricsRegex(t : ITerm) : String = t match {
case IFunApp(`re_none`, _) =>
Expand Down Expand Up @@ -59,6 +59,7 @@ class Regex2Aut(theory : OstrichStringTheory) {
"~(" + buildBricsRegex(a) + ")"
case IFunApp(`re_loop`, Seq(IIntLit(n1), IIntLit(n2), a)) =>
"(" + buildBricsRegex(a) + "){" + n1 + "," + n2 + "}"

case IFunApp(`str_to_re`, Seq(a)) =>
StringTheory.term2List(a) match {
case Seq() =>
Expand All @@ -68,8 +69,44 @@ class Regex2Aut(theory : OstrichStringTheory) {
c <- "[\\" + numToUnicode(v) + "]")
yield c).mkString
}

case IFunApp(`re_from_str`, Seq(a)) => {
// TODO: this translation has to be checked more carefully, there might
// be problems due to escaping
val str = StringTheory.term2String(a)

// BRICS cannot handle anchors, so currently we are just removing them
// TODO: find a better solution for this

var begin = 0
while (begin < str.size && str(begin) == '^') {
Console.err.println("Warning: ignoring anchor ^")
begin = begin + 1
}

var end = str.size
while (end > 0 && str(end - 1) == '$') {
Console.err.println("Warning: ignoring anchor $")
end = end - 1
}

val str2 = str.slice(begin, end)

// handle some of the PCRE sequences
// TODO: do this more systematically
val str3 = str2.replaceAll("""\\w""", "[A-Za-z0-9_]")
.replaceAll("""\\W""", "[^A-Za-z0-9_]")
.replaceAll("""\\s""", "[ ]")
.replaceAll("""\\S""", "[^ ]")
.replaceAll("""\\d""", "[0-9]")
.replaceAll("""\\D""", "[^0-9]")

str3
}

case _ =>
throw new IllegalArgumentException
throw new IllegalArgumentException(
"could not translate " + t + " to an automaton")
}

def buildBricsAut(t : ITerm) : BAutomaton =
Expand Down
9 changes: 9 additions & 0 deletions src/test/scala/SMTLIBTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,13 @@ object SMTLIBTests extends Properties("SMTLIBTests") {
property("substring.smt2") =
checkFile("tests/substring.smt2", "sat")

property("parse-regex.smt2") =
checkFile("tests/parse-regex.smt2", "sat")
property("parse-regex2.smt2") =
checkFile("tests/parse-regex2.smt2", "sat")
property("parse-regex2b.smt2") =
checkFile("tests/parse-regex2b.smt2", "unsat")
property("parse-regex4.smt2") =
checkFile("tests/parse-regex4.smt2", "sat")

}
20 changes: 20 additions & 0 deletions tests/parse-regex-incremental.smt2
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@


(set-logic QF_S)

(declare-const w String)

(assert (str.in.re w (re.from.str "^[a-zA-Z][a-zA-Z0-9-_\.]{1,20}$")))

(check-sat)
(get-model)

(assert (str.in.re w (re.* (re.range "a" "z"))))

(check-sat)
(get-model)

(assert (= (str.len w) 5))

(check-sat)
(get-model)
10 changes: 10 additions & 0 deletions tests/parse-regex.smt2
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
(set-logic QF_S)

(declare-const w String)

(assert (str.in.re w (re.from.str "^[a-zA-Z][a-zA-Z0-9-_\.]{1,20}$")))
(assert (str.in.re w (re.* (re.range "a" "z"))))
(assert (= (str.len w) 5))

(check-sat)
(get-model)
8 changes: 8 additions & 0 deletions tests/parse-regex2.smt2
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
(set-logic QF_S)

(declare-const w String)

(assert (str.in.re w (re.from.str "[\+]\d{2}[\(]\d{2}[\)]\d{4}[\-]\d{4}")))

(check-sat)
(get-model)
9 changes: 9 additions & 0 deletions tests/parse-regex2b.smt2
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
(set-logic QF_S)

(declare-const w String)

(assert (str.in.re w (re.from.str "[\+]\d{2}[\(]\d{2}[\)]\d{4}[\-]\d{4}")))
(assert (str.contains w "a"))

(check-sat)
(get-model)
9 changes: 9 additions & 0 deletions tests/parse-regex3.smt2
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
(set-logic QF_S)

(declare-const w String)

; currently not working; the : is unexpected
(assert (str.in.re w (re.from.str "^(?:0|\(?\+33\)?\s?|0033\s?)[1-79](?:[\.\-\s]?\d\d){4}$")))

(check-sat)
(get-model)
8 changes: 8 additions & 0 deletions tests/parse-regex4.smt2
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
(set-logic QF_S)

(declare-const w String)

(assert (str.in.re w (re.from.str "^#?([a-fA-F0-9]{6}|[a-fA-F0-9]{3})$")))

(check-sat)
(get-model)

0 comments on commit 199c223

Please sign in to comment.