From 7996c8a38d15f0b734e60a3ad2acd04ca29168a7 Mon Sep 17 00:00:00 2001 From: Matthew Hague Date: Mon, 22 Jul 2024 16:08:38 +0100 Subject: [PATCH] Avoid use of global Regex2PFA datastructures The global maps (in the Regex2PFA object) were leading to non-deterministic test failures. Refactor so the datastructures are unqiue to the Regex2PFA instance that uses them. --- .../OstrichStringFunctionTranslator.scala | 5 +- .../ostrich/automata/PrioAutomaton.scala | 53 +++++++++++-------- .../CEStringFunctionTranslator.scala | 24 ++++----- 3 files changed, 46 insertions(+), 36 deletions(-) diff --git a/src/main/scala/ostrich/OstrichStringFunctionTranslator.scala b/src/main/scala/ostrich/OstrichStringFunctionTranslator.scala index 0b0d97a071..f9b4880079 100644 --- a/src/main/scala/ostrich/OstrichStringFunctionTranslator.scala +++ b/src/main/scala/ostrich/OstrichStringFunctionTranslator.scala @@ -56,8 +56,9 @@ class OstrichStringFunctionTranslator(theory : OstrichStringTheory, str_replaceallcg, str_replacecg, str_extract} private val regexExtractor = theory.RegexExtractor(facts.predConj) - private val cgTranslator = new Regex2PFA(theory, - new JavascriptPrioAutomatonBuilder) + private val builder = new JavascriptPrioAutomatonBuilder(theory) + private val cgTranslator = builder.regex2pfa + private def regexAsTerm(t : Term) : Option[ITerm] = try { diff --git a/src/main/scala/ostrich/automata/PrioAutomaton.scala b/src/main/scala/ostrich/automata/PrioAutomaton.scala index 04db3a50e1..b771f1c160 100644 --- a/src/main/scala/ostrich/automata/PrioAutomaton.scala +++ b/src/main/scala/ostrich/automata/PrioAutomaton.scala @@ -128,7 +128,7 @@ case class PrioAutomaton( // A PFA builder constructs PFA based on regular expression structure // following *certain* semantics of regex. -abstract class PrioAutomatonBuilder { +abstract class PrioAutomatonBuilder(val theory : OstrichStringTheory) { type State = PrioAutomaton.State type TLabel = PrioAutomaton.TLabel @@ -136,6 +136,11 @@ abstract class PrioAutomatonBuilder { type SigmaTransition = PrioAutomaton.SigmaTransition type ETransition = PrioAutomaton.ETransition + /* + * The Regex2PFA associated with this builder + */ + val regex2pfa = new Regex2PFA(theory, this) + def none() : PrioAutomaton def epsilon() : PrioAutomaton def single(lbl : TLabel) : PrioAutomaton @@ -171,7 +176,6 @@ abstract class PrioAutomatonBuilder { // 2) if state s is an initial state of some automaton corresponding // to capture group i, so is the copyed state s' def duplicate(base : PrioAutomaton) : PrioAutomaton = { - import Regex2PFA.{capState, stateCap, capInit} import PrioAutomaton.getNewState base match { @@ -233,11 +237,14 @@ abstract class PrioAutomatonBuilder { } // now the database ... - for (caps <- stateCap.get(oldstate).iterator; cap <- caps) { - stateCap.addBinding(newstate, cap) - capState.addBinding(cap, newstate) - if (capInit.getOrElse(cap, MSet()) contains oldstate) { - capInit.addBinding(cap, newstate) + for ( + caps <- regex2pfa.stateCap.get(oldstate).iterator; + cap <- caps + ) { + regex2pfa.stateCap.addBinding(newstate, cap) + regex2pfa.capState.addBinding(cap, newstate) + if (regex2pfa.capInit.getOrElse(cap, MSet()) contains oldstate) { + regex2pfa.capInit.addBinding(cap, newstate) } } } @@ -250,7 +257,8 @@ abstract class PrioAutomatonBuilder { } -class PythonPrioAutomatonBuilder extends PrioAutomatonBuilder { +class PythonPrioAutomatonBuilder(theory : OstrichStringTheory) + extends PrioAutomatonBuilder(theory) { // In python mode, we don't use the second component // of the accepted states because we don't differentiate these two types of acceptance condition // thus, **all accepting states are in F1** @@ -443,7 +451,8 @@ class PythonPrioAutomatonBuilder extends PrioAutomatonBuilder { } -class JavascriptPrioAutomatonBuilder extends PrioAutomatonBuilder { +class JavascriptPrioAutomatonBuilder(theory : OstrichStringTheory) + extends PrioAutomatonBuilder(theory) { // In js mode, we use the first component F1 (res. F2) to denote accepting // states which only accepts empty (res. nonempty) traces. // to approximate the js semantics as precisely as possible, @@ -796,17 +805,6 @@ object Regex2PFA { } } } - - // mutable maps collecting info during translation - val capState = - new MHashMap[Int, MSet[State]] - with MMultiMap[Int, State] - val stateCap = - new MHashMap[State, MSet[Int]] - with MMultiMap[State, Int] - val capInit = - new MHashMap[Int, MSet[State]] - with MMultiMap[Int, State] } class Regex2PFA(theory : OstrichStringTheory, builder : PrioAutomatonBuilder) { @@ -821,6 +819,17 @@ class Regex2PFA(theory : OstrichStringTheory, builder : PrioAutomatonBuilder) { private val simplifier = new Regex2Aut.DiffEliminator(theory) + // mutable maps collecting info during translation + val capState = + new MHashMap[Int, MSet[State]] + with MMultiMap[Int, State] + val stateCap = + new MHashMap[State, MSet[Int]] + with MMultiMap[State, Int] + val capInit = + new MHashMap[Int, MSet[State]] + with MMultiMap[Int, State] + // this is the map from literal numbering to internal numbering of // capture groups. It is for translating the replacement string. private val capNumTransform = @@ -998,10 +1007,10 @@ class Regex2PFA(theory : OstrichStringTheory, builder : PrioAutomatonBuilder) { val (aut, _) = buildPatternImpl(simplifier(pat)) - val state2Capture = (for ((s, caps) <- Regex2PFA.stateCap) + val state2Capture = (for ((s, caps) <- stateCap) yield (s, caps.toSet)).toMap - val cap2Init = (for ((cap, inits) <- Regex2PFA.capInit) + val cap2Init = (for ((cap, inits) <- capInit) yield (cap, inits.toSet)).toMap (aut, numCapture, state2Capture, cap2Init) diff --git a/src/main/scala/ostrich/cesolver/stringtheory/CEStringFunctionTranslator.scala b/src/main/scala/ostrich/cesolver/stringtheory/CEStringFunctionTranslator.scala index 9c1ceb3c2f..56274e1c27 100644 --- a/src/main/scala/ostrich/cesolver/stringtheory/CEStringFunctionTranslator.scala +++ b/src/main/scala/ostrich/cesolver/stringtheory/CEStringFunctionTranslator.scala @@ -1,21 +1,21 @@ /** * This file is part of Ostrich, an SMT solver for strings. * Copyright (c) 2018-2023 Denghang Hu, Matthew Hague, Philipp Ruemmer. All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. - * + * * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. - * + * * * Neither the name of the authors nor the names of their * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS @@ -64,8 +64,8 @@ class CEStringFunctionTranslator(theory : CEStringTheory, str_replaceallcg, str_replacecg, str_extract} private val regexExtractor = theory.RegexExtractor(facts.predConj) - private val cgTranslator = new Regex2PFA(theory, - new JavascriptPrioAutomatonBuilder) + private val builder = new JavascriptPrioAutomatonBuilder(theory) + private val cgTranslator = builder.regex2pfa private def regexAsTerm(t : Term) : Option[ITerm] = try { @@ -80,10 +80,10 @@ class CEStringFunctionTranslator(theory : CEStringTheory, str_at, str_at_right, str_trim) ++ theory.extraFunctionPreOps.keys) yield FunPred(f)) ++ theory.transducerPreOps.keys - + def apply(a : Atom) : Option[(() => PreOp, Seq[Term], Term)] = a.pred match { - case FunPred(`str_len`) => + case FunPred(`str_len`) => Some((() => LengthCEPreOp(Internal2InputAbsy(a(1))), Seq(a(0)), a(1))) case FunPred(`str_++`) => @@ -100,7 +100,7 @@ class CEStringFunctionTranslator(theory : CEStringTheory, val matchStr = strDatabase term2ListGet a(2) map(_.toChar) val patternStr = strDatabase term2ListGet a(1) map(_.toChar) Some((() => ReplaceCEPreOp(patternStr, matchStr), Seq(a(0)), a(3))) - + case FunPred(`str_replacere`) if (strDatabase isConcrete a(2)) => val matchStr = strDatabase term2ListGet a(2) map(_.toChar) for (regex <- regexAsTerm(a(1))) yield { @@ -110,12 +110,12 @@ class CEStringFunctionTranslator(theory : CEStringTheory, } (op, List(a(0)), a(3)) } - case FunPred(`str_replaceall`) if (strDatabase isConcrete a(2)) && (strDatabase isConcrete a(1)) => + case FunPred(`str_replaceall`) if (strDatabase isConcrete a(2)) && (strDatabase isConcrete a(1)) => val matchStr = strDatabase term2ListGet a(2) map(_.toChar) val patternStr = strDatabase term2ListGet a(1) map(_.toChar) Some((() => ReplaceAllCEPreOp(patternStr, matchStr), Seq(a(0)), a(3))) - case FunPred(`str_replaceallre`) if (strDatabase isConcrete a(2)) => + case FunPred(`str_replaceallre`) if (strDatabase isConcrete a(2)) => val matchStr = strDatabase term2ListGet a(2) map(_.toChar) for (regex <- regexAsTerm(a(1))) yield { val op = () => {