Skip to content
This repository has been archived by the owner on Apr 14, 2023. It is now read-only.

Commit

Permalink
fix(#1187): String lengths are uniformly random
Browse files Browse the repository at this point in the history
Previously, string lengths followed a geometric distribution between
the min and max: once the minimum length was reached, each successive
character had a 3/10 chance of being the last one added to the string.

As a result, regardless of the maximum (provided the maximum is at
least 5 greater than the minimum), typically 50% of the values would be
within 2 characters of the minimum length.

This change instead generates a string up to the maximum length (our
current default is 1,000) and truncates it at a randomly chosen valid
length between the min and max. This appears to roughly halve
performance for the example given in shorter-than, in exchange for a
uniform distribution of lengths.

RandomStringFactory now builds the value in a single StringBuilder instead
of repeatedly concatenating strings. This stops a new intermediate string
being pushed onto the heap for every character.
  • Loading branch information
rstuart-scottlogic committed Nov 14, 2019
1 parent ebd6782 commit 0ea6c71
Show file tree
Hide file tree
Showing 29 changed files with 75 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public class Defaults {

public static final BigDecimal NUMERIC_MAX = new BigDecimal("1e20");
public static final BigDecimal NUMERIC_MIN = new BigDecimal("-1e20");

public static final int MAX_STRING_LENGTH = 1000;
public static final OffsetDateTime ISO_MAX_DATE = OffsetDateTime.of(9999, 12, 31, 23, 59, 59, 999_000_000, ZoneOffset.UTC);
public static final OffsetDateTime ISO_MIN_DATE = OffsetDateTime.of(1, 1, 1, 0, 0, 0, 0, ZoneOffset.UTC);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,62 +16,46 @@

package com.scottlogic.deg.generator.generation.string.factorys;

import com.scottlogic.deg.common.ValidationException;
import com.scottlogic.deg.generator.generation.string.StringUtils;
import com.scottlogic.deg.generator.utils.RandomNumberGenerator;
import dk.brics.automaton.State;
import dk.brics.automaton.Transition;

import java.util.LinkedList;
import java.util.List;

public class RandomStringFactory {

public String createRandomString(
String currentString,
State state,
int minLength,
int maxLength,
RandomNumberGenerator random) {
public String createRandomString(State state, RandomNumberGenerator random) {
List<Integer> validIndices = new LinkedList<>();

if (finishCreating(currentString, state, minLength, maxLength, random)) {
return currentString;
}

List<Transition> transitions = state.getSortedTransitions(false);

Transition randomTransition = transitions.get(random.nextInt(transitions.size()));

char randomChar = getRandomChar(random, randomTransition);

return createRandomString(
currentString + randomChar,
randomTransition.getDest(),
minLength,
maxLength,
random);
}

private boolean finishCreating(
String currentString,
State state,
int minLength,
int maxLength,
RandomNumberGenerator random) {
StringBuilder builder = new StringBuilder();

if (state.isAccept()) {
if (currentString.length() == maxLength) {
return true;
}
if (currentString.length() >= minLength && randomlyStop(random)) {
return true;
validIndices.add(0);
}

for (int i = 1; !finishCreating(state); i++) {
List<Transition> transitions = state.getSortedTransitions(false);
Transition randomTransition = transitions.get(random.nextInt(transitions.size()));
builder.append(getRandomChar(random, randomTransition));
state = randomTransition.getDest();
if (state.isAccept()) {
validIndices.add(i);
}
}

return state.getTransitions().isEmpty();
}
if (validIndices.isEmpty()) {
throw new ValidationException("No possible states from the current regex");
}

int randomIndex = random.nextInt(validIndices.size());
return builder.toString().substring(0, validIndices.get(randomIndex));
}

private boolean randomlyStop(RandomNumberGenerator random) {
return random.nextInt(10) < 3; // 3 in 10 chance of stopping
private boolean finishCreating(State state) {
return state.getTransitions().isEmpty();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@

package com.scottlogic.deg.generator.generation.string.generators;

import com.scottlogic.deg.common.profile.FieldType;
import com.scottlogic.deg.generator.fieldspecs.FieldSpecFactory;
import com.scottlogic.deg.generator.generation.string.AutomatonUtils;
import com.scottlogic.deg.generator.generation.string.iterators.FiniteStringAutomatonIterator;
import com.scottlogic.deg.generator.generation.string.factorys.InterestingStringFactory;
import com.scottlogic.deg.generator.generation.string.factorys.RandomStringFactory;
import com.scottlogic.deg.generator.restrictions.string.StringRestrictions;
import com.scottlogic.deg.generator.utils.RandomNumberGenerator;
import dk.brics.automaton.Automaton;

Expand All @@ -39,6 +42,8 @@ public class RegexStringGenerator implements StringGenerator {
*/
private static final Map<String, Automaton> containingRegexAutomatonCache = new HashMap<>();

private static final RegexStringGenerator DEFAULT = (RegexStringGenerator) ((StringRestrictions) FieldSpecFactory.fromType(FieldType.STRING).getRestrictions()).createGenerator();

private Automaton automaton;
private final String regexRepresentation;

Expand Down Expand Up @@ -119,7 +124,7 @@ RegexStringGenerator union(RegexStringGenerator otherGenerator) {
@Override
public StringGenerator complement() {
return new RegexStringGenerator(
this.automaton.clone().complement(),
this.automaton.clone().complement().intersection(DEFAULT.automaton),
complementaryRepresentation(this.regexRepresentation));
}

Expand Down Expand Up @@ -150,10 +155,7 @@ public Stream<String> generateAllValues() {
public Stream<String> generateRandomValues(RandomNumberGenerator randomNumberGenerator) {
return Stream.generate(
() -> randomStringFactory.createRandomString(
"",
automaton.getInitialState(),
1,
Integer.MAX_VALUE,
randomNumberGenerator));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import com.scottlogic.deg.generator.restrictions.linear.Limit;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictions;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory;
import com.scottlogic.deg.generator.utils.Defaults;
import com.scottlogic.deg.generator.utils.GeneratorDefaults;

import java.time.LocalTime;

Expand Down Expand Up @@ -50,7 +50,7 @@ public AtomicConstraint negate() {
public FieldSpec toFieldSpec() {
final Limit<LocalTime> min = new Limit<>(referenceValue.getValue(), false);
final LinearRestrictions<LocalTime> timeRestrictions =
LinearRestrictionsFactory.createTimeRestrictions(min, Defaults.TIME_MAX_LIMIT);
LinearRestrictionsFactory.createTimeRestrictions(min, GeneratorDefaults.TIME_MAX_LIMIT);
return FieldSpecFactory.fromRestriction(timeRestrictions);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createDateTimeRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MAX_LIMIT;

public class AfterConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createDateTimeRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MAX_LIMIT;

public class AfterOrAtConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import com.scottlogic.deg.generator.restrictions.linear.Limit;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictions;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory;
import com.scottlogic.deg.generator.utils.Defaults;
import com.scottlogic.deg.generator.utils.GeneratorDefaults;

import java.time.LocalTime;

Expand Down Expand Up @@ -50,7 +50,7 @@ public AtomicConstraint negate() {
public FieldSpec toFieldSpec() {
final Limit<LocalTime> min = new Limit<>(referenceValue.getValue(), true);
final LinearRestrictions<LocalTime> timeRestrictions =
LinearRestrictionsFactory.createTimeRestrictions(min, Defaults.TIME_MAX_LIMIT);
LinearRestrictionsFactory.createTimeRestrictions(min, GeneratorDefaults.TIME_MAX_LIMIT);
return FieldSpecFactory.fromRestriction(timeRestrictions);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import com.scottlogic.deg.generator.restrictions.linear.Limit;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictions;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory;
import com.scottlogic.deg.generator.utils.Defaults;
import com.scottlogic.deg.generator.utils.GeneratorDefaults;

import java.time.LocalTime;

Expand Down Expand Up @@ -51,7 +51,7 @@ public FieldSpec toFieldSpec() {

final Limit<LocalTime> max = new Limit<>(referenceValue.getValue(), false);
final LinearRestrictions<LocalTime> timeRestriction =
LinearRestrictionsFactory.createTimeRestrictions(Defaults.TIME_MIN_LIMIT, max);
LinearRestrictionsFactory.createTimeRestrictions(GeneratorDefaults.TIME_MIN_LIMIT, max);
if (timeRestriction.isContradictory()) {
return FieldSpecFactory.nullOnly();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createDateTimeRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MIN_LIMIT;

public class BeforeConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createDateTimeRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MIN_LIMIT;

public class BeforeOrAtConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import com.scottlogic.deg.generator.restrictions.linear.Limit;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictions;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory;
import com.scottlogic.deg.generator.utils.Defaults;
import com.scottlogic.deg.generator.utils.GeneratorDefaults;

import java.time.LocalTime;

Expand All @@ -49,7 +49,7 @@ public AtomicConstraint negate() {
public FieldSpec toFieldSpec() {
final Limit<LocalTime> max = new Limit<>(referenceValue.getValue(), true);
final LinearRestrictions<LocalTime> timeRestriction =
LinearRestrictionsFactory.createTimeRestrictions(Defaults.TIME_MIN_LIMIT, max);
LinearRestrictionsFactory.createTimeRestrictions(GeneratorDefaults.TIME_MIN_LIMIT, max);
return FieldSpecFactory.fromRestriction(timeRestriction);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createDateTimeRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MIN_LIMIT;

public class GranularToDateConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import com.scottlogic.deg.generator.fieldspecs.FieldSpec;
import com.scottlogic.deg.generator.fieldspecs.FieldSpecFactory;
import com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory;
import com.scottlogic.deg.generator.utils.Defaults;
import com.scottlogic.deg.generator.utils.GeneratorDefaults;

public class GranularToTimeConstraint implements AtomicConstraint {
public final TimeGranularity timeGranularity;
Expand Down Expand Up @@ -51,8 +51,8 @@ public AtomicConstraint negate() {
public FieldSpec toFieldSpec() {
return FieldSpecFactory.fromRestriction(
LinearRestrictionsFactory.createTimeRestrictions(
Defaults.TIME_MIN_LIMIT,
Defaults.TIME_MAX_LIMIT,
GeneratorDefaults.TIME_MIN_LIMIT,
GeneratorDefaults.TIME_MAX_LIMIT,
timeGranularity));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createNumericRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MAX_LIMIT;

public class GreaterThanConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createNumericRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MAX_LIMIT;

public class GreaterThanOrEqualToConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createNumericRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MIN_LIMIT;

public class LessThanConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.Objects;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createNumericRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MIN_LIMIT;

public class LessThanOrEqualToConstraint implements AtomicConstraint {
public final Field field;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import java.time.OffsetDateTime;

import static com.scottlogic.deg.common.util.Defaults.*;
import static com.scottlogic.deg.generator.utils.Defaults.*;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.*;

public class LinearRestrictionsFactory {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import static com.scottlogic.deg.common.util.Defaults.*;

public class Defaults {
public class GeneratorDefaults {

public static final Limit<BigDecimal> NUMERIC_MAX_LIMIT = new Limit<>(NUMERIC_MAX, true);
public static final Limit<BigDecimal> NUMERIC_MIN_LIMIT= new Limit<>(NUMERIC_MIN, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

import static com.scottlogic.deg.common.profile.FieldType.*;
import static com.scottlogic.deg.generator.restrictions.string.StringRestrictionsFactory.forMaxLength;
import static com.scottlogic.deg.generator.utils.Defaults.*;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.*;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.hamcrest.core.IsNot.not;
import static org.junit.Assert.assertFalse;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createDateTimeRestrictions;
import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createNumericRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MIN_LIMIT;
import static org.mockito.Mockito.*;

class RestrictionsMergeOperationTest {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@

import static com.scottlogic.deg.common.profile.FieldType.*;
import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createNumericRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MIN_LIMIT;

public class FieldSpecGetFieldValueSourceTests {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
import java.util.stream.Stream;

import static com.scottlogic.deg.generator.restrictions.linear.LinearRestrictionsFactory.createNumericRestrictions;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.Defaults.NUMERIC_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.NUMERIC_MIN_LIMIT;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.Matchers.*;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@

import static com.scottlogic.deg.common.util.Defaults.ISO_MAX_DATE;
import static com.scottlogic.deg.common.util.Defaults.ISO_MIN_DATE;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.Defaults.DATETIME_MIN_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MAX_LIMIT;
import static com.scottlogic.deg.generator.utils.GeneratorDefaults.DATETIME_MIN_LIMIT;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.core.Is.is;
import static org.hamcrest.core.IsNot.not;
Expand Down
Loading

0 comments on commit 0ea6c71

Please sign in to comment.