From 250da2805fbd9ec25833d954ecaa224286563a83 Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Fri, 16 Sep 2022 14:27:50 +0000 Subject: [PATCH] Factorgen: Fix sampling of messageIds table --- .../ldbc/snb/datagen/factors/FactorGenerationStage.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala b/src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala index 281d5cd5d..ac3fcd597 100644 --- a/src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala +++ b/src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala @@ -173,10 +173,10 @@ object FactorGenerationStage extends DatagenStage with Logging { date_trunc("day", $"creationDate").as("creationDay"), date_trunc("day", $"deletionDate").as("deletionDay"), $"MessageId") - val sampleSize = 20000 - val count = messages.count() - val sampleFraction = Math.min(sampleSize / count, 1.0) - messages.sample(sampleFraction, 42) + val sampleSize = 20000.0 + val count = messages.count() + val sampleFraction = Math.min(sampleSize / count, 1.0) + messages.sample(sampleFraction, 42) }, "countryNumPersons" -> Factor(PlaceType, PersonType) { case Seq(places, persons) => val cities = places.where($"type" === "City").cache()