Skip to content

Commit

Permalink
Factorgen: Fix sampling of messageIds table
Browse files Browse the repository at this point in the history
  • Loading branch information
szarnyasg committed Sep 16, 2022
1 parent 7895e28 commit 250da28
Showing 1 changed file with 4 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,10 @@ object FactorGenerationStage extends DatagenStage with Logging {
date_trunc("day", $"creationDate").as("creationDay"),
date_trunc("day", $"deletionDate").as("deletionDay"),
$"MessageId")
- val sampleSize = 20000
- val count = messages.count()
- val sampleFraction = Math.min(sampleSize / count, 1.0)
- messages.sample(sampleFraction, 42)
+ val sampleSize = 20000.0
+ val count = messages.count()
+ val sampleFraction = Math.min(sampleSize / count, 1.0)
+ messages.sample(sampleFraction, 42)
},
"countryNumPersons" -> Factor(PlaceType, PersonType) { case Seq(places, persons) =>
val cities = places.where($"type" === "City").cache()
Expand Down

0 comments on commit 250da28

Please sign in to comment.