Skip to content

Commit

Permalink
Getting OOM error with large datasets; may need to increase memory_multiplier from 40 to 50
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffersonfparil committed Jun 6, 2024
1 parent 3056d89 commit 950c259
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 3 deletions.
4 changes: 4 additions & 0 deletions R/cross_validation.R
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,10 @@ fn_cross_validation_within_population = function(list_merged, n_folds=10, n_reps
df_metrics = rbind(df_metrics, list_perf$df_metrics)
df_y_validation = rbind(df_y_validation, list_perf$df_y_validation)
}
### Clean-up to reduce memory footprint
list_perf$df_metrics = NULL
list_perf$df_y_validation = NULL
gc()
}
### Save temporary Rds output per population
time_rand_id = paste0(round(as.numeric(Sys.time())), sample.int(1e6, size=1))
Expand Down
2 changes: 1 addition & 1 deletion exec/tests/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ do
--n-folds 5 \
--n-reps 1 \
--bool-parallel TRUE \
--max-mem-Gb 60 \
--max-mem-Gb 360 \
--n-threads 32 \
--verbose TRUE >> outdir/lucerne/logfile
done
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-cross_validation.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,6 @@ test_that("fn_cross_validation_across_populations_lopo", {
list_across_lopo = readRDS(fname_across_lopo_Rds)
expect_equal(sum(dim(list_across_lopo$METRICS_ACROSS_POP_LOPO) == c(3*1*2, 19)), 2)
expect_equal(sum(dim(list_across_lopo$YPRED_ACROSS_POP_LOPO) == c(3*(100/3)*2, 8)), 2)
expect_equal(mean(list_across_lopo$METRICS_ACROSS_POP_LOPO$corr) < 0.5, TRUE)
expect_equal(cor(list_across_lopo$YPRED_ACROSS_POP_LOPO$y_true, list_across_lopo$YPRED_ACROSS_POP_LOPO$y_pred) < 0.5, TRUE)
expect_equal(mean(list_across_lopo$METRICS_ACROSS_POP_LOPO$corr) < 0.9, TRUE)
expect_equal(cor(list_across_lopo$YPRED_ACROSS_POP_LOPO$y_true, list_across_lopo$YPRED_ACROSS_POP_LOPO$y_pred) < 0.9, TRUE)
})
17 changes: 17 additions & 0 deletions tests/testthat/test-io.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# source("R/io.R")

test_that("fn_G_extract_names", {
set.seed(123)
n = 100
n_alleles = 3
mat_genotypes = simquantgen::fn_simulate_genotypes(n=n, n_alleles=n_alleles, verbose=TRUE)
Expand All @@ -17,6 +18,7 @@ test_that("fn_G_extract_names", {
})

test_that("fn_G_split_off_alternative_allele", {
set.seed(123)
G_ref = simquantgen::fn_simulate_genotypes(verbose=TRUE)
G_alt = 1 - G_ref; colnames(G_alt) = gsub("allele_1$", "allele_2", colnames(G_alt))
G_refalt = cbind(G_ref, G_alt)
Expand All @@ -26,6 +28,7 @@ test_that("fn_G_split_off_alternative_allele", {
})

test_that("fn_G_numeric_to_non_numeric", {
set.seed(123)
ploidy = 42
G_numeric = simquantgen::fn_simulate_genotypes(ploidy=ploidy, n_alleles=52, verbose=TRUE)
G_non_numeric = fn_G_numeric_to_non_numeric(G=G_numeric, ploidy=ploidy, verbose=TRUE)
Expand All @@ -35,6 +38,7 @@ test_that("fn_G_numeric_to_non_numeric", {
})

test_that("fn_G_non_numeric_to_numeric", {
set.seed(123)
ploidy = 42
n_alleles = 2
G_numeric = simquantgen::fn_simulate_genotypes(ploidy=ploidy, n_alleles=n_alleles, verbose=TRUE)
Expand All @@ -46,6 +50,7 @@ test_that("fn_G_non_numeric_to_numeric", {
})

test_that("fn_G_to_vcf", {
set.seed(123)
n = 123
l = 456
n_alleles = 2
Expand All @@ -59,6 +64,7 @@ test_that("fn_G_to_vcf", {
})

test_that("fn_vcf_to_G", {
set.seed(123)
G = simquantgen::fn_simulate_genotypes(verbose=TRUE)
vcf = fn_G_to_vcf(G=G, min_depth=1000, max_depth=1000, verbose=TRUE)
G_back = fn_vcf_to_G(vcf=vcf, verbose=TRUE)
Expand All @@ -68,6 +74,7 @@ test_that("fn_vcf_to_G", {
})

test_that("fn_classify_allele_frequencies", {
set.seed(123)
ploidy = 4
G = simquantgen::fn_simulate_genotypes(ploidy=ploidy, verbose=TRUE)
G_classes = fn_classify_allele_frequencies(G=G, ploidy=ploidy, verbose=TRUE)
Expand All @@ -78,6 +85,7 @@ test_that("fn_classify_allele_frequencies", {
})

test_that("fn_simulate_data", {
set.seed(123)
list_sim = fn_simulate_data(verbose=TRUE)
expect_equal(is.null(list_sim$fname_geno_vcf), FALSE)
expect_equal(is.null(list_sim$fname_geno_tsv), TRUE)
Expand Down Expand Up @@ -109,6 +117,7 @@ test_that("fn_simulate_data", {
})

test_that("fn_load_genotype", {
set.seed(123)
list_sim = fn_simulate_data(min_depth=1000, max_depth=1000, save_geno_vcf=TRUE, save_geno_tsv=TRUE, save_geno_rds=TRUE, save_pheno_tsv=TRUE, verbose=TRUE)
G_vcf = fn_load_genotype(fname_geno=list_sim$fname_geno_vcf, verbose=TRUE)
G_tsv = fn_load_genotype(fname_geno=list_sim$fname_geno_tsv, verbose=TRUE)
Expand All @@ -123,6 +132,7 @@ test_that("fn_load_genotype", {
})

test_that("fn_filter_genotype", {
set.seed(123)
list_sim = fn_simulate_data(verbose=TRUE)
maf = 0.05
sdev_min = 0.0001
Expand Down Expand Up @@ -195,6 +205,7 @@ test_that("fn_filter_genotype", {
})

test_that("fn_save_genotype", {
set.seed(123)
list_sim = fn_simulate_data(min_depth=1000, max_depth=1000, verbose=TRUE)
G = fn_load_genotype(fname_geno=list_sim$fname_geno_vcf, verbose=TRUE)
fname_Rds = tempfile(fileext=".Rds")
Expand All @@ -212,6 +223,7 @@ test_that("fn_save_genotype", {
})

test_that("fn_load_phenotype", {
set.seed(123)
list_sim = fn_simulate_data(verbose=TRUE)
df_y = utils::read.table(list_sim$fname_pheno_tsv, header=TRUE)
fname_csv = gsub(".tsv$", ".csv", list_sim$fname_pheno_tsv)
Expand All @@ -230,6 +242,7 @@ test_that("fn_load_phenotype", {
})

test_that("fn_filter_phenotype", {
set.seed(123)
list_sim = fn_simulate_data(n_pop=3, verbose=TRUE)
list_pheno = fn_load_phenotype(fname_pheno=list_sim$fname_pheno_tsv)
list_pheno$y[1] = Inf
Expand All @@ -241,6 +254,7 @@ test_that("fn_filter_phenotype", {
})

test_that("fn_save_phenotype", {
set.seed(123)
list_sim = fn_simulate_data(n_pop=3, verbose=TRUE)
list_pheno = fn_load_phenotype(fname_pheno=list_sim$fname_pheno_tsv)
fname_tsv = tempfile(fileext=".tsv")
Expand All @@ -259,6 +273,7 @@ test_that("fn_save_phenotype", {
})

test_that("fn_merge_genotype_and_phenotype", {
set.seed(123)
list_sim = fn_simulate_data(n_pop=3, verbose=TRUE)
G = fn_load_genotype(fname_geno=list_sim$fname_geno_vcf)
rownames(G)[1] = "entry_exclude_me"
Expand All @@ -275,6 +290,7 @@ test_that("fn_merge_genotype_and_phenotype", {
})

test_that("fn_subset_merged_genotype_and_phenotype", {
set.seed(123)
list_sim = fn_simulate_data(n_pop=3, verbose=TRUE)
G = fn_load_genotype(fname_geno=list_sim$fname_geno_vcf)
list_pheno = fn_load_phenotype(fname_pheno=list_sim$fname_pheno_tsv)
Expand All @@ -292,6 +308,7 @@ test_that("fn_subset_merged_genotype_and_phenotype", {
})

test_that("fn_estimate_memory_footprint", {
set.seed(123)
X = matrix(0.0, nrow=500, ncol=500e3)
list_mem = fn_estimate_memory_footprint(X=X, verbose=TRUE)
expect_equal(list_mem$size_X, object.size(X))
Expand Down

0 comments on commit 950c259

Please sign in to comment.