From 2ffd7207ff0de9301b68f0dbae92deb62ec1de0a Mon Sep 17 00:00:00 2001
From: pat-s
Date: Thu, 11 Apr 2019 22:10:32 +0200
Subject: [PATCH 1/4] new version

---
 R/Aggregation.R | 4 +-
 R/BaggingWrapper.R | 21 +-
 R/BaseEnsemble.R | 12 +-
 R/BaseEnsemble_operators.R | 10 +-
 R/BaseWrapper.R | 15 +-
 R/BaseWrapper_operators.R | 13 +-
 R/BenchmarkResultOrderLevels.R | 10 +-
 R/BenchmarkResult_operators.R | 71 ++-
 R/ChainModel.R | 13 +-
 R/ChainModel_operators.R | 4 +-
 R/ClassifTask.R | 17 +-
 R/ClassificationViaRegressionWrapper.R | 6 +
 R/ClusterTask.R | 7 +-
 R/ConstantClassWrapper.R | 13 +-
 R/CostSensClassifWrapper.R | 9 +-
 R/CostSensRegrWrapper.R | 5 +
 R/CostSensTask.R | 21 +-
 R/CostSensWeightedPairsWrapper.R | 4 +
 R/DownsampleWrapper.R | 6 +-
 R/DummyFeaturesWrapper.R | 4 +
 R/FailureModel.R | 20 +-
 R/FeatSelControl.R | 17 +-
 R/FeatSelControlGA.R | 4 +-
 R/FeatSelResult.R | 7 +-
 R/FeatSelWrapper.R | 12 +-
 R/Filter.R | 279 +++++++-----
 R/FilterWrapper.R | 11 +-
 R/HoldoutInstance_make_fixed.R | 1 +
 R/HomogeneousEnsemble.R | 18 +-
 R/Impute.R | 41 +-
 R/ImputeMethods.R | 64 ++-
 R/ImputeWrapper.R | 4 +
 R/Learner.R | 5 +-
 R/Learner_operators.R | 9 +-
 R/Learner_properties.R | 7 +
 R/Measure.R | 24 +-
 R/Measure_colAUC.R | 13 +-
 R/Measure_custom_resampled.R | 2 +-
 R/Measure_make_cost.R | 16 +-
 R/Measure_operators.R | 2 +
 R/Measure_properties.R | 3 +
 R/ModelMultiplexer.R | 20 +-
 R/ModelMultiplexerParamSet.R | 16 +-
 R/MulticlassWrapper.R | 32 +-
 R/MultilabelBinaryRelevanceWrapper.R | 10 +-
 R/MultilabelClassifierChainsWrapper.R | 12 +-
 R/MultilabelDBRWrapper.R | 3 +
 R/MultilabelNestedStackingWrapper.R | 12 +-
 R/MultilabelStackingWrapper.R | 7 +-
 R/MultilabelTask.R | 9 +-
 R/NoFeaturesModel.R | 8 +-
 R/OptControl.R | 3 +-
 R/OptResult.R | 1 +
 R/OptWrapper.R | 1 +
 R/OverBaggingWrapper.R | 10 +-
 R/OverUnderSampling.R | 3 +-
 R/OverUndersampleWrapper.R | 9 +-
 R/Prediction.R | 15 +-
 R/Prediction_operators.R | 45 +-
 R/PreprocWrapper.R | 14 +-
 R/PreprocWrapperCaret.R | 19 +-
 R/RLearner.R | 13 +-
 R/RLearner_classif_C50.R | 41 +-
 R/RLearner_classif_IBk.R | 5 +-
 R/RLearner_classif_J48.R | 5 +-
 R/RLearner_classif_JRip.R | 5 +-
 R/RLearner_classif_LiblineaRL1L2SVC.R | 5 +-
 R/RLearner_classif_LiblineaRL1LogReg.R | 8 +-
 R/RLearner_classif_LiblineaRL2L1SVC.R | 5 +-
 R/RLearner_classif_LiblineaRL2LogReg.R | 8 +-
 R/RLearner_classif_LiblineaRL2SVC.R | 5 +-
 R/RLearner_classif_LiblineaRMultiClassSVC.R | 5 +-
 R/RLearner_classif_OneR.R | 5 +-
 R/RLearner_classif_PART.R | 5 +-
 R/RLearner_classif_RRF.R | 13 +-
 R/RLearner_classif_ada.R | 9 +-
 R/RLearner_classif_adaboostm1.R | 5 +-
 R/RLearner_classif_bartMachine.R | 7 +-
 R/RLearner_classif_binomial.R | 5 +-
 R/RLearner_classif_boosting.R | 4 +
 R/RLearner_classif_bst.R | 5 +-
 R/RLearner_classif_cforest.R | 4 +
 R/RLearner_classif_clusterSVM.R | 3 +
 R/RLearner_classif_ctree.R | 2 +
 R/RLearner_classif_cvglmnet.R | 6 +-
 R/RLearner_classif_dbnDNN.R | 5 +-
 R/RLearner_classif_dcSVM.R | 6 +-
 R/RLearner_classif_earth.R | 5 +-
 R/RLearner_classif_evtree.R | 3 +
 R/RLearner_classif_extraTrees.R | 6 +-
 R/RLearner_classif_fdausc.glm.R | 2 +
 R/RLearner_classif_fdausc.kernel.R | 2 +
 R/RLearner_classif_fdausc.knn.R | 3 +-
 R/RLearner_classif_fdausc.np.R | 1 +
 R/RLearner_classif_featureless.R | 3 +
 R/RLearner_classif_fnn.R | 7 +-
 R/RLearner_classif_gamboost.R | 17 +-
 R/RLearner_classif_gaterSVM.R | 3 +
 R/RLearner_classif_gausspr.R | 10 +-
 R/RLearner_classif_gbm.R | 13 +-
 R/RLearner_classif_geoDA.R | 7 +-
 R/RLearner_classif_glmboost.R | 17 +-
 R/RLearner_classif_glmnet.R | 6 +-
 R/RLearner_classif_h2odeeplearning.R | 16 +-
 R/RLearner_classif_h2ogbm.R | 10 +-
 R/RLearner_classif_h2oglm.R | 8 +-
 R/RLearner_classif_h2orandomForest.R | 10 +-
 R/RLearner_classif_kknn.R | 13 +-
 R/RLearner_classif_knn.R | 6 +-
 R/RLearner_classif_ksvm.R | 17 +-
 R/RLearner_classif_lda.R | 11 +-
 R/RLearner_classif_linDA.R | 9 +-
 R/RLearner_classif_liquidSVM.R | 11 +-
 R/RLearner_classif_logreg.R | 5 +-
 R/RLearner_classif_lssvm.R | 12 +-
 R/RLearner_classif_lvq1.R | 5 +-
 R/RLearner_classif_mda.R | 5 +-
 R/RLearner_classif_mlp.R | 5 +-
 R/RLearner_classif_multinom.R | 7 +-
 R/RLearner_classif_naiveBayes.R | 5 +-
 R/RLearner_classif_neuralnet.R | 30 +-
 R/RLearner_classif_nnTrain.R | 3 +
 R/RLearner_classif_nnet.R | 22 +-
 R/RLearner_classif_nodeHarvest.R | 3 +
 R/RLearner_classif_pamr.R | 5 +-
 R/RLearner_classif_penalized.R | 7 +-
 R/RLearner_classif_plr.R | 17 +-
 R/RLearner_classif_plsdaCaret.R | 7 +-
 R/RLearner_classif_probit.R | 6 +-
 R/RLearner_classif_qda.R | 10 +-
 R/RLearner_classif_quaDA.R | 9 +-
 R/RLearner_classif_rFerns.R | 4 +
 R/RLearner_classif_randomForest.R | 17 +-
 R/RLearner_classif_randomForestSRC.R | 5 +
 R/RLearner_classif_ranger.R | 5 +
 R/RLearner_classif_rda.R | 8 +-
 R/RLearner_classif_rknn.R | 9 +-
 R/RLearner_classif_rotationForest.R | 11 +-
 R/RLearner_classif_rpart.R | 6 +-
 R/RLearner_classif_rrlda.R | 5 +-
 R/RLearner_classif_saeDNN.R | 5 +-
 R/RLearner_classif_sda.R | 10 +-
 R/RLearner_classif_sparseLDA.R | 10 +-
 R/RLearner_classif_svm.R | 7 +-
 R/RLearner_classif_xgboost.R | 43 +-
 R/RLearner_cluster_Cobweb.R | 6 +-
 R/RLearner_cluster_EM.R | 6 +-
 R/RLearner_cluster_FarthestFirst.R | 6 +-
 R/RLearner_cluster_SimpleKMeans.R | 6 +-
 R/RLearner_cluster_XMeans.R | 6 +-
 R/RLearner_cluster_cmeans.R | 4 +-
 R/RLearner_cluster_dbscan.R | 4 +-
 R/RLearner_cluster_kkmeans.R | 8 +-
 R/RLearner_cluster_kmeans.R | 4 +-
 R/RLearner_multilabel_cforest.R | 3 +
 R/RLearner_multilabel_rFerns.R | 4 +-
 R/RLearner_multilabel_randomForestSRC.R | 3 +
 R/RLearner_regr_FDboost.R | 26 +-
 R/RLearner_regr_GPfit.R | 17 +-
 R/RLearner_regr_IBk.R | 5 +-
 R/RLearner_regr_LiblineaRL2L1SVR.R | 7 +-
 R/RLearner_regr_LiblineaRL2L2SVR.R | 7 +-
 R/RLearner_regr_RRF.R | 10 +-
 R/RLearner_regr_bartMachine.R | 4 +-
 R/RLearner_regr_bcart.R | 3 +
 R/RLearner_regr_bgp.R | 3 +
 R/RLearner_regr_bgpllm.R | 3 +
 R/RLearner_regr_blm.R | 3 +
 R/RLearner_regr_brnn.R | 3 +
 R/RLearner_regr_bst.R | 5 +-
 R/RLearner_regr_btgp.R | 3 +
 R/RLearner_regr_btgpllm.R | 3 +
 R/RLearner_regr_btlm.R | 3 +
 R/RLearner_regr_cforest.R | 4 +
 R/RLearner_regr_crs.R | 7 +-
 R/RLearner_regr_ctree.R | 2 +
 R/RLearner_regr_cubist.R | 3 +
 R/RLearner_regr_cvglmnet.R | 8 +-
 R/RLearner_regr_earth.R | 5 +-
 R/RLearner_regr_evtree.R | 3 +
 R/RLearner_regr_extraTrees.R | 6 +-
 R/RLearner_regr_featureless.R | 3 +
 R/RLearner_regr_fnn.R | 5 +-
 R/RLearner_regr_frbs.R | 3 +
 R/RLearner_regr_gamboost.R | 3 +
 R/RLearner_regr_gausspr.R | 10 +-
 R/RLearner_regr_gbm.R | 10 +-
 R/RLearner_regr_glm.R | 10 +-
 R/RLearner_regr_glmboost.R | 7 +-
 R/RLearner_regr_glmnet.R | 6 +-
 R/RLearner_regr_h2odeeplearning.R | 12 +-
 R/RLearner_regr_h2ogbm.R | 33 +-
 R/RLearner_regr_h2oglm.R | 5 +-
 R/RLearner_regr_h2orandomForest.R | 7 +-
 R/RLearner_regr_kknn.R | 5 +-
 R/RLearner_regr_km.R | 13 +-
 R/RLearner_regr_ksvm.R | 8 +-
 R/RLearner_regr_laGP.R | 3 +
 R/RLearner_regr_liquidSVM.R | 13 +-
 R/RLearner_regr_lm.R | 7 +-
 R/RLearner_regr_mars.R | 5 +-
 R/RLearner_regr_mob.R | 13 +-
 R/RLearner_regr_nnet.R | 7 +-
 R/RLearner_regr_nodeHarvest.R | 3 +
 R/RLearner_regr_pcr.R | 5 +-
 R/RLearner_regr_penalized.R | 7 +-
 R/RLearner_regr_plsr.R | 3 +
 R/RLearner_regr_randomForest.R | 19 +-
 R/RLearner_regr_randomForestSRC.R | 5 +
 R/RLearner_regr_ranger.R | 5 +
 R/RLearner_regr_rknn.R | 7 +-
 R/RLearner_regr_rpart.R | 9 +-
 R/RLearner_regr_rsm.R | 5 +-
 R/RLearner_regr_rvm.R | 8 +-
 R/RLearner_regr_slim.R | 3 +
 R/RLearner_regr_svm.R | 7 +-
 R/RLearner_regr_xgboost.R | 27 +-
 R/RLearner_surv_CoxBoost.R | 6 +-
 R/RLearner_surv_cforest.R | 4 +
 R/RLearner_surv_coxph.R | 7 +-
 R/RLearner_surv_cv.CoxBoost.R | 9 +-
 R/RLearner_surv_cvglmnet.R | 8 +-
 R/RLearner_surv_gamboost.R | 17 +-
 R/RLearner_surv_gbm.R | 8 +-
 R/RLearner_surv_glmboost.R | 7 +-
 R/RLearner_surv_glmnet.R | 8 +-
 R/RLearner_surv_randomForestSRC.R | 5 +
 R/RLearner_surv_ranger.R | 6 +-
 R/RLearner_surv_rpart.R | 6 +-
 R/RegrTask.R | 5 +-
 R/RemoveConstantFeaturesWrapper.R | 3 +
 R/ResampleDesc.R | 47 +-
 R/ResampleInstance.R | 28 +-
 R/ResampleInstances.R | 16 +-
 R/ResamplePrediction.R | 4 +-
 R/ResampleResult.R | 1 +
 R/ResampleResult_operators.R | 34 +-
 R/StackedLearner.R | 112 +++--
 R/SupervisedTask.R | 14 +-
 R/SurvTask.R | 8 +-
 R/Task.R | 27 +-
 R/TaskDesc.R | 1 +
 R/Task_operators.R | 91 +++-
 R/TuneControl.R | 16 +-
 R/TuneControlDesign.R | 1 +
 R/TuneControlGenSA.R | 8 +-
 R/TuneControlGrid.R | 1 +
 R/TuneControlIrace.R | 9 +-
 R/TuneControlMBO.R | 4 +-
 R/TuneControlRandom.R | 14 +-
 R/TuneMultiCritControl.R | 13 +-
 R/TuneMultiCritControlGrid.R | 1 -
 R/TuneMultiCritControlMBO.R | 4 +-
 R/TuneMultiCritControlNSGA2.R | 15 +-
 R/TuneMultiCritControlRandom.R | 8 +-
 R/TuneMultiCritResult.R | 6 +-
 R/TuneResult.R | 8 +-
 R/TuneWrapper.R | 17 +-
 R/UnsupervisedTask.R | 7 +-
 R/WeightedClassesWrapper.R | 15 +-
 R/WrappedModel.R | 19 +-
 R/aggregations.R | 23 +-
 R/analyzeFeatSelResult.R | 23 +-
 R/asROCRPrediction.R | 4 +
 R/aucc.R | 8 +-
 R/batchmark.R | 13 +-
 R/benchmark.R | 7 +-
 R/benchmark_helpers.R | 26 +-
 R/cache_helpers.R | 2 +
 R/calculateConfusionMatrix.R | 34 +-
 R/calculateROCMeasures.R | 8 +-
 R/capLargeValues.R | 11 +-
 R/checkAggrBeforeResample.R | 7 +-
 R/checkBMRMeasure.R | 1 +
 R/checkLearner.R | 9 +-
 R/checkLearnerBeforeTrain.R | 15 +-
 R/checkMeasures.R | 4 +-
 R/checkPrediction.R | 16 +-
 R/checkTargetPreproc.R | 13 +-
 R/checkTask.R | 7 +-
 R/checkTaskSubset.R | 1 +
 R/checkTunerParset.R | 31 +-
 R/configureMlr.R | 3 +-
 R/convertBMRToRankMatrix.R | 6 +-
 R/convertMLBenchObjToTask.R | 6 +-
 R/convertX.R | 18 +-
 R/createDummyFeatures.R | 14 +-
 R/createSpatialResamplingPlots.R | 86 ++--
 R/crossover.R | 1 +
 R/downsample.R | 7 +-
 R/dropFeatures.R | 1 +
 R/estimateResidualVariance.R | 6 +-
 R/evalOptimizationState.R | 23 +-
 R/extractFDAFeatures.R | 19 +-
 R/extractFDAFeaturesMethods.R | 38 +-
 R/extractFDAFeaturesWrapper.R | 5 +-
 R/filterFeatures.R | 28 +-
 R/fixDataForLearner.R | 6 +-
 R/friedmanPostHocTestBMR.R | 14 +-
 R/friedmanTestBMR.R | 6 +-
 R/generateCalibration.R | 39 +-
 R/generateFeatureImportance.R | 34 +-
 R/generateFilterValues.R | 54 ++-
 R/generateHyperParsEffect.R | 111 +++--
 R/generateLearningCurve.R | 30 +-
 R/generatePartialDependence.R | 123 +++---
 R/generateThreshVsPerf.R | 78 ++--
 R/getCaretParamSet.R | 16 +-
 R/getClassWeightParam.R | 1 +
 R/getConfMatrix.R | 1 +
 R/getFeatSelResult.R | 3 +-
 R/getFeatureImportance.R | 9 +-
 R/getFunctionalFeatures.R | 11 +-
 R/getHyperPars.R | 6 +-
 R/getMultilabelBinaryPerformances.R | 7 +-
 R/getNestedTuneResults.R | 3 +
 R/getOOBPreds.R | 7 +-
 R/getParamSet.R | 2 +
 R/getResampleExtract.R | 13 +-
 R/getResamplingIndices.R | 8 +-
 R/getTaskConstructorForLearner.R | 1 +
 R/getTuneResult.R | 2 +-
 R/getTuneThresholdExtra.R | 2 +-
 R/hasFunctionalFeatures.R | 5 +-
 R/helpLearner.R | 38 +-
 R/helpers.R | 44 +-
 R/helpers_fda.R | 8 +-
 R/joinClassLevels.R | 8 +-
 R/learnerArgsToControl.R | 5 +-
 R/listLearners.R | 26 +-
 R/listMeasures.R | 19 +-
 R/logFunOpt.R | 7 +-
 R/makeFunctionalData.R | 15 +-
 R/makeLearner.R | 13 +-
 R/makeLearners.R | 3 +-
 R/measures.R | 407 ++++++++++--------
 R/mergeBenchmarkResults.R | 13 +-
 R/mergeSmallFactorLevels.R | 4 +-
 R/mutateBits.R | 1 +
 R/options.R | 5 +
 R/performance.R | 54 ++-
 R/plotBMRBoxplots.R | 10 +-
 R/plotBMRRanksAsBarChart.R | 1 +
 R/plotBMRSummary.R | 4 +-
 R/plotCritDifferences.R | 56 +--
 R/plotLearnerPrediction.R | 33 +-
 R/plotResiduals.R | 15 +-
 R/plotTuneMultiCritResult.R | 10 +-
 R/predict.R | 38 +-
 R/predictLearner.R | 58 ++-
 R/relativeOverfitting.R | 5 +
 R/removeConstantFeatures.R | 18 +-
 R/removeHyperPars.R | 7 +-
 R/resample.R | 27 +-
 R/resample_convenience.R | 9 +
 R/selectFeatures.R | 12 +-
 R/selectFeaturesExhaustive.R | 8 +-
 R/selectFeaturesGA.R | 8 +-
 R/selectFeaturesRandom.R | 11 +-
 R/selectFeaturesSequential.R | 19 +-
 R/setHyperPars.R | 8 +-
 R/setId.R | 4 +-
 R/setPredictThreshold.R | 6 +-
 R/setPredictType.R | 8 +-
 R/setThreshold.R | 9 +-
 R/simplifyMeasureNames.R | 1 +
 R/smote.R | 21 +-
 R/summarizeColumns.R | 5 +
 R/summarizeLevels.R | 3 +
 R/train.R | 35 +-
 R/trainLearner.R | 2 +-
 R/tuneCMAES.R | 15 +-
 R/tuneDesign.R | 1 +
 R/tuneGenSA.R | 4 +-
 R/tuneGrid.R | 4 +-
 R/tuneIrace.R | 19 +-
 R/tuneMultiCritGrid.R | 6 +-
 R/tuneMultiCritNSGA2.R | 1 -
 R/tuneMultiCritRandom.R | 4 +-
 R/tuneParams.R | 19 +-
 R/tuneParamsMultiCrit.R | 15 +-
 R/tuneRandom.R | 1 +
 R/tuneThreshold.R | 13 +-
 R/tunerFitnFun.R | 10 +-
 R/utils.R | 27 +-
 R/utils_imbalancy.R | 9 +-
 R/utils_opt.R | 24 +-
 R/utils_plot.R | 1 +
 R/zzz.R | 12 +-
 man/ClassifTask.Rd | 66 +++
 man/CostSensTask.Rd | 64 +++
 man/MultilabelTask.Rd | 74 ++++
 man/RegrTask.Rd | 62 +++
 man/SurvTask.Rd | 62 +++
 man/makeClusterTask.Rd | 55 +++
 tests/run-classif1.R | 1 -
 tests/run-classif2.R | 1 -
 tests/run-learners-classif.R | 1 -
 tests/run-learners-classiflabelswitch.R | 1 -
 tests/run-learners-cluster.R | 1 -
 tests/run-learners-general.R | 1 -
 tests/run-learners-multilabel.R | 3 -
 tests/run-learners-regr.R | 2 -
 tests/run-learners-surv.R | 3 -
 tests/run-lint.R | 1 -
 tests/run-multilabel.R | 1 -
 tests/run-parallel.R | 1 -
 tests/testthat/helper_helpers.R | 83 ++--
 tests/testthat/helper_learners_all.R | 29 +-
 tests/testthat/helper_lint.R | 200 +++++----
 tests/testthat/helper_mock_learners.R | 83 ++--
 tests/testthat/helper_objects.R | 65 +--
 tests/testthat/helper_zzz.R | 1 -
 tests/testthat/test_base_BaggingWrapper.R | 1 -
 tests/testthat/test_base_BaseWrapper.R | 10 +-
 .../testthat/test_base_ConstantClassWrapper.R | 9 +-
 tests/testthat/test_base_MulticlassWrapper.R | 10 +-
 tests/testthat/test_base_PreprocWrapper.R | 3 +-
 tests/testthat/test_base_SupervisedTask.R | 11 +-
 tests/testthat/test_base_TuneWrapper.R | 3 +-
 tests/testthat/test_base_UnsupervisedTask.R | 1 -
 tests/testthat/test_base_benchmark.R | 17 +-
 tests/testthat/test_base_blocking.R | 1 +
 tests/testthat/test_base_caching.R | 23 +-
 .../test_base_calculateConfusionMatrix.R | 89 ++--
 .../testthat/test_base_calculateROCMeasures.R | 1 -
 tests/testthat/test_base_capLargeValues.R | 6 +-
 tests/testthat/test_base_checkTaskSubset.R | 3 +-
 tests/testthat/test_base_clustering.R | 8 +-
 .../test_base_convertBMRToRankMatrix.R | 1 -
 .../test_base_convertMLBenchObjToTask.R | 3 +-
 .../testthat/test_base_createDummyFeatures.R | 4 +-
 .../test_base_createSpatialResamplingPlots.R | 3 -
 tests/testthat/test_base_debugdump.R | 1 -
 tests/testthat/test_base_downsample.R | 12 +-
 tests/testthat/test_base_dropFeatures.R | 2 +-
 tests/testthat/test_base_fda.R | 58 ++-
 .../test_base_fda_extractFDAFeatures.R | 29 +-
 tests/testthat/test_base_fixed_indices_cv.R | 6 +-
 .../test_base_generateFeatureImportanceData.R | 1 -
 .../test_base_generateFilterValuesData.R | 8 +-
 .../test_base_generateHyperParsEffect.R | 2 +-
 .../test_base_generateLearningCurve.R | 1 -
 .../testthat/test_base_generateThreshVsPerf.R | 2 +-
 tests/testthat/test_base_getCaretParamSet.R | 12 +-
 .../testthat/test_base_getFeatureImportance.R | 12 +-
 tests/testthat/test_base_getHyperPars.R | 2 +-
 tests/testthat/test_base_getOOBPreds.R | 2 -
 tests/testthat/test_base_getTaskData.R | 2 +-
 tests/testthat/test_base_helpLearner.R | 24 +-
 tests/testthat/test_base_helpers.R | 4 +-
 tests/testthat/test_base_imbal_overbagging.R | 64 +--
 .../test_base_imbal_overundersample.R | 13 +-
 tests/testthat/test_base_imbal_smote.R | 13 +-
 .../test_base_imbal_weightedclasses.R | 23 +-
 tests/testthat/test_base_impute.R | 8 +-
 .../testthat/test_base_learnerArgsToControl.R | 12 +-
 tests/testthat/test_base_makeLearners.R | 1 -
 tests/testthat/test_base_makeTask.R | 4 +-
 tests/testthat/test_base_measures.R | 210 ++++-----
 .../test_base_mergeBenchmarkResults.R | 5 +-
 tests/testthat/test_base_multilabel.R | 2 +-
 tests/testthat/test_base_normalizeFeatures.R | 1 -
 tests/testthat/test_base_performance.R | 6 +-
 tests/testthat/test_base_plotBMRBoxplots.R | 2 +-
 .../test_base_plotBMRRanksAsBarChart.R | 2 +-
 tests/testthat/test_base_plotBMRSummary.R | 1 -
 .../testthat/test_base_plotCritDifferences.R | 20 +-
 tests/testthat/test_base_plotResiduals.R | 1 -
 .../testthat/test_base_relativeOverfitting.R | 1 -
 tests/testthat/test_base_resample.R | 11 +-
 tests/testthat/test_base_resample_bs.R | 2 +
 .../testthat/test_base_resample_convenience.R | 2 +-
 .../test_base_resample_fixedwindowcv.R | 10 +-
 .../test_base_resample_getResamplingIndices.R | 3 +-
 .../test_base_resample_growingwindowcv.R | 8 +-
 tests/testthat/test_base_resample_operators.R | 5 +-
 tests/testthat/test_base_resample_stratify.R | 8 +-
 tests/testthat/test_base_selectFeatures.R | 5 +-
 tests/testthat/test_base_spcv.R | 16 +-
 tests/testthat/test_base_tuneThreshold.R | 2 +-
 tests/testthat/test_base_tuning.R | 8 +-
 tests/testthat/test_base_weights.R | 1 -
 tests/testthat/test_basenocran_batchmark.R | 15 +-
 tests/testthat/test_classif_C50.R | 1 -
 tests/testthat/test_classif_IBk.R | 3 +-
 tests/testthat/test_classif_J48.R | 6 +-
 tests/testthat/test_classif_JRip.R | 5 +-
 .../test_classif_LibLineaRMultiClassSVC.R | 1 -
 .../testthat/test_classif_LiblineaRL1L2SVC.R | 1 -
 .../testthat/test_classif_LiblineaRL1LogReg.R | 1 -
 .../testthat/test_classif_LiblineaRL2L1SVC.R | 1 -
 .../testthat/test_classif_LiblineaRL2LogReg.R | 1 -
 tests/testthat/test_classif_LiblineaRL2SVC.R | 1 -
 tests/testthat/test_classif_OneR.R | 5 +-
 tests/testthat/test_classif_PART.R | 6 +-
 tests/testthat/test_classif_RRF.R | 8 +-
 tests/testthat/test_classif_adaboostm1.R | 10 +-
 tests/testthat/test_classif_bartMachine.R | 3 +-
 tests/testthat/test_classif_binomial.R | 3 +-
 tests/testthat/test_classif_boost.R | 7 +-
 tests/testthat/test_classif_clusterSVM.R | 4 +-
 tests/testthat/test_classif_ctree.R | 3 +-
 tests/testthat/test_classif_dbnDNN.R | 38 +-
 tests/testthat/test_classif_earth.R | 1 -
 tests/testthat/test_classif_evtree.R | 1 -
 tests/testthat/test_classif_extraTrees.R | 4 +-
 tests/testthat/test_classif_fdausc.kernel.R | 6 +-
 tests/testthat/test_classif_fdausc.knn.R | 4 +-
 tests/testthat/test_classif_fdausc.np.R | 3 +-
 tests/testthat/test_classif_featureless.R | 1 +
 tests/testthat/test_classif_fnn.R | 2 +
 tests/testthat/test_classif_gamboost.R | 4 +-
 tests/testthat/test_classif_gaterSVM.R | 4 +-
 tests/testthat/test_classif_gausspr.R | 3 +-
 tests/testthat/test_classif_gbm.R | 2 +-
 tests/testthat/test_classif_geoDA.R | 4 +-
 tests/testthat/test_classif_glmboost.R | 4 +-
 tests/testthat/test_classif_glmnet.R | 4 +-
 tests/testthat/test_classif_h2odeeplearning.R | 6 +-
 tests/testthat/test_classif_h2ogbm.R | 4 +-
 tests/testthat/test_classif_h2oglm.R | 4 +-
 tests/testthat/test_classif_h2orandomForest.R | 6 +-
 tests/testthat/test_classif_kknn.R | 2 +
 tests/testthat/test_classif_knn.R | 2 +
 tests/testthat/test_classif_ksvm.R | 7 +-
 tests/testthat/test_classif_linDA.R | 4 +-
 tests/testthat/test_classif_liquidSVM.R | 3 +-
 tests/testthat/test_classif_logreg.R | 6 +-
 tests/testthat/test_classif_lssvm.R | 4 +-
 tests/testthat/test_classif_mda.R | 3 +-
 tests/testthat/test_classif_mlp.R | 1 -
 tests/testthat/test_classif_multinom.R | 4 +-
 tests/testthat/test_classif_naiveBayes.R | 2 +-
 tests/testthat/test_classif_neuralnet.R | 16 +-
 tests/testthat/test_classif_nnTrain.R | 2 +-
 tests/testthat/test_classif_nnet.R | 2 +-
 tests/testthat/test_classif_nodeHarvest.R | 2 +-
 tests/testthat/test_classif_pamr.R | 6 +-
 tests/testthat/test_classif_penalized.R | 2 +
 tests/testthat/test_classif_plsdaCaret.R | 1 -
 tests/testthat/test_classif_probit.R | 6 +-
 tests/testthat/test_classif_qda.R | 4 +-
 tests/testthat/test_classif_quaDA.R | 8 +-
 tests/testthat/test_classif_randomForest.R | 14 +-
 tests/testthat/test_classif_ranger.R | 2 +-
 tests/testthat/test_classif_rda.R | 2 +-
 tests/testthat/test_classif_rknn.R | 16 +-
 tests/testthat/test_classif_rotationForest.R | 2 +-
 tests/testthat/test_classif_rpart.R | 3 +-
 tests/testthat/test_classif_rrlda.R | 1 -
 tests/testthat/test_classif_saeDNN.R | 2 +-
 tests/testthat/test_classif_svm.R | 17 +-
 tests/testthat/test_classif_xgboost.R | 5 +-
 tests/testthat/test_cluster_kkmeans.R | 2 +-
 tests/testthat/test_featsel_FeatSelWrapper.R | 6 +-
 tests/testthat/test_featsel_FilterWrapper.R | 1 -
 tests/testthat/test_featsel_fselectorrcpp.R | 44 +-
 tests/testthat/test_featsel_praznik.R | 9 +-
 tests/testthat/test_featsel_selectFeatures.R | 1 +
 .../test_featsel_selectFeaturesSequential.R | 10 +-
 tests/testthat/test_learners_all_classif.R | 8 +-
 tests/testthat/test_learners_all_multilabel.R | 1 -
 .../test_learners_classiflabelswitch.R | 19 +-
 tests/testthat/test_lint.R | 1 -
 tests/testthat/test_multilabel_cforest.R | 2 -
 tests/testthat/test_parallel_all.R | 8 +-
 tests/testthat/test_regr_FDboost.R | 4 +-
 tests/testthat/test_regr_GPfit.R | 1 +
 tests/testthat/test_regr_IBk.R | 2 +-
 tests/testthat/test_regr_RRF.R | 2 +-
 tests/testthat/test_regr_bartMachine.R | 5 +-
 tests/testthat/test_regr_bcart.R | 2 +-
 tests/testthat/test_regr_brnn.R | 4 +-
 tests/testthat/test_regr_bst.R | 2 +-
 tests/testthat/test_regr_btgp.R | 2 +-
 tests/testthat/test_regr_btgpllm.R | 2 +-
 tests/testthat/test_regr_btlm.R | 2 +-
 tests/testthat/test_regr_crs.R | 4 +-
 tests/testthat/test_regr_ctree.R | 2 +-
 tests/testthat/test_regr_cubist.R | 2 +-
 tests/testthat/test_regr_earth.R | 2 +-
 tests/testthat/test_regr_evtree.R | 1 -
 tests/testthat/test_regr_fnn.R | 2 +
 tests/testthat/test_regr_gamboost.R | 4 +-
 tests/testthat/test_regr_gausspr.R | 1 -
 tests/testthat/test_regr_gbm.R | 2 +-
 tests/testthat/test_regr_glm.R | 2 -
 tests/testthat/test_regr_glmboost.R | 6 +-
 tests/testthat/test_regr_h2odeeplearning.R | 4 +-
 tests/testthat/test_regr_h2ogbm.R | 2 +-
 tests/testthat/test_regr_h2oglm.R | 2 +-
 tests/testthat/test_regr_h2orandomForest.R | 4 +-
 tests/testthat/test_regr_kknn.R | 2 +
 tests/testthat/test_regr_km.R | 2 +-
 tests/testthat/test_regr_laGP.R | 2 +-
 tests/testthat/test_regr_liquidSVM.R | 5 +-
 tests/testthat/test_regr_mob.R | 4 +-
 tests/testthat/test_regr_nnet.R | 4 +-
 tests/testthat/test_regr_penalized.R | 6 +-
 tests/testthat/test_regr_randomForest.R | 1 -
 tests/testthat/test_regr_ranger.R | 2 +-
 tests/testthat/test_regr_rknn.R | 16 +-
 tests/testthat/test_regr_rpart.R | 2 +-
 tests/testthat/test_regr_slim.R | 2 +-
 tests/testthat/test_regr_svm.R | 2 +-
 tests/testthat/test_regr_xgboost.R | 10 +-
 tests/testthat/test_stack.R | 7 +-
 tests/testthat/test_surv_CoxBoost.R | 4 +-
 tests/testthat/test_surv_coxph.R | 2 +-
 tests/testthat/test_surv_cv.CoxBoost.R | 3 +-
 tests/testthat/test_surv_cvglmnet.R | 2 +-
 tests/testthat/test_surv_gamboost.R | 3 +-
 tests/testthat/test_surv_gbm.R | 2 +-
 tests/testthat/test_surv_glmboost.R | 2 +-
 tests/testthat/test_surv_glmnet.R | 2 +-
 tests/testthat/test_surv_measures.R | 3 +-
 tests/testthat/test_tuneParams.R | 2 +-
 tests/testthat/test_tune_ModelMultiplexer.R | 24 +-
 .../testthat/test_tune_getTuneResultOptPath.R | 7 +-
 tests/testthat/test_tune_tuneGrid.R | 1 -
 tests/testthat/test_tune_tuneIrace.R | 6 +-
 .../testthat/test_tune_tuneParamsMultiCrit.R | 23 +-
 tests/testthat/test_tune_tuneThreshold.R | 3 +-
 624 files changed, 5066 insertions(+), 2689 deletions(-)
 create mode 100644 man/ClassifTask.Rd
 create mode 100644 man/CostSensTask.Rd
 create mode 100644 man/MultilabelTask.Rd
 create mode 100644 man/RegrTask.Rd
 create mode 100644 man/SurvTask.Rd
 create mode 100644 man/makeClusterTask.Rd

diff --git a/R/Aggregation.R b/R/Aggregation.R
index 7b91698ecc..6de1c4ce8a 100644
--- a/R/Aggregation.R
+++ b/R/Aggregation.R
@@ -60,9 +60,10 @@ NULL
 #' # computes the interquartile range on all performance values
 #' test.iqr = makeAggregation(id = "test.iqr", name = "Test set interquartile range",
 #'   properties = "req.test",
-#'   fun = function (task, perf.test, perf.train, measure, group, pred) IQR(perf.test))
+#'   fun = function(task, perf.test, perf.train, measure, group, pred) IQR(perf.test))
 #' @export
 makeAggregation = function(id, name = id, properties, fun) {
+
   assertString(id)
   assertString(name)
   makeS3Obj("Aggregation", id = id, name = name, fun = fun, properties = properties)
@@ -70,5 +71,6 @@ makeAggregation = function(id, name = id, properties, fun) {

 #' @export
 print.Aggregation = function(x, ...) {
+
   catf("Aggregation function: %s", x$id)
 }
diff --git a/R/BaggingWrapper.R b/R/BaggingWrapper.R
index ec03087cf4..493832386a 100644
--- a/R/BaggingWrapper.R
+++ b/R/BaggingWrapper.R
@@ -42,6 +42,7 @@
 #' @family wrapper
 #' @export
 makeBaggingWrapper = function(learner, bw.iters = 10L, bw.replace = TRUE, bw.size, bw.feats = 1) {
+
   learner = checkLearner(learner, type = c("classif", "regr"))
   pv = list()
   if (!missing(bw.iters)) {
@@ -60,8 +61,9 @@ makeBaggingWrapper = function(learner, bw.iters = 10L, bw.replace = TRUE, bw.siz
     assertNumber(bw.feats, lower = 0, upper = 1)
     pv$bw.feats = bw.feats
   }
-  if (learner$predict.type != "response")
+  if (learner$predict.type != "response") {
     stop("Predict type of the basic learner must be 'response'.")
+  }
   id = stri_paste(learner$id, "bagged", sep = ".")
   packs = learner$package
   ps = makeParamSet(
@@ -76,6 +78,7 @@ makeBaggingWrapper = function(learner, bw.iters = 10L, bw.replace = TRUE, bw.siz

 #' @export
 print.BaggingModel = function(x, ...) {
+
   s = capture.output(print.WrappedModel(x))
   u = sprintf("Bagged Learner: %s", class(x$learner$next.learner)[1L])
   s = append(s, u, 1L)
@@ -86,8 +89,9 @@ print.BaggingModel = function(x, ...) {
 trainLearner.BaggingWrapper = function(.learner, .task, .subset = NULL,
   .weights = NULL, bw.iters = 10, bw.replace = TRUE, bw.size, bw.feats = 1, ...) {

-  if (missing(bw.size))
+  if (missing(bw.size)) {
     bw.size = if (bw.replace) 1 else 0.632
+  }
   .task = subsetTask(.task, subset = .subset)
   n = getTaskSize(.task)
   # number of observations to sample
@@ -104,6 +108,7 @@ trainLearner.BaggingWrapper = function(.learner, .task, .subset = NULL, .weights
 }

 doBaggingTrainIteration = function(i, n, m, k, bw.replace, task, learner, weights) {
+
   setSlaveOptions()
   bag = sample(seq_len(n), m, replace = bw.replace)
   task = subsetTask(task, features = sample(getTaskFeatureNames(task), k, replace = FALSE))
@@ -112,21 +117,25 @@ doBaggingTrainIteration = function(i, n, m, k, bw.replace, task, learner, weight

 #' @export
 predictLearner.BaggingWrapper = function(.learner, .model, .newdata, .subset = NULL, ...) {
+
   models = getLearnerModel(.model, more.unwrap = FALSE)
   g = if (.learner$type == "classif") as.character else identity
   p = asMatrixCols(lapply(models, function(m) {
+
     nd = .newdata[, m$features, drop = FALSE]
     g(predict(m, newdata = nd, subset = .subset, ...)$data$response)
   }))
   if (.learner$predict.type == "response") {
-    if (.learner$type == "classif")
+    if (.learner$type == "classif") {
       as.factor(apply(p, 1L, computeMode))
-    else
+    } else {
       rowMeans(p)
+    }
   } else {
     if (.learner$type == "classif") {
       levs = .model$task.desc$class.levels
       p = apply(p, 1L, function(x) {
+
         x = factor(x, levels = levs) # we need all level for the table and we need them in consistent order!
         as.numeric(prop.table(table(x)))
       })
@@ -141,12 +150,14 @@ predictLearner.BaggingWrapper = function(.learner, .model, .newdata, .subset = N
 # be response, we can estimates probs and se on the outside
 #' @export
 setPredictType.BaggingWrapper = function(learner, predict.type) {
+
   setPredictType.Learner(learner, predict.type)
 }

 #' @export
 getLearnerProperties.BaggingWrapper = function(learner) {
-  switch(learner$type,
+
+  switch(learner$type,
     "classif" = union(getLearnerProperties(learner$next.learner), "prob"),
     "regr" = union(getLearnerProperties(learner$next.learner), "se")
   )
diff --git a/R/BaseEnsemble.R b/R/BaseEnsemble.R
index cbe5eeb6fb..20f0fe292d 100644
--- a/R/BaseEnsemble.R
+++ b/R/BaseEnsemble.R
@@ -18,19 +18,23 @@ makeBaseEnsemble = function(id, base.learners, bls.type = NULL,
   base.learners = lapply(base.learners, checkLearner, type = bls.type)

   tt = unique(extractSubList(base.learners, "type"))
-  if (length(tt) > 1L)
+  if (length(tt) > 1L) {
     stopf("Base learners must all be of same type, but have: %s", collapse(tt))
-  if (is.null(ens.type))
+  }
+  if (is.null(ens.type)) {
     ens.type = tt
+  }
   ids = unique(extractSubList(base.learners, "id"))
-  if (length(ids) != length(base.learners))
+  if (length(ids) != length(base.learners)) {
     stop("Base learners must all have unique ids!")
+  }

   # check that all predict.types are the same
   pts = unique(extractSubList(base.learners, "predict.type"))
-  if (length(pts) > 1L)
+  if (length(pts) > 1L) {
     stopf("Base learners must all have same predict.type, but have: %s", collapse(pts))
+  }

   # join all parsets of base.learners + prefix param names with base learner id
   # (we could also do this operation on-the.fly in getParamSet.BaseEnsemble,
diff --git a/R/BaseEnsemble_operators.R b/R/BaseEnsemble_operators.R
index 86bf05a9fb..489d381fc7 100644
--- a/R/BaseEnsemble_operators.R
+++ b/R/BaseEnsemble_operators.R
@@ -1,5 +1,6 @@
 # find the learner for a given param name, so .
 matchBaseEnsembleLearner = function(ensemble, pn) {
+
   patterns = stri_paste("^", names(ensemble$base.learners), "\\.")
   j = which(vlapply(patterns, stri_detect_regex, str = pn))
   par.id = stri_replace_first(pn, "", regex = patterns[j])
@@ -8,10 +9,13 @@ matchBaseEnsembleLearner = function(ensemble, pn) {

 #' @export
 getHyperPars.BaseEnsemble = function(learner, for.fun = c("train", "predict", "both")) {
+
   pvs = lapply(learner$base.learners, function(lrn) {
+
     xs = getHyperPars(lrn, for.fun = for.fun)
-    if (length(xs) > 0L)
+    if (length(xs) > 0L) {
       names(xs) = stri_paste(lrn$id, ".", names(xs))
+    }
     return(xs)
   })
   # if we dont do this, R prefixes the list names again.
@@ -24,6 +28,7 @@ getHyperPars.BaseEnsemble = function(learner, for.fun = c("train", "predict", "b
 # set hyper pars down in ensemble base learners, identify correct base learner + remove prefix
 #' @export
 setHyperPars2.BaseEnsemble = function(learner, par.vals) {
+
   ns = names(par.vals)
   parnames.bls = names(learner$par.set.bls$pars)
   for (i in seq_along(par.vals)) {
@@ -43,6 +48,7 @@ setHyperPars2.BaseEnsemble = function(learner, par.vals) {

 #' @export
 removeHyperPars.BaseEnsemble = function(learner, ids) {
+
   parnames.bls = names(learner$par.set.bls$pars)
   for (id in ids) {
     if (id %in% parnames.bls) {
@@ -63,6 +69,7 @@ removeHyperPars.BaseEnsemble = function(learner, ids) {
 # if one does not want this, one must override
 #' @export
 setPredictType.BaseEnsemble = function(learner, predict.type) {
+
   # this does the check for the prop
   lrn = setPredictType.Learner(learner, predict.type)
   lrn$base.learners = lapply(lrn$base.learners, setPredictType, predict.type = predict.type)
@@ -71,6 +78,7 @@ setPredictType.BaseEnsemble = function(learner, predict.type) {

 #' @export
 makeWrappedModel.BaseEnsemble = function(learner, learner.model, task.desc, subset, features, factor.levels, time) {
+
   x = NextMethod(x)
   addClasses(x, "BaseEnsembleModel")
 }
diff --git a/R/BaseWrapper.R b/R/BaseWrapper.R
index 93966dcb7a..4484e84f17 100644
--- a/R/BaseWrapper.R
+++ b/R/BaseWrapper.R
@@ -19,11 +19,14 @@
 #' @export
 makeBaseWrapper = function(id, type, next.learner, package = character(0L), par.set = makeParamSet(),
   par.vals = list(), learner.subclass, model.subclass, cache = FALSE) {
-  if (inherits(next.learner, "OptWrapper") && is.element("TuneWrapper", learner.subclass))
+
+  if (inherits(next.learner, "OptWrapper") && is.element("TuneWrapper", learner.subclass)) {
     stop("Cannot wrap a tuning wrapper around another optimization wrapper!")
+  }
   ns = intersect(names(par.set$pars), names(next.learner$par.set$pars))
-  if (length(ns) > 0L)
+  if (length(ns) > 0L) {
     stopf("Hyperparameter names in wrapper clash with base learner names: %s", collapse(ns))
+  }

   learner = makeLearnerBaseConstructor(classes = c(learner.subclass, "BaseWrapper"),
     id = id,
@@ -43,6 +46,7 @@ makeBaseWrapper = function(id, type, next.learner, package = character(0L), par.

 #' @export
 print.BaseWrapper = function(x, ...) {
+
   s = ""
   y = x
   while (inherits(y, "BaseWrapper")) {
@@ -68,6 +72,7 @@ print.BaseWrapper = function(x, ...) {

 #' @export
 predictLearner.BaseWrapper = function(.learner, .model, .newdata, ...) {
+
   args = removeFromDots(names(.learner$par.vals), ...)
   do.call(predictLearner, c(
     list(.learner = .learner$next.learner, .model = .model$learner.model$next.model, .newdata = .newdata),
@@ -77,6 +82,7 @@ predictLearner.BaseWrapper = function(.learner, .model, .newdata, ...) {

 #' @export
 makeWrappedModel.BaseWrapper = function(learner, learner.model, task.desc, subset = NULL, features, factor.levels, time) {
+
   x = NextMethod()
   addClasses(x, c(learner$model.subclass, "BaseWrapperModel"))
 }
@@ -85,22 +91,25 @@ makeWrappedModel.BaseWrapper = function(learner, learner.model, task.desc, subse

 #' @export
 isFailureModel.BaseWrapperModel = function(model) {
+
   return(!inherits(model$learner.model, "NoFeaturesModel") && isFailureModel(model$learner.model$next.model))
 }

 #' @export
 getFailureModelMsg.BaseWrapperModel = function(model) {
+
   return(getFailureModelMsg(model$learner.model$next.model))
 }

 #' @export
 getFailureModelDump.BaseWrapperModel = function(model) {
+
   return(getFailureModelDump(model$learner.model$next.model))
 }

 #' @export
 getLearnerProperties.BaseWrapper = function(learner) {
+
   # set properties by default to what the resulting type is allowed and what the base learner can do
   intersect(listLearnerProperties(learner$type), getLearnerProperties(learner$next.learner))
 }
-
diff --git a/R/BaseWrapper_operators.R b/R/BaseWrapper_operators.R
index 0a7f40525e..1dbbfe78c3 100644
--- a/R/BaseWrapper_operators.R
+++ b/R/BaseWrapper_operators.R
@@ -1,17 +1,20 @@
 #' @export
 getParamSet.BaseWrapper = function(x) {
+
   c(x$par.set, getParamSet(x$next.learner))
 }

 #' @export
 getHyperPars.BaseWrapper = function(learner, for.fun = c("train", "predict", "both")) {
+
   c(getHyperPars(learner$next.learner, for.fun), getHyperPars.Learner(learner, for.fun))
 }

 #' @export
 setHyperPars2.BaseWrapper = function(learner, par.vals) {
+
   ns = names(par.vals)
   pds.n = names(learner$par.set$pars)
   for (i in seq_along(par.vals)) {
@@ -26,9 +29,11 @@ setHyperPars2.BaseWrapper = function(learner, par.vals) {

 #' @export
 removeHyperPars.BaseWrapper = function(learner, ids) {
+
   i = intersect(names(learner$par.vals), ids)
-  if (length(i) > 0L)
+  if (length(i) > 0L) {
     learner = removeHyperPars.Learner(learner, i)
+  }
   learner$next.learner = removeHyperPars(learner$next.learner, setdiff(ids, i))
   return(learner)
 }
@@ -36,8 +41,10 @@ removeHyperPars.BaseWrapper = function(learner, ids) {


 getLeafLearner = function(learner) {
-  if (inherits(learner, "BaseWrapper"))
+
+  if (inherits(learner, "BaseWrapper")) {
     return(getLeafLearner(learner$next.learner))
+  }
   return(learner)
 }

@@ -46,7 +53,7 @@ getLeafLearner = function(learner) {
 # if one does not want this, one must override
 #' @export
 setPredictType.BaseWrapper = function(learner, predict.type) {
+
   learner$next.learner = setPredictType(learner$next.learner, predict.type)
   setPredictType.Learner(learner, predict.type)
 }
-
diff --git a/R/BenchmarkResultOrderLevels.R b/R/BenchmarkResultOrderLevels.R
index 8bfa619bf4..2854306139 100644
--- a/R/BenchmarkResultOrderLevels.R
+++ b/R/BenchmarkResultOrderLevels.R
@@ -2,8 +2,10 @@
 # useful for plotting in ggplot2
 # if order.tsks is NULL, just return the df
 orderBMRTasks = function(bmr, df = NULL, order.tsks) {
-  if (is.null(df))
+
+  if (is.null(df)) {
     df = as.data.frame(bmr)
+  }
   if (!is.null(order.tsks)) {
     assertCharacter(order.tsks, len = length(getBMRTaskIds(bmr)))
     assertSetEqual(order.tsks, getBMRTaskIds(bmr), ordered = FALSE)
@@ -15,9 +17,11 @@ orderBMRTasks = function(bmr, df = NULL, order.tsks) {
 # order levels of learner.ids of a BenchmarkResult or similar data.frame
 # useful for plotting in ggplot2
 # if order.tsks is NULL, just return the df
-orderBMRLrns = function(bmr, df = NULL, order.lrns){
-  if (is.null(df))
+orderBMRLrns = function(bmr, df = NULL, order.lrns) {
+
+  if (is.null(df)) {
     df = as.data.frame(bmr)
+  }
   if (!is.null(order.lrns)) {
     assertCharacter(order.lrns, len = length(getBMRLearnerIds(bmr)))
     assertSetEqual(order.lrns, getBMRLearnerIds(bmr), ordered = FALSE)
diff --git a/R/BenchmarkResult_operators.R b/R/BenchmarkResult_operators.R
index bb08874a05..26407858b9 100644
--- a/R/BenchmarkResult_operators.R
+++ b/R/BenchmarkResult_operators.R
@@ -8,6 +8,7 @@
 #' @export
 #' @family benchmark
 getBMRTaskIds = function(bmr) {
+
   assertClass(bmr, "BenchmarkResult")
   return(names(bmr$results))
 }
@@ -22,6 +23,7 @@ getBMRTaskIds = function(bmr) {
 #' @export
 #' @family benchmark
 getBMRLearners = function(bmr) {
+
   assertClass(bmr, "BenchmarkResult")
   return(bmr$learners)
 }
@@ -36,6 +38,7 @@ getBMRLearners = function(bmr) {
 #' @export
 #' @family benchmark
 getBMRLearnerIds = function(bmr) {
+
   assertClass(bmr, "BenchmarkResult")
   extractSubList(bmr$learners, "id", use.names = FALSE)
 }
@@ -50,6 +53,7 @@ getBMRLearnerIds = function(bmr) {
 #' @export
 #' @family benchmark
 getBMRLearnerShortNames = function(bmr) {
+
   assertClass(bmr, "BenchmarkResult")
   vcapply(bmr$learners, getLearnerShortName, use.names = FALSE)
 }
@@ -64,6 +68,7 @@ getBMRLearnerShortNames = function(bmr) {
 #' @export
 #' @family benchmark
 getBMRMeasures = function(bmr) {
+
   assertClass(bmr, "BenchmarkResult")
   return(bmr$measures)
 }
@@ -78,6 +83,7 @@ getBMRMeasures = function(bmr) {
 #' @export
 #' @family benchmark
 getBMRMeasureIds = function(bmr) {
+
   assertClass(bmr, "BenchmarkResult")
   extractSubList(bmr$measures, "id", use.names = FALSE)
 }
@@ -85,30 +91,37 @@ getBMRMeasureIds = function(bmr) {
 # returns buried object in BMR, either as list of lists or data.frame with task.id, learner.id cols
 # you can restrict to subsets for tasks and learners and pass function to extract object
 getBMRObjects = function(bmr, task.ids = NULL, learner.ids = NULL, fun, as.df = FALSE, drop = FALSE) {
+
   assertClass(bmr, "BenchmarkResult")
   brtids = getBMRTaskIds(bmr)
   brlids = getBMRLearnerIds(bmr)
-  if (is.null(task.ids))
+  if (is.null(task.ids)) {
     task.ids = brtids
-  else
+  } else {
     assertSubset(task.ids, brtids)
-  if (is.null(learner.ids))
+  }
+  if (is.null(learner.ids)) {
     learner.ids = brlids
-  else
+  } else {
     assertSubset(learner.ids, brlids)
+  }
   res = lapply(task.ids, function(tid) {
+
     xs = lapply(learner.ids, function(lid) {
+
       p = fun(bmr$results[[tid]][[lid]])
       if (as.df) {
-        if (!is.null(p))
+        if (!is.null(p)) {
           p = as.data.frame(cbind(task.id = tid, learner.id = lid, p))
+        }
       }
       return(p)
     })
-    if (as.df)
+    if (as.df) {
      xs = setDF(rbindlist(xs, fill = TRUE))
-    else
+    } else {
      xs = setNames(xs, learner.ids)
+    }
     return(xs)
   })
   if (as.df) {
@@ -122,12 +135,15 @@ getBMRObjects = function(bmr, task.ids = NULL, learner.ids = NULL, fun, as.df =
       drop.learners = length(learner.ids) == 1L
       if (drop.tasks | drop.learners) {
         res = unlist(res, recursive = FALSE)
-        if (drop.tasks & drop.learners)
+        if (drop.tasks & drop.learners) {
           res = res[[1L]]
-        if (drop.tasks & !drop.learners)
+        }
+        if (drop.tasks & !drop.learners) {
          res = setNames(res, learner.ids)
-        if (!drop.tasks & drop.learners)
+        }
+        if (!drop.tasks & drop.learners) {
          res = setNames(res, task.ids)
+        }
       }
     }
   }
@@ -156,11 +172,13 @@ getBMRObjects = function(bmr, task.ids = NULL, learner.ids = NULL, fun, as.df =
 #' @family benchmark
 getBMRPredictions = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FALSE, drop = FALSE) {
+
   assertClass(bmr, "BenchmarkResult")
-  f = if (as.df)
+  f = if (as.df) {
     function(x) as.data.frame(getRRPredictions(x))
-  else
+  } else {
     function(x) getRRPredictions(x)
+  }
   getBMRObjects(bmr, task.ids, learner.ids, fun = f, as.df = as.df, drop = drop)
 }
@@ -182,6 +200,7 @@ getBMRPredictions = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = F
 #' @export
 #' @family benchmark
 getBMRPerformances = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FALSE, drop = FALSE) {
+
   assertClass(bmr, "BenchmarkResult")
   f = function(x) x$measures.test
   getBMRObjects(bmr, task.ids, learner.ids, fun = f, as.df = as.df, drop = drop)
@@ -204,11 +223,13 @@ getBMRPerformances = function(bmr, task.ids = NULL, learner.ids = NULL, as.df =
 #' @export
 #' @family benchmark
 getBMRAggrPerformances = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FALSE, drop = FALSE) {
+
   assertClass(bmr, "BenchmarkResult")
-  f = if (as.df)
+  f = if (as.df) {
     function(x) as.data.frame(as.list(x$aggr))
-  else
+  } else {
     function(x) x$aggr
+  }
   getBMRObjects(bmr, task.ids, learner.ids, fun = f, as.df = as.df, drop = drop)
 }
@@ -218,6 +239,7 @@ getBMROptResults = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FA
   f = if (as.df) {
     function(x) {
+
       if (inherits(x$learner, wrapper.class)) {
         xs = lapply(x$extract, fun)
         xs = setDF(rbindlist(lapply(seq_along(xs), function(i) cbind(iter = i, xs[[i]])), fill = TRUE))
@@ -227,10 +249,12 @@ getBMROptResults = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FA
     }
   } else {
     function(x) {
-      if (inherits(x$learner, wrapper.class))
+
+      if (inherits(x$learner, wrapper.class)) {
         x$extract
-      else
+      } else {
         NULL
+      }
     }
   }
   getBMRObjects(bmr, task.ids, learner.ids, fun = f, as.df = as.df, drop = drop)
@@ -250,8 +274,10 @@ getBMROptResults = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FA
 #' @export
 #' @family benchmark
 getBMRTuneResults = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FALSE, drop = FALSE) {
+
   assertClass(bmr, "BenchmarkResult")
   getBMROptResults(bmr, task.ids, learner.ids, as.df, "TuneWrapper", function(x) {
+
     data.frame(x$x, as.list(x$y))
   }, drop = drop)
 }
@@ -272,8 +298,10 @@ getBMRTuneResults = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = F
 #' @export
 #' @family benchmark
 getBMRFeatSelResults = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FALSE, drop = FALSE) {
+
   assertClass(bmr, "BenchmarkResult")
   getBMROptResults(bmr, task.ids, learner.ids, as.df, "FeatSelWrapper", function(x) {
+
     as.data.frame(x$x)
   }, drop = drop)
 }
@@ -294,8 +322,10 @@ getBMRFeatSelResults = function(bmr, task.ids = NULL, learner.ids = NULL, as.df
 #' @export
 #' @family benchmark
 getBMRFilteredFeatures = function(bmr, task.ids = NULL, learner.ids = NULL, as.df = FALSE, drop = FALSE) {
+
   assertClass(bmr, "BenchmarkResult")
   getBMROptResults(bmr, task.ids, learner.ids, as.df, "FilterWrapper", function(x) {
+
     as.data.frame(x)
   }, drop = drop)
 }
@@ -315,8 +345,10 @@ getBMRFilteredFeatures = function(bmr, task.ids = NULL, learner.ids = NULL, as.d
 #' @export
 #' @family benchmark
 getBMRModels = function(bmr, task.ids = NULL, learner.ids = NULL, drop = FALSE) {
+
   assertClass(bmr, "BenchmarkResult")
   f = function(x) {
+
     x$models
   }
   getBMRObjects(bmr, task.ids, learner.ids, fun = f, as.df = FALSE, drop = drop)
@@ -330,8 +362,9 @@ getBMRModels = function(bmr, task.ids = NULL, learner.ids = NULL, drop = FALSE)
 #' @return ([list]).
 #' @export
 getBMRTaskDescriptions = function(bmr) {
-  .Deprecated("getBMRTaskDesc")
-  getBMRTaskDescs(bmr)
+
+  .Deprecated("getBMRTaskDesc")
+  getBMRTaskDescs(bmr)
 }
@@ -344,6 +377,6 @@ getBMRTaskDescriptions = function(bmr) {
 #' @export
 #' @family benchmark
 getBMRTaskDescs = function(bmr) {
+
   lapply(bmr$results, function(x) lapply(x, getRRTaskDesc))
 }
-
diff --git a/R/ChainModel.R b/R/ChainModel.R
index f66512d523..9496fd0347 100644
--- a/R/ChainModel.R
+++ b/R/ChainModel.R
@@ -7,23 +7,28 @@
 #' @keywords internal
 #' @export
 makeChainModel = function(next.model, cl) {
+
   setClasses(list(next.model = next.model), c(cl, "ChainModel", "WrappedModel"))
 }

-#'@export
+#' @export
 getLearnerModel.BaseWrapperModel = function(model, more.unwrap = FALSE) {
+
   # FIXME: this structure and special-cases really suck. FailureModel and NoFeaturesModel
   # should probably be redesigned at some point
-  if (inherits(model$learner.model, "NoFeaturesModel"))
+  if (inherits(model$learner.model, "NoFeaturesModel")) {
     return(model$learner.model)
-  if (more.unwrap)
+  }
+  if (more.unwrap) {
     getLearnerModel(model$learner.model$next.model, more.unwrap = TRUE)
-  else
+  } else {
     model$learner.model$next.model
+  }
 }

 #' @export
 print.ChainModel = function(x, ...) {
+
   print(x$next.model)
 }
diff --git a/R/ChainModel_operators.R b/R/ChainModel_operators.R
index a5a4ba5360..9416b94cac 100644
--- a/R/ChainModel_operators.R
+++ b/R/ChainModel_operators.R
@@ -1,5 +1,7 @@
 getLeafModel = function(model) {
-  if (inherits(model, "BaseWrapperModel"))
+
+  if (inherits(model, "BaseWrapperModel")) {
     return(getLeafModel(model$learner.model$next.model))
+  }
   return(model)
 }
diff --git a/R/ClassifTask.R b/R/ClassifTask.R
index 3c998ef744..24c9d5fc52 100644
--- a/R/ClassifTask.R
+++ b/R/ClassifTask.R
@@ -1,12 +1,14 @@
 #' @export
 #' @rdname Task
 makeClassifTask = function(id = deparse(substitute(data)), data, target, weights = NULL, blocking = NULL,
   coordinates = NULL, positive = NA_character_, fixup.data = "warn", check.data = TRUE) {
+
   assertString(id)
   assertDataFrame(data)
   assertString(target)
   # some code on cran passed stuff like positive=1, we can live with the convert here
-  if (isScalarNumeric(positive))
+  if (isScalarNumeric(positive)) {
     positive = as.character(positive)
+  }
   assertString(positive, na.ok = TRUE)
   assertChoice(fixup.data, choices = c("no", "quiet", "warn"))
@@ -34,14 +36,17 @@ makeClassifTask = function(id = deparse(substitute(data)), data, target, weights
 #' @export
 #' @rdname makeTaskDesc
 makeClassifTaskDesc = function(id, data, target, weights, blocking, positive, coordinates) {
+
   levs = levels(data[[target]])
   m = length(levs)
   if (is.na(positive)) {
-    if (m <= 2L)
+    if (m <= 2L) {
       positive = levs[1L]
+    }
   } else {
-    if (m > 2L)
+    if (m > 2L) {
       stop("Cannot set a positive class for a multiclass problem!")
+    }
     assertChoice(positive, choices = levs)
   }
   td = makeTaskDescInternal("classif", id, data, target, weights, blocking, coordinates)
@@ -49,15 +54,17 @@ makeClassifTaskDesc = function(id, data, target, weights, blocking, positive, co
   td$positive = positive
   td$negative = NA_character_
   td$class.distribution = table(data[target])
-  if (length(td$class.levels) == 1L)
+  if (length(td$class.levels) == 1L) {
     td$negative = stri_paste("not_", positive)
-  else if (length(td$class.levels) == 2L)
+  } else if (length(td$class.levels) == 2L) {
     td$negative = setdiff(td$class.levels, positive)
+  }
   return(addClasses(td, c("ClassifTaskDesc", "SupervisedTaskDesc")))
 }

 #' @export
 print.ClassifTask = function(x, ...) {
+
   di = printToChar(x$task.desc$class.distribution)
   m = length(x$task.desc$class.levels)
   print.SupervisedTask(x)
diff --git a/R/ClassificationViaRegressionWrapper.R b/R/ClassificationViaRegressionWrapper.R
index 0be613c27f..3d0d2db8d4 100644
--- a/R/ClassificationViaRegressionWrapper.R
+++ b/R/ClassificationViaRegressionWrapper.R
@@ -19,6 +19,7 @@
 #' mod = train(lrn, sonar.task, subset = 1:140)
 #' predictions = predict(mod, newdata = getTaskData(sonar.task)[141:208, 1:60])
 makeClassificationViaRegressionWrapper = function(learner, predict.type = "response") {
+
   learner = checkLearner(learner, "regr")
   lrn = makeBaseWrapper(
     id = stri_paste(learner$id, "classify", sep = "."),
@@ -36,6 +37,7 @@ makeClassificationViaRegressionWrapper = function(learner, predict.type = "respo

 #' @export
 trainLearner.ClassificationViaRegressionWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, ...) {
+
   pos = getTaskDesc(.task)$positive
   td = getTaskData(.task, target.extra = TRUE, subset = .subset)
   target.name = stri_paste(pos, "prob", sep = ".")
@@ -53,6 +55,7 @@ trainLearner.ClassificationViaRegressionWrapper = function(.learner, .task, .sub

 #' @export
 predictLearner.ClassificationViaRegressionWrapper = function(.learner, .model, .newdata, .subset = NULL, ...) {
+
   model = getLearnerModel(.model, more.unwrap = FALSE)
   p = predict(model, newdata = .newdata, subset = .subset, ...)$data$response
@@ -67,6 +70,7 @@ predictLearner.ClassificationViaRegressionWrapper = function(.learner, .model, .

 #' @export
 getLearnerProperties.ClassificationViaRegressionWrapper = function(learner) {
+
   props = getLearnerProperties(learner$next.learner)
   props = union(props, c("twoclass", "prob"))
   intersect(props, mlr$learner.properties$classif)
@@ -74,11 +78,13 @@ getLearnerProperties.ClassificationViaRegressionWrapper = function(learner) {

 #' @export
 setPredictType.ClassificationViaRegressionWrapper = function(learner, predict.type) {
+
   assertChoice(predict.type, c("response", "prob"))
   learner$predict.type = predict.type
 }

 #' @export
 isFailureModel.ClassificationViaRegressionModel = function(model) {
+
   isFailureModel(getLearnerModel(model, more.unwrap = FALSE))
 }
diff --git a/R/ClusterTask.R b/R/ClusterTask.R
index 879618f214..6ba058c9e1 100644
--- a/R/ClusterTask.R
+++ b/R/ClusterTask.R
@@ -1,14 +1,15 @@
 #' @rdname Task
 #' @export
 makeClusterTask = function(id = deparse(substitute(data)), data, weights = NULL, blocking = NULL,
   coordinates = NULL, fixup.data = "warn", check.data = TRUE) {
+
   assertString(id)
   assertDataFrame(data)
   assertChoice(fixup.data, choices = c("no", "quiet", "warn"))
   assertFlag(check.data)

   task = makeUnsupervisedTask("cluster", data = data, weights = weights,
-      blocking = blocking, fixup.data = fixup.data,
-      check.data = check.data, coordinates = coordinates)
+    blocking = blocking, fixup.data = fixup.data,
+    check.data = check.data, coordinates = coordinates)
   task$task.desc = makeClusterTaskDesc(id, data, weights, blocking, coordinates)
   addClasses(task, "ClusterTask")
 }
@@ -16,6 +17,7 @@ makeClusterTask = function(id = deparse(substitute(data)), data, weights = NULL,
 #' @export
 #' @rdname makeTaskDesc
 makeClusterTaskDesc = function(id, data, weights, blocking, coordinates) {
+
   target = character(0L)
   td = makeTaskDescInternal("cluster", id, data, target, weights, blocking, coordinates)
   return(addClasses(td, c("ClusterTaskDesc", "UnsupervisedTaskDesc")))
@@ -23,5 +25,6 @@ makeClusterTaskDesc = function(id, data, weights, blocking, coordinates) {

 #' @export
 print.ClusterTask = function(x, ...) {
+
   print.UnsupervisedTask(x)
 }
diff --git a/R/ConstantClassWrapper.R b/R/ConstantClassWrapper.R
index 63ac98283a..723e34d974 100644
--- a/R/ConstantClassWrapper.R
+++ b/R/ConstantClassWrapper.R
@@ -12,6 +12,7 @@
 #' @family wrapper
 #' @export
 makeConstantClassWrapper = function(learner, frac = 0) {
+
   learner = checkLearner(learner, "classif")

   lrn = makeBaseWrapper(
@@ -31,14 +32,15 @@ makeConstantClassWrapper = function(learner, frac = 0) {

 #' @export
 trainLearner.ConstantClassWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, frac = 0, ...) {
+
   labels.distribution = sort(prop.table(table(getTaskTargets(subsetTask(.task, .subset)))), decreasing = TRUE)
   most.frequent = labels.distribution[1L]
   if (most.frequent >= (1 - frac)) {
     mod = makeS3Obj("ConstantClassModelConstant",
-        label = factor(names(most.frequent)),
-        levels = .task$task.desc$class.levels)
+      label = factor(names(most.frequent)),
+      levels = .task$task.desc$class.levels)
     m = makeWrappedModel.Learner(.learner, mod, getTaskDesc(.task), .subset,
-        getTaskFeatureNames(.task), getTaskFactorLevels(.task), 0)
+      getTaskFeatureNames(.task), getTaskFactorLevels(.task), 0)
   } else {
     m = train(.learner$next.learner, .task, .subset, weights = .weights)
   }
@@ -48,13 +50,14 @@ trainLearner.ConstantClassWrapper = function(.learner, .task, .subset = NULL, .w

 #' @export
 predictLearner.ConstantClassWrapper = function(.learner, .model, .newdata, ...) {
+
   mod = .model$learner.model$next.model$learner.model
   if (inherits(mod, "ConstantClassModelConstant")) {
     switch(.learner$predict.type,
       response = rep.int(mod$label, nrow(.newdata)),
       prob = matrix(as.numeric(mod$levels == mod$label),
-          ncol = length(mod$levels), nrow = nrow(.newdata),
-          byrow = TRUE, dimnames = list(NULL, mod$levels))
+        ncol = length(mod$levels), nrow = nrow(.newdata),
+        byrow = TRUE, dimnames = list(NULL, mod$levels))
     )
   } else {
     NextMethod()
diff --git a/R/CostSensClassifWrapper.R b/R/CostSensClassifWrapper.R
index 613bf84c5c..9c9c6e8008 100644
--- a/R/CostSensClassifWrapper.R
+++ b/R/CostSensClassifWrapper.R
@@ -17,6 +17,7 @@
 #' @family wrapper
 #' @aliases CostSensClassifWrapper CostSensClassifModel
 makeCostSensClassifWrapper = function(learner) {
+
   learner = checkLearner(learner, "classif")
   learner = setPredictType(learner, "response")
   id = stri_paste("costsens", learner$id, sep = ".")
@@ -26,6 +27,7 @@ makeCostSensClassifWrapper = function(learner) {

 #' @export
 trainLearner.CostSensClassifWrapper = function(.learner, .task, .subset = NULL, ...) {
+
   # note that no hyperpars can be in ..., they would refer to the wrapper
   .task = subsetTask(.task, subset = .subset)
   feats = getTaskData(.task)
@@ -54,17 +56,18 @@ trainLearner.CostSensClassifWrapper = function(.learner, .task, .subset = NULL,

 #' @export
 predictLearner.CostSensClassifWrapper = function(.learner, .model, .newdata, ...) {
+
   m = .model$learner.model$next.model
   mm = m$learner.model
   # handle constant prediction
-  if (inherits(mm, "CostSensClassifModelConstant"))
+  if (inherits(mm, "CostSensClassifModelConstant")) {
     return(as.factor(rep(mm$y, nrow(.newdata))))
+  }
   NextMethod()
 }

 #' @export
 getLearnerProperties.CostSensClassifWrapper = function(learner) {
+
   setdiff(getLearnerProperties(learner$next.learner), c("weights", "prob"))
 }
-
-
diff --git a/R/CostSensRegrWrapper.R b/R/CostSensRegrWrapper.R
index b5eb628bc6..8605fefaac 100644
--- a/R/CostSensRegrWrapper.R
+++ b/R/CostSensRegrWrapper.R
@@ -14,6 +14,7 @@
 #' @family wrapper
 #' @aliases CostSensRegrWrapper CostSensRegrModel
 makeCostSensRegrWrapper = function(learner) {
+
   learner = checkLearner(learner, "regr")
   # we cannot make use of 'se' here
   learner = setPredictType(learner, "response")
@@ -24,6 +25,7 @@ makeCostSensRegrWrapper = function(learner) {

 #' @export
 trainLearner.CostSensRegrWrapper = function(.learner, .task, .subset = NULL, ...) {
+
   # note that no hyperpars can be in ..., they would refer to the wrapper
   .task = subsetTask(.task, subset = .subset)
   d = getTaskData(.task)
@@ -34,6 +36,7 @@ trainLearner.CostSensRegrWrapper = function(.learner, .task, .subset = NULL, ...
 }

 doCostSensRegrTrainIteration = function(learner, cl, costs, d) {
+
   setSlaveOptions()
   data = cbind(d, ..y.. = costs[, cl])
   task = makeRegrTask(id = cl, data = data, target = "..y..", check.data = FALSE, fixup.data = "quiet")
@@ -42,6 +45,7 @@ doCostSensRegrTrainIteration = function(learner, cl, costs, d) {

 #' @export
 predictLearner.CostSensRegrWrapper = function(.learner, .model, .newdata, ...) {
+
   p = predictHomogeneousEnsemble(.learner, .model, .newdata, ...)
   # get class per row with minimal estimated costs
   p = apply(p, 1L, getMinIndex)
@@ -52,5 +56,6 @@ predictLearner.CostSensRegrWrapper = function(.learner, .model, .newdata, ...) {

 #' @export
 getLearnerProperties.CostSensRegrWrapper = function(learner) {
+
   setdiff(getLearnerProperties(learner$next.learner), c("weights", "prob"))
 }
diff --git a/R/CostSensTask.R b/R/CostSensTask.R
index 82e4b23721..8f7d72ac1a 100644
--- a/R/CostSensTask.R
+++ b/R/CostSensTask.R
@@ -2,6 +2,7 @@
 #' @rdname Task
 #' @family costsens
 makeCostSensTask = function(id = deparse(substitute(data)), data, costs, blocking = NULL, coordinates = NULL, fixup.data = "warn", check.data = TRUE) {
+
   assertString(id)
   assertDataFrame(data)
   assertChoice(fixup.data, choices = c("no", "quiet", "warn"))
@@ -13,24 +14,28 @@ makeCostSensTask = function(id = deparse(substitute(data)), data, costs, blockin

   if (fixup.data != "no") {
     assert(checkMatrix(costs), checkDataFrame(costs))
-    if (is.data.frame(costs))
+    if (is.data.frame(costs)) {
       costs = as.matrix(costs)
-    if (is.null(colnames(costs)))
+    }
+    if (is.null(colnames(costs))) {
       colnames(costs) = stri_paste("y", seq_col(costs))
+    }
   }

   task = makeSupervisedTask("costsens", data = data, target = target,
-      weights = weights, blocking = blocking,
-      coordinates = coordinates, fixup.data = fixup.data,
-      check.data = check.data)
+    weights = weights, blocking = blocking,
+    coordinates = coordinates, fixup.data = fixup.data,
+    check.data = check.data)

   if (check.data) {
     assertMatrix(costs, any.missing = FALSE, col.names = "strict")
     assertNumeric(costs, lower = 0)
-    if (nrow(costs) != nrow(data))
+    if (nrow(costs) != nrow(data)) {
       stopf("Number of rows in cost matrix (%s) should equal the number of observations (%s).", nrow(costs), nrow(data))
+    }
     # we use ..y.. later in the models as a name for temp labels
-    if ("..y.." %in% c(colnames(data), colnames(costs)))
+    if ("..y.." %in% c(colnames(data), colnames(costs))) {
       stopf("The name '..y..' is currently reserved for costsens tasks. You can use it neither for features nor labels!")
+    }
   }

   task$task.desc = makeCostSensTaskDesc(id, data, target, blocking, costs, coordinates)
@@ -40,6 +45,7 @@ makeCostSensTask = function(id = deparse(substitute(data)), data, costs, blockin
 #' @export
 #' @rdname makeTaskDesc
 makeCostSensTaskDesc = function(id, data, target, blocking, costs, coordinates) {
+
   td = makeTaskDescInternal("costsens", id, data, target, weights = NULL, blocking = blocking, coordinates)
   td$class.levels = colnames(costs)
   td$costs = costs
@@ -48,6 +54,7 @@ makeCostSensTaskDesc = function(id, data, target, blocking, costs, coordinates)

 #' @export
 print.CostSensTask = function(x, ...) {
+
   print.SupervisedTask(x, print.target = FALSE, print.weights = FALSE)
   levs = x$task.desc$class.levels
   catf("Classes: %i\n%s", length(levs), clipString(collapse(levs, sep = ", "), 30L))
diff --git a/R/CostSensWeightedPairsWrapper.R b/R/CostSensWeightedPairsWrapper.R
index b2c01e9604..e4b5a99e39 100644
--- a/R/CostSensWeightedPairsWrapper.R
+++ b/R/CostSensWeightedPairsWrapper.R
@@ -27,6 +27,7 @@
 #' @family costsens
 #' @aliases CostSensWeightedPairsWrapper CostSensWeightedPairsModel
 makeCostSensWeightedPairsWrapper = function(learner) {
+
   learner = checkLearner(learner, "classif", props = "weights")
   learner = setPredictType(learner, "response")
   id = stri_paste("costsens", learner$id, sep = ".")
@@ -36,6 +37,7 @@ makeCostSensWeightedPairsWrapper = function(learner) {

 #' @export
 trainLearner.CostSensWeightedPairsWrapper = function(.learner, .task, .subset = NULL, ...) {
+
   # note that no hyperpars can be in ..., they would refer to the wrapper
   .task = subsetTask(.task, subset = .subset)
   costs = getTaskCosts(.task)
@@ -70,6 +72,7 @@ trainLearner.CostSensWeightedPairsWrapper = function(.learner, .task, .subset =

 #' @export
 predictLearner.CostSensWeightedPairsWrapper = function(.learner, .model, .newdata, ...) {
+
   classes = .model$task.desc$class.levels
   preds = predictHomogeneousEnsemble(.learner, .model, .newdata, ...)
   factor(apply(preds, 1L, computeMode), levels = classes)
@@ -77,5 +80,6 @@ predictLearner.CostSensWeightedPairsWrapper = function(.learner, .model, .newdat

 #' @export
 getLearnerProperties.CostSensWeightedPairsWrapper = function(learner) {
+
   setdiff(getLearnerProperties(learner$next.learner), c("weights", "prob"))
 }
diff --git a/R/DownsampleWrapper.R b/R/DownsampleWrapper.R
index bde98a0154..a0aee140df 100644
--- a/R/DownsampleWrapper.R
+++ b/R/DownsampleWrapper.R
@@ -16,11 +16,12 @@
 #' @family wrapper
 #' @export
 makeDownsampleWrapper = function(learner, dw.perc = 1, dw.stratify = FALSE) {
+
   learner = checkLearner(learner)
   pv = list()
   if (!missing(dw.perc)) {
     assertNumber(dw.perc, na.ok = FALSE, lower = 0, upper = 1)
-    if (dw.perc == 0){
+    if (dw.perc == 0) {
       stopf("You can't downsample %s to 0", learner$id)
     }
     pv$dw.perc = dw.perc
@@ -41,11 +42,12 @@ makeDownsampleWrapper = function(learner, dw.perc = 1, dw.stratify = FALSE) {

 #' @export
 trainLearner.DownsampleWrapper = function(.learner, .task, .subset = NULL, .weights = NULL,
   dw.perc = 1, dw.stratify = FALSE, ...) {
+
   # If weights vector length fits to task size, set weights before subsetting (Issue #838)
   if (length(.weights) == getTaskSize(.task)) {
     .task$weights = .weights
     .task = subsetTask(.task, .subset)
-  # otherwise subset first and then set weights
+    # otherwise subset first and then set weights
   } else {
     .task = subsetTask(.task, .subset)
     .task$weights = .weights
diff --git a/R/DummyFeaturesWrapper.R b/R/DummyFeaturesWrapper.R
index a35ab24e6e..77c8a59215 100644
--- a/R/DummyFeaturesWrapper.R
+++ b/R/DummyFeaturesWrapper.R
@@ -10,16 +10,19 @@
 #' @family wrapper
 #' @export
 makeDummyFeaturesWrapper = function(learner, method = "1-of-n", cols = NULL) {
+
   learner = checkLearner(learner)
   args = list(method = method, cols = cols)
   rm(list = names(args))

   trainfun = function(data, target, args) {
+
     data = do.call(createDummyFeatures, c(list(obj = data, target = target), args))
     return(list(data = data, control = list()))
   }

   predictfun = function(data, target, args, control) {
+
     y = intersect(target, colnames(data))
     data = do.call(createDummyFeatures, c(list(obj = data, target = y), args))
     return(data)
@@ -31,5 +34,6 @@ makeDummyFeaturesWrapper = function(learner, method = "1-of-n", cols = NULL) {
 }

 getLearnerProperties.DummyFeaturesWrapper = function(learner) {
+
   union(getLearnerProperties(learner$next.learner), c("factors", "ordered"))
 }
diff --git a/R/FailureModel.R b/R/FailureModel.R
index 26960d76dd..6d4f9ce09d 100644
--- a/R/FailureModel.R
+++ b/R/FailureModel.R
@@ -34,26 +34,30 @@ NULL


 predictFailureModel = function(model, newdata) {
+
   lrn = model$learner
   type = lrn$type
   ptype = lrn$predict.type
   n = nrow(newdata)
   if (type == "classif") {
     levs = model$task.desc$class.levels
-    res = if (ptype == "response")
+    res = if (ptype == "response") {
       factor(rep(NA_character_, n), levels = levs)
-    else
+    } else {
       matrix(NA_real_, nrow = n, ncol = length(levs), dimnames = list(NULL, levs))
+    }
   } else if (type == "regr") {
-    res = if (ptype == "response")
+    res = if (ptype == "response") {
       rep(NA_real_, n)
-    else
+    } else {
       matrix(NA_real_, nrow = n, ncol = 2L, dimnames = list(NULL, c("response", "se")))
+    }
   } else if (type == "surv") {
-    if (ptype == "response")
+    if (ptype == "response") {
       res = rep.int(NA_real_, n)
-    else
+    } else {
       stop("Predict type 'prob' for survival not yet supported")
+    }
   } else if (type == "costsens") {
     levs = model$task.desc$class.levels
     res = factor(rep(NA_character_, n), levels = levs)
@@ -65,21 +69,25 @@ predictFailureModel = function(model, newdata) {

 #' @export
 print.FailureModel = function(x, ...) {
+
   print.WrappedModel(x)
   catf("Training failed: %s", getFailureModelMsg(x))
 }

 #' @export
 isFailureModel.FailureModel = function(model) {
+
   return(TRUE)
 }

 #' @export
 getFailureModelMsg.FailureModel = function(model) {
+
   return(as.character(model$learner.model))
 }

 #' @export
 getFailureModelDump.FailureModel = function(model) {
+
   return(model$dump)
 }
diff --git a/R/FeatSelControl.R b/R/FeatSelControl.R
index 7950b91c16..ab11100486 100644
--- a/R/FeatSelControl.R
+++ b/R/FeatSelControl.R
@@ -105,10 +105,11 @@ makeFeatSelControl = function(same.resampling.instance, impute.val = NULL, maxit
   maxit = asCount(maxit, na.ok = TRUE, positive = TRUE)
   max.features = asCount(max.features, na.ok = TRUE, positive = TRUE)

-  if (identical(log.fun, "default"))
+  if (identical(log.fun, "default")) {
     log.fun = logFunFeatSel
-  else if (identical(log.fun, "memory"))
+  } else if (identical(log.fun, "memory")) {
     log.fun = logFunTuneMemory
+  }
   x = makeOptControl(same.resampling.instance, impute.val, tune.threshold, tune.threshold.args, log.fun, ...)
   x$maxit = maxit
   x$max.features = max.features
@@ -117,18 +118,20 @@ makeFeatSelControl = function(same.resampling.instance, impute.val = NULL, maxit

 #' @export
 print.FeatSelControl = function(x, ...) {
+
   catf("FeatSel control: %s", class(x)[1])
   catf("Same resampling instance: %s", x$same.resampling.instance)
   catf("Imputation value: %s", ifelse(is.null(x$impute.val), "", sprintf("%g", x$impute.val)))
-  if (is.na(x$max.features))
+  if (is.na(x$max.features)) {
     catf("Max. features: ")
-  else
+  } else {
     catf("Max. features: %i", x$max.features)
+  }
   catf("Max. iterations: %i", x$maxit)
   catf("Tune threshold: %s", x$tune.threshold)
-  if (length(x$extra.args))
+  if (length(x$extra.args)) {
     catf("Further arguments: %s", convertToShortString(x$extra.args))
-  else
+  } else {
     catf("Further arguments: ")
+  }
 }
-
diff --git a/R/FeatSelControlGA.R b/R/FeatSelControlGA.R
index c739988012..ef5c9a50fc 100644
--- a/R/FeatSelControlGA.R
+++ b/R/FeatSelControlGA.R
@@ -8,10 +8,10 @@ makeFeatSelControlGA = function(same.resampling.instance = TRUE, impute.val = NU
   maxit = asCount(maxit, positive = TRUE)
   assertFlag(comma)
   mu = asCount(mu, positive = TRUE)
-  if (missing(lambda)) {
+  if (missing(lambda)) {
     lambda = if (comma) 2L * mu else round(mu / 2L)
   } else {
-    lam.low = if (comma) mu else 1L
+    lam.low = if (comma) mu else 1L
     lambda = asInt(lambda, lower = lam.low)
   }
   assertNumber(crossover.rate, lower = 0, upper = 1)
diff --git a/R/FeatSelResult.R b/R/FeatSelResult.R
index e7dd31dd5c..da80c46e1d 100644
--- a/R/FeatSelResult.R
+++ b/R/FeatSelResult.R
@@ -22,9 +22,11 @@ NULL

 #' @export
 print.FeatSelResult = function(x, ...) {
+
   catf("FeatSel result:")

   shortenX = function(x) {
+
     clipString(collapse(x, ", "), 50L)
   }

@@ -32,8 +34,9 @@ print.FeatSelResult = function(x, ...)
{ catf("Bits (%i): %s", length(x$x.bit.names), shortenX(x$x.bit.names)) } catf("Features (%i): %s", length(x$x), shortenX(x$x)) - if (!is.null(x$threshold)) + if (!is.null(x$threshold)) { catf("Threshold: %s", collapse(sprintf("%2.2f", x$threshold))) + } catf("%s", perfsToString(x$y)) } @@ -48,5 +51,3 @@ makeFeatSelResultFromOptPath = function(learner, measures, resampling, control, x = bits.to.features(x.bits, task) makeOptResult(learner, control, x, e$y, resampling, threshold, opt.path, "FeatSelResult", x.bit.names = x.bit.names) } - - diff --git a/R/FeatSelWrapper.R b/R/FeatSelWrapper.R index 948c8012bd..04bd5d0288 100644 --- a/R/FeatSelWrapper.R +++ b/R/FeatSelWrapper.R @@ -53,18 +53,20 @@ makeFeatSelWrapper = function(learner, resampling, measures, bit.names, bits.to. } #' @export -trainLearner.FeatSelWrapper = function(.learner, .task, .subset = NULL, ...) { +trainLearner.FeatSelWrapper = function(.learner, .task, .subset = NULL, ...) { + task = subsetTask(.task, .subset) - if (length(.learner$bit.names) == 0) - #FIXME: really look at bitnames / bits.to.features stuff and test it. + if (length(.learner$bit.names) == 0) { + # FIXME: really look at bitnames / bits.to.features stuff and test it. # do we need the extra case here? or = selectFeatures(.learner$next.learner, task, .learner$resampling, measures = .learner$measures, control = .learner$control, show.info = .learner$show.info) - else + } else { or = selectFeatures(.learner$next.learner, task, .learner$resampling, measures = .learner$measures, bit.names = .learner$bit.names, bits.to.features = .learner$bits.to.features, control = .learner$control, show.info = .learner$show.info) + } task = subsetTask(task, features = or$x) m = train(.learner$next.learner, task) x = makeChainModel(next.model = m, cl = "FeatSelModel") @@ -74,7 +76,7 @@ trainLearner.FeatSelWrapper = function(.learner, .task, .subset = NULL, ...) { #' @export predictLearner.FeatSelWrapper = function(.learner, .model, .newdata, ...) { + .newdata = .newdata[, .model$learner.model$opt.result$x, drop = FALSE] predictLearner(.learner$next.learner, .model$learner.model$next.model, .newdata, ...) } - diff --git a/R/Filter.R b/R/Filter.R index 56a2ec2db1..c35f72e52a 100644 --- a/R/Filter.R +++ b/R/Filter.R @@ -1,4 +1,4 @@ -.FilterRegister = new.env() # nolint +.FilterRegister = new.env() # nolint #' Create a feature filter. 
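Since the hunks that follow mostly reindent filter definitions, a compact sketch of the makeFilter() contract may help as orientation: fun(task, nselect, ...) must return a numeric vector of scores named by feature, and registration in .FilterRegister happens as a side effect. The filter below is a toy with an invented name, scoring numeric features by variance:

makeFilter(
  name = "example_variance",
  desc = "Toy filter scoring numeric features by their variance",
  pkg = character(0L),
  supported.tasks = c("classif", "regr"),
  supported.features = "numerics",
  fun = function(task, nselect, ...) {
    data = getTaskData(task, target.extra = TRUE)$data
    sapply(data, var)  # named numeric vector, one score per feature
  })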
#' @@ -28,13 +28,14 @@ #' @export #' @family filter makeFilter = function(name, desc, pkg, supported.tasks, supported.features, fun) { + assertString(name) assertString(desc) assertCharacter(pkg, any.missing = FALSE) assertCharacter(supported.tasks, any.missing = FALSE) assertCharacter(supported.features, any.missing = FALSE) assertFunction(fun, c("task", "nselect")) - obj = makeS3Obj("Filter", + obj = makeS3Obj("Filter", name = name, desc = desc, pkg = pkg, @@ -66,7 +67,9 @@ makeFilter = function(name, desc, pkg, supported.tasks, supported.features, fun) #' @export #' @family filter listFilterMethods = function(desc = TRUE, tasks = FALSE, features = FALSE, include.deprecated = FALSE) { + tag2df = function(tags, prefix = "") { + unique.tags = sort(unique(unlist(tags))) res = asMatrixRows(lapply(tags, "%in%", x = unique.tags)) colnames(res) = stri_paste(prefix, unique.tags) @@ -85,33 +88,41 @@ listFilterMethods = function(desc = TRUE, tasks = FALSE, features = FALSE, inclu description = extractSubList(filters, "desc") - if (desc) + if (desc) { df$desc = description - if (tasks) + } + if (tasks) { df = cbind(df, tag2df(extractSubList(filters, "supported.tasks"), prefix = "task.")) - if (features) + } + if (features) { df = cbind(df, tag2df(extractSubList(filters, "supported.features"), prefix = "feature.")) + } deprecated = stri_endswith(description, fixed = "(DEPRECATED)") - if (include.deprecated) + if (include.deprecated) { df$deprecated = deprecated - else + } else { df = df[!deprecated, ] + } res = setRowNames(sortByCol(df, "id"), NULL) addClasses(res, "FilterMethodsList") } #' @export print.FilterMethodsList = function(x, len = 40, ...) { - if (!is.null(x$desc)) + + if (!is.null(x$desc)) { x$desc = clipString(x$desc, len = len) + } NextMethod() } #' @export print.Filter = function(x, ...) { + catf("Filter: '%s'", x$name) - if (!isScalarNA(x$pkg)) + if (!isScalarNA(x$pkg)) { catf("Packages: '%s'", collapse(cleanupPackageNames(x$pkg))) + } catf("Supported tasks: %s", collapse(x$supported.tasks)) catf("Supported features: %s", collapse(x$supported.features)) } @@ -130,10 +141,11 @@ NULL makeFilter( name = "mrmr", desc = "Minimum redundancy, maximum relevance filter", - pkg = "mRMRe", + pkg = "mRMRe", supported.tasks = c("regr", "surv"), supported.features = c("numerics", "ordered"), fun = function(task, nselect, ...) { + if (inherits(task, "SurvTask")) { data = getTaskData(task, target.extra = TRUE, recode.target = "surv") data = cbind(..surv = data$target, data$data) @@ -154,8 +166,7 @@ makeFilter( res = mRMRe::mRMR.classic(data = data, target_indices = target.ind, feature_count = nselect, ...) scores = as.numeric(mRMRe::scores(res)[[1L]]) setNames(scores, res@feature_names[as.integer(mRMRe::solutions(res)[[1L]])]) - } -) + }) # carscore ---------------- @@ -170,15 +181,15 @@ NULL makeFilter( name = "carscore", desc = "CAR scores", - pkg = "care", + pkg = "care", supported.tasks = "regr", supported.features = "numerics", fun = function(task, nselect, ...) { + data = getTaskData(task, target.extra = TRUE) y = care::carscore(Xtrain = data$data, Ytrain = data$target, verbose = FALSE, ...)^2 setNames(as.double(y), names(y)) - } -) + }) # randomForestSRC_importance ---------------- @@ -199,11 +210,12 @@ NULL rf.importance = makeFilter( name = "randomForestSRC_importance", desc = "Importance of random forests fitted in package 'randomForestSRC'. 
Importance is calculated using argument 'permute'.", - pkg = "randomForestSRC", + pkg = "randomForestSRC", supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, method = "permute", ...) { - assertChoice(method, choices = c("permute", "random", "anti", "permute.ensemble", "random.ensemble", "anti.ensemble")) + + assertChoice(method, choices = c("permute", "random", "anti", "permute.ensemble", "random.ensemble", "anti.ensemble")) im = randomForestSRC::rfsrc(getTaskFormula(task), data = getTaskData(task), proximity = FALSE, forest = FALSE, importance = method, ...)$importance if (inherits(task, "ClassifTask")) { @@ -214,11 +226,11 @@ rf.importance = makeFilter( y = unname(im) } setNames(y, ns) - } -) + }) .FilterRegister[["rf.importance"]] = rf.importance .FilterRegister[["rf.importance"]]$desc = "(DEPRECATED)" .FilterRegister[["rf.importance"]]$fun = function(...) { + .Deprecated(old = "Filter 'rf.importance'", new = "Filter 'randomForestSRC_importance' (package randomForestSRC)") .FilterRegister[["randomForestSRC_importance"]]$fun(...) } @@ -226,11 +238,12 @@ rf.importance = makeFilter( randomForestSRC.rfsrc = makeFilter( name = "randomForestSRC_importance", desc = "Importance of random forests fitted in package 'randomForestSRC'. Importance is calculated using argument 'permute'.", - pkg = "randomForestSRC", + pkg = "randomForestSRC", supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, method = "permute", ...) { - assertChoice(method, choices = c("permute", "random", "anti", "permute.ensemble", "random.ensemble", "anti.ensemble")) + + assertChoice(method, choices = c("permute", "random", "anti", "permute.ensemble", "random.ensemble", "anti.ensemble")) im = randomForestSRC::rfsrc(getTaskFormula(task), data = getTaskData(task), proximity = FALSE, forest = FALSE, importance = method, ...)$importance if (inherits(task, "ClassifTask")) { @@ -241,11 +254,11 @@ randomForestSRC.rfsrc = makeFilter( y = unname(im) } setNames(y, ns) - } -) + }) .FilterRegister[["randomForestSRC.rfsrc"]] = randomForestSRC.rfsrc .FilterRegister[["randomForestSRC.rfsrc"]]$desc = "(DEPRECATED)" .FilterRegister[["randomForestSRC.rfsrc"]]$fun = function(...) { + .Deprecated(old = "Filter 'randomForestSRC.rfsrc'", new = "Filter 'randomForestSRC_importance' (package randomForestSRC)") .FilterRegister[["randomForestSRC_importance"]]$fun(...) } @@ -266,18 +279,19 @@ NULL rf.min.depth = makeFilter( name = "randomForestSRC_var.select", desc = "Minimal depth of / variable hunting via method var.select on random forests fitted in package 'randomForestSRC'.", - pkg = "randomForestSRC", + pkg = "randomForestSRC", supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, method = "md", ...) { + im = randomForestSRC::var.select(getTaskFormula(task), getTaskData(task), method = method, verbose = FALSE, ...)$md.obj$order setNames(-im[, 1L], rownames(im)) - } -) + }) .FilterRegister[["rf.min.depth"]] = rf.min.depth .FilterRegister[["rf.min.depth"]]$desc = "(DEPRECATED)" .FilterRegister[["rf.min.depth"]]$fun = function(...) { + .Deprecated(old = "Filter 'rf.min.depth'", new = "Filter 'randomForestSRC_var.select'") .FilterRegister[["randomForestSRC_var.select"]]$fun(...) 
} @@ -285,18 +299,19 @@ rf.min.depth = makeFilter( randomForestSRC.var.select = makeFilter( name = "randomForestSRC_var.select", desc = "Minimal depth of / variable hunting via method var.select on random forests fitted in package 'randomForestSRC'.", - pkg = "randomForestSRC", + pkg = "randomForestSRC", supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, method = "md", ...) { + im = randomForestSRC::var.select(getTaskFormula(task), getTaskData(task), method = method, verbose = FALSE, ...)$md.obj$order setNames(-im[, 1L], rownames(im)) - } -) + }) .FilterRegister[["randomForestSRC.var.select"]] = randomForestSRC.var.select .FilterRegister[["randomForestSRC.var.select"]]$desc = "(DEPRECATED)" .FilterRegister[["randomForestSRC.var.select"]]$fun = function(...) { + .Deprecated(old = "Filter 'randomForestSRC.var.select'", new = "Filter 'randomForestSRC_var.select' (package randomForestSRC)") .FilterRegister[["randomForestSRC_var.select"]]$fun(...) } @@ -319,12 +334,14 @@ makeFilter( supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, mtry = 5L, ...) { + args = list(...) # we need to set mtry, which is 5 by default in cforest, to p if p < mtry # otherwise we get a warning p = getTaskNFeats(task) - if (p < mtry) + if (p < mtry) { args$mtry = p + } cforest.args = as.list(base::args(party::cforest)) cforest.args = args[names(args) %in% names(cforest.args)] control.args = as.list(base::args(party::cforest_control)) @@ -333,11 +350,10 @@ makeFilter( varimp.args = args[names(args) %in% names(varimp.args)] ctrl = do.call(party::cforest_unbiased, control.args) fit = do.call(party::cforest, c(list(formula = getTaskFormula(task), data = getTaskData(task), controls = ctrl), - cforest.args)) + cforest.args)) im = do.call(party::varimp, c(list(obj = fit), varimp.args)) im - } -) + }) cforest.importance = makeFilter( name = "party_cforest.importance", @@ -346,12 +362,14 @@ cforest.importance = makeFilter( supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, mtry = 5L, ...) { + args = list(...) # we need to set mtry, which is 5 by default in cforest, to p if p < mtry # otherwise we get a warning p = getTaskNFeats(task) - if (p < mtry) + if (p < mtry) { args$mtry = p + } cforest.args = as.list(base::args(party::cforest)) cforest.args = args[names(args) %in% names(cforest.args)] control.args = as.list(base::args(party::cforest_control)) @@ -363,12 +381,12 @@ cforest.importance = makeFilter( cforest.args)) im = do.call(party::varimp, c(list(obj = fit), varimp.args)) im - } -) + }) .FilterRegister[["cforest.importance"]] = cforest.importance .FilterRegister[["cforest.importance"]]$desc = "(DEPRECATED)" .FilterRegister[["cforest.importance"]]$fun = function(...) { + .Deprecated(old = "Filter 'cforest.importance'", new = "Filter 'party_cforest.importance' (package party)") .FilterRegister[["party_cforest.importance"]]$fun(...) } @@ -394,6 +412,7 @@ makeFilter( supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, method = "oob.accuracy", ...) 
{ + assertChoice(method, choices = c("oob.accuracy", "node.impurity")) type = if (method == "oob.accuracy") 1L else 2L # no need to set importance = TRUE for node impurity (type = 2) @@ -401,8 +420,7 @@ makeFilter( keep.forest = FALSE, importance = (type != 2L)) im = randomForest::importance(rf, type = type, ...) setNames(im, rownames(im)) - } -) + }) randomForest.importance = makeFilter( name = "randomForest_importance", @@ -411,6 +429,7 @@ randomForest.importance = makeFilter( supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, method = "oob.accuracy", ...) { + assertChoice(method, choices = c("oob.accuracy", "node.impurity")) type = if (method == "oob.accuracy") 1L else 2L # no need to set importance = TRUE for node impurity (type = 2) @@ -418,12 +437,12 @@ randomForest.importance = makeFilter( keep.forest = FALSE, importance = (type != 2L)) im = randomForest::importance(rf, type = type, ...) setNames(im, rownames(im)) - } -) + }) .FilterRegister[["randomForest.importance"]] = randomForest.importance .FilterRegister[["randomForest.importance"]]$desc = "(DEPRECATED)" .FilterRegister[["randomForest.importance"]]$fun = function(...) { + .Deprecated(old = "Filter 'randomForest.importance'", new = "Filter 'randomForest_importance' (package randomForest)") .FilterRegister[["randomForest_importance"]]$fun(...) } @@ -440,14 +459,14 @@ NULL makeFilter( name = "linear.correlation", desc = "Pearson correlation between feature and target", - pkg = character(0L), + pkg = character(0L), supported.tasks = "regr", supported.features = "numerics", fun = function(task, nselect, ...) { + data = getTaskData(task, target.extra = TRUE) abs(cor(as.matrix(data$data), data$target, use = "pairwise.complete.obs", method = "pearson")[, 1L]) - } -) + }) # rank.correlation ---------------- @@ -461,14 +480,14 @@ NULL makeFilter( name = "rank.correlation", desc = "Spearman's correlation between feature and target", - pkg = character(0L), + pkg = character(0L), supported.tasks = "regr", supported.features = "numerics", fun = function(task, nselect, ...) { + data = getTaskData(task, target.extra = TRUE) abs(cor(as.matrix(data$data), data$target, use = "pairwise.complete.obs", method = "spearman")[, 1L]) - } -) + }) # FSelector_information.gain ---------------- @@ -480,30 +499,31 @@ makeFilter( makeFilter( name = "FSelector_information.gain", desc = "Entropy-based information gain between feature and target", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::information.gain(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) information.gain = makeFilter( name = "FSelector_information.gain", desc = "Entropy-based information gain between feature and target", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::information.gain(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) .FilterRegister[["information.gain"]] = information.gain .FilterRegister[["information.gain"]]$desc = "(DEPRECATED)" .FilterRegister[["information.gain"]]$fun = function(...) 
{ + .Deprecated(old = "Filter 'information.gain'", new = "Filter 'FSelector_information.gain' (package FSelector)") .FilterRegister[["FSelector_information.gain"]]$fun(...) } @@ -519,30 +539,31 @@ information.gain = makeFilter( makeFilter( name = "FSelector_gain.ratio", desc = "Entropy-based gain ratio between feature and target", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::gain.ratio(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) gain.ratio = makeFilter( name = "FSelector_gain.ratio", desc = "Entropy-based gain ratio between feature and target", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::gain.ratio(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) .FilterRegister[["gain.ratio"]] = gain.ratio .FilterRegister[["gain.ratio"]]$desc = "(DEPRECATED)" .FilterRegister[["gain.ratio"]]$fun = function(...) { + .Deprecated(old = "Filter 'gain.ratio'", new = "Filter 'FSelector_gain.ratio' (package FSelector)") .FilterRegister[["FSelector_gain.ratio"]]$fun(...) } @@ -557,30 +578,31 @@ gain.ratio = makeFilter( makeFilter( name = "FSelector_symmetrical.uncertainty", desc = "Entropy-based symmetrical uncertainty between feature and target", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::symmetrical.uncertainty(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) symmetrical.uncertainty = makeFilter( name = "FSelector_symmetrical.uncertainty", desc = "Entropy-based symmetrical uncertainty between feature and target", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::symmetrical.uncertainty(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) .FilterRegister[["symmetrical.uncertainty"]] = symmetrical.uncertainty .FilterRegister[["symmetrical.uncertainty"]]$desc = "(DEPRECATED)" .FilterRegister[["symmetrical.uncertainty"]]$fun = function(...) { + .Deprecated(old = "Filter 'symmetrical.uncertainty'", new = "Filter 'FSelector_symmetrical.uncertainty' (package FSelector)") .FilterRegister[["FSelector_symmetrical.uncertainty"]]$fun(...) } @@ -601,30 +623,31 @@ NULL makeFilter( name = "FSelector_chi.squared", desc = "Chi-squared statistic of independence between feature and target", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::chi.squared(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) chi.squared = makeFilter( name = "FSelector_chi.squared", desc = "Chi-squared statistic of independence between feature and target", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...)
{ + y = FSelector::chi.squared(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) .FilterRegister[["chi.squared"]] = chi.squared .FilterRegister[["chi.squared"]]$desc = "(DEPRECATED)" .FilterRegister[["chi.squared"]]$fun = function(...) { + .Deprecated(old = "Filter 'chi.squared'", new = "Filter 'FSelector_chi.squared' (package FSelector)") .FilterRegister[["FSelector_chi.squared"]]$fun(...) } @@ -652,30 +675,31 @@ NULL makeFilter( name = "FSelector_relief", desc = "RELIEF algorithm", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::relief(getTaskFormula(task), data = getTaskData(task), ...) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) relief = makeFilter( name = "FSelector_relief", desc = "RELIEF algorithm", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::relief(getTaskFormula(task), data = getTaskData(task), ...) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) .FilterRegister[["relief"]] = relief .FilterRegister[["relief"]]$desc = "(DEPRECATED)" .FilterRegister[["relief"]]$fun = function(...) { + .Deprecated(old = "Filter 'relief'", new = "Filter 'FSelector_relief' (package FSelector)") .FilterRegister[["FSelector_relief"]]$fun(...) } @@ -695,30 +719,31 @@ NULL makeFilter( name = "FSelector_oneR", desc = "oneR association rule", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::oneR(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) oneR = makeFilter( name = "FSelector_oneR", desc = "oneR association rule", - pkg = "FSelector", + pkg = "FSelector", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + y = FSelector::oneR(getTaskFormula(task), data = getTaskData(task)) setNames(y[["attr_importance"]], getTaskFeatureNames(task)) - } -) + }) .FilterRegister[["oneR"]] = oneR .FilterRegister[["oneR"]]$desc = "(DEPRECATED)" .FilterRegister[["oneR"]]$fun = function(...) { + .Deprecated(old = "Filter 'oneR'", new = "Filter 'FSelector_oneR' (package FSelector)") .FilterRegister[["FSelector_oneR"]]$fun(...) } @@ -738,30 +763,35 @@ NULL univariate = makeFilter( name = "univariate.model.score", desc = "Resamples an mlr learner for each input feature individually. The resampling performance is used as filter score, with rpart as default learner.", - pkg = character(0L), + pkg = character(0L), supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, perf.learner = NULL, perf.measure = NULL, perf.resampling = NULL, ...) 
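As a usage sketch with illustrative argument values: a registered filter such as "univariate.model.score" defined here is typically consumed through filterFeatures(), whose perc/abs/threshold arguments also appear in trainLearner.FilterWrapper() later in this patch; extra arguments such as perf.learner are passed through to the filter fun:

library(mlr)
filtered.task = filterFeatures(iris.task, method = "univariate.model.score",
  abs = 2L, perf.learner = "classif.rpart")
getTaskFeatureNames(filtered.task)  # the two highest-scoring features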
{ + typ = getTaskType(task) if (is.null(perf.learner)) { - if (typ == "classif") + if (typ == "classif") { perf.learner = "classif.rpart" - else if (typ == "regr") + } else if (typ == "regr") { perf.learner = "regr.rpart" - else if (typ == "surv") + } else if (typ == "surv") { perf.learner = "surv.rpart" + } } if (is.null(perf.measure)) { perf.measure = getDefaultMeasure(task) } perf.learner = checkLearner(perf.learner) perf.measure = checkMeasures(perf.measure, perf.learner) - if (length(perf.measure) != 1L) + if (length(perf.measure) != 1L) { stop("Exactly one measure must be provided") - if (is.null(perf.resampling)) + } + if (is.null(perf.resampling)) { perf.resampling = makeResampleDesc("Subsample", iters = 1L, split = 0.67) - if (getTaskType(task) != perf.learner$type) + } + if (getTaskType(task) != perf.learner$type) { stopf("Expected task of type '%s', not '%s'", getTaskType(task), perf.learner$type) + } fns = getTaskFeatureNames(task) res = double(length(fns)) @@ -769,14 +799,15 @@ univariate = makeFilter( subtask = subsetTask(task, features = fns[i]) res[i] = resample(learner = perf.learner, task = subtask, resampling = perf.resampling, measures = perf.measure, keep.pred = FALSE, show.info = FALSE)$aggr } - if (perf.measure[[1L]]$minimize) + if (perf.measure[[1L]]$minimize) { res = -1.0 * res + } setNames(res, fns) - } -) + }) .FilterRegister[["univariate"]] = univariate .FilterRegister[["univariate"]]$desc = "(DEPRECATED)" .FilterRegister[["univariate"]]$fun = function(...) { + .Deprecated(old = "Filter 'univariate'", new = "Filter 'univariate.model.score'") .FilterRegister[["univariate.model.score"]]$fun(...) } @@ -798,14 +829,15 @@ makeFilter( supported.tasks = "classif", supported.features = "numerics", fun = function(task, nselect, ...) { + data = getTaskData(task) vnapply(getTaskFeatureNames(task), function(feat.name) { + f = as.formula(stri_paste(feat.name, "~", getTaskTargetNames(task))) aov.t = aov(f, data = data) summary(aov.t)[[1L]][1L, "F value"] }) - } -) + }) # kruskal.test ---------------- @@ -827,14 +859,15 @@ makeFilter( supported.tasks = "classif", supported.features = c("numerics", "factors"), fun = function(task, nselect, ...) { + data = getTaskData(task) sapply(getTaskFeatureNames(task), function(feat.name) { + f = as.formula(stri_paste(feat.name, "~", getTaskTargetNames(task))) t = kruskal.test(f, data = data) unname(t$statistic) }) - } -) + }) # variance ---------------- @@ -853,12 +886,13 @@ makeFilter( supported.tasks = c("classif", "regr", "surv"), supported.features = "numerics", fun = function(task, nselect, na.rm = TRUE, ...) 
{ + data = getTaskData(task) sapply(getTaskFeatureNames(task), function(feat.name) { + var(data[[feat.name]], na.rm = na.rm) }) - } -) + }) # permutation.importance ---------------- @@ -882,7 +916,8 @@ makeFilter( supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, imp.learner, measure, contrast = function(x, y) x - y, - aggregation = mean, nmc = 50L, replace = FALSE, nselect) { + aggregation = mean, nmc = 50L, replace = FALSE, nselect) { + imp = generateFeatureImportanceData(task, "permutation.importance", imp.learner, interaction = FALSE, measure = measure, contrast = contrast, aggregation = aggregation, @@ -890,8 +925,7 @@ makeFilter( imp = as.numeric(imp$res) names(imp) = getTaskFeatureNames(task) return(imp) - } -) + }) # auc ---------------- @@ -908,17 +942,18 @@ NULL makeFilter( name = "auc", desc = "AUC filter for binary classification tasks", - pkg = character(0L), + pkg = character(0L), supported.tasks = "classif", supported.features = "numerics", fun = function(task, nselect, ...) { + data = getTaskData(task, target.extra = TRUE) score = vnapply(data$data, function(x, y) { + measureAUC(x, y, task$task.desc$negative, task$task.desc$positive) }, y = data$target) abs(0.5 - score) - } -) + }) #' Filters from the package \pkg{praznik} use the mutual information criteria in a greedy forward fashion: #' \dQuote{praznik_CMIM}, \dQuote{praznik_DISR}, \dQuote{praznik_JMIM}, \dQuote{praznik_JMI}, @@ -931,9 +966,11 @@ makeFilter( NULL praznik_filter = function(fun) { + force(fun) function(task, nselect, ...) { + fun = getFromNamespace(fun, ns = "praznik") data = getTaskData(task) @@ -1036,9 +1073,11 @@ makeFilter( NULL FSelectorRcpp.filter = function(type) { + force(type) function(task, nselect, ...) 
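praznik_filter() and FSelectorRcpp.filter() above are closure factories: force() evaluates the method-name argument immediately, so each generated filter fun keeps its own method rather than lazily resolving to whatever the variable last held. A toy illustration of the pattern (make.scorer is invented):

make.scorer = function(fun.name) {
  force(fun.name)  # pin the name now, not at first call
  function(x) do.call(fun.name, list(x))
}
scorers = lapply(c("mean", "median"), make.scorer)
scorers[[1]](1:10)  # 5.5, i.e. the mean, not the median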
{ + data = getTaskData(task) X = data[getTaskFeatureNames(task)] y = data[[getTaskTargetNames(task)]] @@ -1053,7 +1092,7 @@ FSelectorRcpp.filter = function(type) { makeFilter( name = "FSelectorRcpp_information.gain", desc = "Entropy-based Filters: Algorithms that find ranks of importance of discrete attributes, basing on their entropy with a continous class attribute", - pkg = "FSelectorRcpp", + pkg = "FSelectorRcpp", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors", "integer", "logical", "character"), fun = FSelectorRcpp.filter("infogain") @@ -1064,7 +1103,7 @@ makeFilter( makeFilter( name = "FSelectorRcpp_gain.ratio", desc = "Entropy-based Filters: Algorithms that find ranks of importance of discrete attributes, basing on their entropy with a continous class attribute", - pkg = "FSelectorRcpp", + pkg = "FSelectorRcpp", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors", "integer", "logical", "character"), fun = FSelectorRcpp.filter("gainratio") @@ -1075,7 +1114,7 @@ makeFilter( makeFilter( name = "FSelectorRcpp_symmetrical.uncertainty", desc = "Entropy-based Filters: Algorithms that find ranks of importance of discrete attributes, basing on their entropy with a continous class attribute", - pkg = "FSelectorRcpp", + pkg = "FSelectorRcpp", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors", "integer", "logical", "character"), fun = FSelectorRcpp.filter("symuncert") @@ -1094,34 +1133,35 @@ NULL makeFilter( name = "ranger_permutation", desc = "Variable importance based on ranger permutation importance", - pkg = "ranger", + pkg = "ranger", supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, ...) { + lrn.type = paste0(getTaskType(task), ".ranger") lrn = makeLearner(lrn.type, importance = "permutation", ...) mod = train(lrn, task) ranger::importance(mod$learner.model) - } -) + }) ranger.permutation = makeFilter( name = "ranger_permutation", desc = "Variable importance based on ranger permutation importance", - pkg = "ranger", + pkg = "ranger", supported.tasks = c("classif", "regr", "surv"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, ...) { + lrn.type = paste0(getTaskType(task), ".ranger") lrn = makeLearner(lrn.type, importance = "permutation", ...) mod = train(lrn, task) ranger::importance(mod$learner.model) - } -) + }) .FilterRegister[["ranger.permutation"]] = ranger.permutation .FilterRegister[["ranger.permutation"]]$desc = "(DEPRECATED)" .FilterRegister[["ranger.permutation"]]$fun = function(...) { + .Deprecated(old = "Filter 'ranger.permutation'", new = "Filter 'ranger_permutation' (package ranger)") .FilterRegister[["ranger_permutation"]]$fun(...) } @@ -1139,35 +1179,36 @@ NULL makeFilter( name = "ranger_impurity", desc = "Variable importance based on ranger impurity importance", - pkg = "ranger", + pkg = "ranger", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, ...) { + lrn.type = paste0(getTaskType(task), ".ranger") lrn = makeLearner(lrn.type, importance = "impurity", ...) 
mod = train(lrn, task) ranger::importance(mod$learner.model) - } -) + }) ranger.impurity = makeFilter( name = "ranger_impurity", desc = "Variable importance based on ranger impurity importance", - pkg = "ranger", + pkg = "ranger", supported.tasks = c("classif", "regr"), supported.features = c("numerics", "factors", "ordered"), fun = function(task, nselect, ...) { + lrn.type = paste0(getTaskType(task), ".ranger") lrn = makeLearner(lrn.type, importance = "impurity", ...) mod = train(lrn, task) ranger::importance(mod$learner.model) - } -) + }) .FilterRegister[["ranger.impurity"]] = ranger.impurity .FilterRegister[["ranger.impurity"]]$desc = "(DEPRECATED)" .FilterRegister[["ranger.impurity"]]$fun = function(...) { + .Deprecated(old = "Filter 'ranger.impurity'", new = "Filter 'ranger_impurity' (package ranger)") .FilterRegister[["ranger_impurity"]]$fun(...) } diff --git a/R/FilterWrapper.R b/R/FilterWrapper.R index 0afe7ecf57..6d726815b1 100644 --- a/R/FilterWrapper.R +++ b/R/FilterWrapper.R @@ -55,6 +55,7 @@ #' print(getFilteredFeatures(mod)) #' # now nested resampling, where we extract the features that the filter method selected #' r = resample(lrn, task, outer, extract = function(model) { +#' #' getFilteredFeatures(model) #' }) #' print(r$extract) @@ -96,9 +97,9 @@ trainLearner.FilterWrapper = function(.learner, .task, .subset = NULL, .weights .task = subsetTask(.task, subset = .subset) .task = do.call(filterFeatures, c(list(task = .task, method = fw.method, - perc = fw.perc, abs = fw.abs, threshold = fw.threshold, - mandatory.feat = fw.mandatory.feat, - cache = .learner$cache), .learner$more.args)) + perc = fw.perc, abs = fw.abs, threshold = fw.threshold, + mandatory.feat = fw.mandatory.feat, + cache = .learner$cache), .learner$more.args)) m = train(.learner$next.learner, .task, weights = .weights) makeChainModel(next.model = m, cl = "FilterModel") } @@ -106,6 +107,7 @@ trainLearner.FilterWrapper = function(.learner, .task, .subset = NULL, .weights #' @export predictLearner.FilterWrapper = function(.learner, .model, .newdata, ...) { + features = getFilteredFeatures(.model) NextMethod(.newdata = .newdata[, features, drop = FALSE]) } @@ -118,11 +120,13 @@ predictLearner.FilterWrapper = function(.learner, .model, .newdata, ...) { #' @export #' @family filter getFilteredFeatures = function(model) { + UseMethod("getFilteredFeatures") } #' @export getFilteredFeatures.default = function(model) { + if (is.null(model$learner.model$next.model)) { NULL } else { @@ -132,5 +136,6 @@ getFilteredFeatures.default = function(model) { #' @export getFilteredFeatures.FilterModel = function(model) { + model$learner.model$next.model$features } diff --git a/R/HoldoutInstance_make_fixed.R b/R/HoldoutInstance_make_fixed.R index bba8984102..91d01b2572 100644 --- a/R/HoldoutInstance_make_fixed.R +++ b/R/HoldoutInstance_make_fixed.R @@ -10,6 +10,7 @@ #' @return ([ResampleInstance]). #' @export makeFixedHoldoutInstance = function(train.inds, test.inds, size) { + train.inds = asInteger(train.inds, any.missing = FALSE) test.inds = asInteger(test.inds, any.missing = FALSE) size = asInt(size, lower = 1L) diff --git a/R/HomogeneousEnsemble.R b/R/HomogeneousEnsemble.R index 02e9efef4b..2e7254823a 100644 --- a/R/HomogeneousEnsemble.R +++ b/R/HomogeneousEnsemble.R @@ -1,5 +1,6 @@ makeHomogeneousEnsemble = function(id, type, next.learner, package, par.set = makeParamSet(), learner.subclass, model.subclass, ...) 
{ + makeBaseWrapper(id, type, next.learner, package, par.set, learner.subclass = c(learner.subclass, "HomogeneousEnsemble"), model.subclass = c(model.subclass, "HomogeneousEnsembleModel"), @@ -11,12 +12,14 @@ makeHomogeneousEnsemble = function(id, type, next.learner, package, par.set = ma #' @export # if ANY model in the list is broken --> failure isFailureModel.HomogeneousEnsembleModel = function(model) { + mods = getLearnerModel(model, more.unwrap = FALSE) any(vlapply(mods, isFailureModel)) } #' @export getFailureModelMsg.HomogeneousEnsembleModel = function(model) { + mods = getLearnerModel(model, more.unwrap = FALSE) msgs = vcapply(mods, getFailureModelMsg) j = which.first(!is.na(msgs)) @@ -25,6 +28,7 @@ getFailureModelMsg.HomogeneousEnsembleModel = function(model) { #' @export getFailureModelDump.HomogeneousEnsembleModel = function(model) { + mods = getLearnerModel(model, more.unwrap = FALSE) msgs = lapply(mods, getFailureModelDump) j = which.first(!is.null(msgs)) @@ -36,17 +40,20 @@ getFailureModelDump.HomogeneousEnsembleModel = function(model) { #' @param learner.models Deprecated. #' @export getHomogeneousEnsembleModels = function(model, learner.models = FALSE) { + .Deprecated("getLearnerModel") getLearnerModel(model, more.unwrap = learner.models) } #' @export getLearnerModel.HomogeneousEnsembleModel = function(model, more.unwrap = FALSE) { + ms = model$learner.model$next.model - if (more.unwrap) + if (more.unwrap) { extractSubList(ms, "learner.model", simplify = FALSE) - else + } else { ms + } } ############################## helpers ############################## @@ -55,12 +62,15 @@ getLearnerModel.HomogeneousEnsembleModel = function(model, more.unwrap = FALSE) # rows = newdata points, cols = ensembles members # does only work for responses, not probs, se, etc predictHomogeneousEnsemble = function(.learner, .model, .newdata, .subset = NULL, ...) 
{ + models = getLearnerModel(.model, more.unwrap = FALSE) # for classif we convert factor to char, nicer to handle later on preds = lapply(models, function(mod) { + p = predict(mod, newdata = .newdata, subset = .subset, ...)$data$response - if (is.factor(p)) + if (is.factor(p)) { p = as.character(p) + } return(p) }) do.call(cbind, preds) @@ -69,6 +79,6 @@ predictHomogeneousEnsemble = function(.learner, .model, .newdata, .subset = NULL # call this at end of trainLearner.CostSensRegrWrapper # FIXME: potentially remove this when ChainModel is removed makeHomChainModel = function(learner, models) { + makeChainModel(next.model = models, cl = c(learner$model.subclass, "HomogeneousEnsembleModel")) } - diff --git a/R/Impute.R b/R/Impute.R index 9828fc81b8..6309954917 100644 --- a/R/Impute.R +++ b/R/Impute.R @@ -81,6 +81,7 @@ impute = function(obj, target = character(0L), classes = list(), cols = list(), dummy.classes = character(0L), dummy.cols = character(0L), dummy.type = "factor", force.dummies = FALSE, impute.new.levels = TRUE, recode.factor.levels = TRUE) { + assertList(cols) checkTargetPreproc(obj, target, names(cols)) UseMethod("impute") @@ -98,21 +99,25 @@ impute.data.frame = function(obj, target = character(0L), classes = list(), cols # check that we dont request dummy col to be created for the target if (length(target) != 0L) { not.ok = which.first(target %in% names(dummy.cols)) - if (length(not.ok) != 0L) + if (length(not.ok) != 0L) { stopf("Dummy creation of target column '%s' not possible", target[not.ok]) + } } assertList(classes) not.ok = which.first(names(classes) %nin% allowed.classes) - if (length(not.ok)) + if (length(not.ok)) { stopf("Column class '%s' for imputation not recognized", names(classes)[not.ok]) + } not.ok = which.first(names(cols) %nin% names(data)) - if (length(not.ok)) + if (length(not.ok)) { stopf("Column for imputation not present in data: %s", names(cols)[not.ok]) + } assertSubset(dummy.classes, choices = allowed.classes) assertCharacter(dummy.cols, any.missing = FALSE) not.ok = which.first(dummy.cols %nin% names(data)) - if (length(not.ok)) + if (length(not.ok)) { stopf("Column for dummy creation not present in data: %s", dummy.cols[not.ok]) + } assertCharacter(dummy.classes, any.missing = FALSE) assertFlag(force.dummies) assertChoice(dummy.type, c("factor", "numeric")) @@ -146,16 +151,19 @@ impute.data.frame = function(obj, target = character(0L), classes = list(), cols # handle dummies desc$dummies = union(names(feature.classes[feature.classes %in% dummy.classes]), dummy.cols) - if (!desc$force.dummies) + if (!desc$force.dummies) { desc$dummies = desc$dummies[vlapply(data[desc$dummies], anyMissing)] + } # cleanup desc$impute = Filter(Negate(is.null), desc$impute) # learn and thereby transform to list(impute(...), args(...)) desc$impute = Map(function(xn, x) { - if (class(x)[1L] != "ImputeMethod") + + if (class(x)[1L] != "ImputeMethod") { x = imputeConstant(x) + } list( impute = x$impute, args = do.call(x$learn, c(x$args, list(data = data, target = target, col = xn))) @@ -193,6 +201,7 @@ impute.Task = function(obj, target = character(0L), classes = list(), cols = lis #' @export print.ImputationDesc = function(x, ...) { + catf("Imputation description") catf("Target: %s", collapse(x$target)) catf("Features: %i; Imputed: %i", length(x$features), length(x$impute)) @@ -221,18 +230,21 @@ print.ImputationDesc = function(x, ...) 
{ #' @family impute #' @export reimpute = function(obj, desc) { + UseMethod("reimpute") } #' @export reimpute.data.frame = function(obj, desc) { + assertClass(desc, classes = "ImputationDesc") x = as.list(obj) # check for new columns new.cols = names(which(names(x) %nin% desc$cols)) - if (length(new.cols)) + if (length(new.cols)) { stop("New columns (%s) found in data. Unable to impute.", collapse(new.cols)) + } # check for same storage type classes = vcapply(x, function(x) class(x)[1L]) @@ -249,12 +261,14 @@ reimpute.data.frame = function(obj, desc) { dummy.cols = lapply(x[desc$dummies], is.na) names(dummy.cols) = sprintf("%s.dummy", desc$dummies) not.ok = which.first(names(dummy.cols) %in% names(x)) - if (length(not.ok)) + if (length(not.ok)) { stopf("Dummy column '%s' already present in data", names(dummy.cols)[not.ok]) - dummy.cols = if (desc$dummy.type == "numeric") + } + dummy.cols = if (desc$dummy.type == "numeric") { lapply(dummy.cols, as.numeric) - else + } else { lapply(dummy.cols, factor, levels = c("FALSE", "TRUE")) + } # check for new levels and replace with NAs if (desc$impute.new.levels) { @@ -262,21 +276,23 @@ reimpute.data.frame = function(obj, desc) { newlvls = Map(function(x, expected) setdiff(levels(x), expected), x = x[cols], expected = desc$lvls) newlvls = Filter(length, newlvls) - if (length(newlvls)) + if (length(newlvls)) { x[names(newlvls)] = Map(function(x, nl) droplevels(replace(x, x %in% nl, NA)), x = x[names(newlvls)], nl = newlvls) + } } # actually do the imputation cols = intersect(names(x), names(desc$impute)) x[cols] = Map( function(xn, obj) do.call(obj$impute, c(list(data = x, target = desc$target, col = xn), obj$args)), - xn = cols, obj = desc$impute[cols]) + xn = cols, obj = desc$impute[cols]) # recode factor levels if (desc$recode.factor.levels) { cols = names(desc$lvls) x[cols] = Map(function(x, expected) { + factor(as.character(x), levels = expected) }, x = x[cols], expected = desc$lvls) } @@ -288,6 +304,7 @@ reimpute.data.frame = function(obj, desc) { #' @export reimpute.Task = function(obj, desc) { + df = getTaskData(obj) imputed = reimpute.data.frame(df, desc) x = changeData(obj, data = imputed) diff --git a/R/ImputeMethods.R b/R/ImputeMethods.R index fb0e459fc2..3fe822dfab 100644 --- a/R/ImputeMethods.R +++ b/R/ImputeMethods.R @@ -23,6 +23,7 @@ #' @family impute #' @export makeImputeMethod = function(learn, impute, args = list()) { + assertFunction(learn, args = c("data", "target", "col")) assertFunction(impute, args = c("data", "target", "col")) assertList(args, names = "named") @@ -31,8 +32,10 @@ makeImputeMethod = function(learn, impute, args = list()) { # helper function to impute missings of a col to const val simpleImpute = function(data, target, col, const) { - if (is.na(const)) + + if (is.na(const)) { stopf("Error imputing column '%s'. Maybe all input data was missing?", col) + } x = data[[col]] # cast logicals to factor if required (#1522) @@ -75,6 +78,7 @@ NULL #' Constant valued use for imputation. 
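The built-in methods below all follow the two-step contract that makeImputeMethod() above enforces: learn() runs once on the training data and returns a named list of arguments, which impute() then reuses, so training and new data get identical treatment. A sketch of a hypothetical custom method (imputeTrimmedMean is invented), mirroring the imputeUniform()/imputeNormal() pattern:

imputeTrimmedMean = function(trim = 0.1) {
  makeImputeMethod(
    learn = function(data, target, col, trim) {
      # estimated once on the training data
      list(const = mean(data[[col]], trim = trim, na.rm = TRUE))
    },
    impute = function(data, target, col, const) {
      x = data[[col]]
      replace(x, is.na(x), const)  # reuse the learned constant
    },
    args = list(trim = trim))
}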
#' @rdname imputations imputeConstant = function(const) { + assertVector(const, len = 1L, any.missing = FALSE) makeImputeMethod( learn = function(data, target, col, const) const, @@ -86,6 +90,7 @@ imputeConstant = function(const) { #' @export #' @rdname imputations imputeMedian = function() { + makeImputeMethod( learn = function(data, target, col) median(data[[col]], na.rm = TRUE), impute = simpleImpute @@ -95,6 +100,7 @@ imputeMedian = function() { #' @export #' @rdname imputations imputeMean = function() { + makeImputeMethod( learn = function(data, target, col) mean(data[[col]], na.rm = TRUE), impute = simpleImpute @@ -104,6 +110,7 @@ imputeMean = function() { #' @export #' @rdname imputations imputeMode = function() { + makeImputeMethod( learn = function(data, target, col) computeMode(data[[col]], na.rm = TRUE), impute = simpleImpute @@ -115,9 +122,11 @@ imputeMode = function() { #' Value that stored minimum or maximum is multiplied with when imputation is done. #' @rdname imputations imputeMin = function(multiplier = 1) { + assertNumber(multiplier) makeImputeMethod( learn = function(data, target, col, multiplier) { + r = range(data[[col]], na.rm = TRUE) r[1L] - multiplier * diff(r) }, @@ -129,9 +138,11 @@ imputeMin = function(multiplier = 1) { #' @export #' @rdname imputations imputeMax = function(multiplier = 1) { + assertNumber(multiplier) makeImputeMethod( learn = function(data, target, col, multiplier) { + r = range(data[[col]], na.rm = TRUE) r[2L] + multiplier * diff(r) }, @@ -149,23 +160,28 @@ imputeMax = function(multiplier = 1) { #' If NA (default), it will be estimated from the data. #' @rdname imputations imputeUniform = function(min = NA_real_, max = NA_real_) { + assertNumber(min, na.ok = TRUE) assertNumber(max, na.ok = TRUE) makeImputeMethod( - learn = function(data, target, col, min, max) { + learn = function(data, target, col, min, max) { + if (is.na(min)) { min = min(data[[col]], na.rm = TRUE) - if (is.na(min)) + if (is.na(min)) { stop("All values are missing. Unable to calculate minimum.") + } } if (is.na(max)) { max = max(data[[col]], na.rm = TRUE) - if (is.na(max)) + if (is.na(max)) { stop("All values are missing. Unable to calculate maximum.") + } } list(min = min, max = max) }, impute = function(data, target, col, min, max) { + x = data[[col]] ind = is.na(x) replace(x, ind, runif(sum(ind), min = min, max = max)) @@ -181,24 +197,29 @@ imputeUniform = function(min = NA_real_, max = NA_real_) { #' Standard deviation of normal distribution. If missing it will be estimated from the data. #' @rdname imputations imputeNormal = function(mu = NA_real_, sd = NA_real_) { + assertNumber(mu, na.ok = TRUE) assertNumber(sd, na.ok = TRUE) makeImputeMethod( - learn = function(data, target, col, mu, sd) { + learn = function(data, target, col, mu, sd) { + if (is.na(mu)) { mu = mean(data[[col]], na.rm = TRUE) - if (is.na(mu)) + if (is.na(mu)) { stop("All values missing. Unable to calculate mean.") + } } if (is.na(sd)) { sd = sd(data[[col]], na.rm = TRUE) - if (is.na(sd)) + if (is.na(sd)) { stop("All values missing. Unable to calculate sd.") + } } list(mu = mu, sd = sd) }, impute = function(data, target, col, mu, sd) { + x = data[[col]] ind = is.na(x) replace(x, ind, rnorm(sum(ind), mean = mu, sd = sd)) @@ -216,6 +237,7 @@ imputeNormal = function(mu = NA_real_, sd = NA_real_) { #' or instead draw uniformly distributed samples within bin range. 
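All of these methods plug into the impute()/reimpute() round trip from Impute.R earlier in this patch: impute() learns an imputation description on the training data, and reimpute() replays it on new data. A minimal sketch with invented data, assuming mlr is attached:

library(mlr)
train.df = data.frame(a = c(1, NA, 3), b = factor(c("u", NA, "v")))
imp = impute(train.df, cols = list(a = imputeMedian(), b = imputeMode()))
imp$data  # NAs in 'a' and 'b' filled in
test.df = data.frame(a = c(NA, 2), b = factor(c("v", NA)))
reimpute(test.df, imp$desc)  # reuses the learned median / mode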
#' @rdname imputations imputeHist = function(breaks, use.mids = TRUE) { + if (missing(breaks)) { breaks = "Sturges" } @@ -227,25 +249,30 @@ imputeHist = function(breaks, use.mids = TRUE) { makeImputeMethod( learn = function(data, target, col, breaks, use.mids) { + x = data[[col]] - if (all(is.na(x))) + if (all(is.na(x))) { stop("All values missing. Unable to impute with Hist.") + } if (is.numeric(x)) { tmp = hist(x, breaks = breaks, plot = FALSE) - if (use.mids) + if (use.mids) { return(list(counts = tmp$counts, values = tmp$mids)) - else + } else { return(list(counts = tmp$counts, breaks = tmp$breaks)) + } } else { # factor or logical feature tmp = table(x, useNA = "no") values = names(tmp) - if (is.logical(x)) + if (is.logical(x)) { values = as.logical(x) + } return(list(counts = as.integer(tmp), values = values)) } }, impute = function(data, target, col, counts, values, breaks) { + x = data[[col]] ind = which(is.na(x)) if (missing(values)) { @@ -271,24 +298,30 @@ imputeHist = function(breaks, use.mids = TRUE) { #' @rdname imputations #' @export imputeLearner = function(learner, features = NULL) { + learner = checkLearner(learner) - if (!is.null(features)) + if (!is.null(features)) { assertCharacter(features, any.missing = FALSE) + } makeImputeMethod( learn = function(data, target, col, learner, features) { + constructor = getTaskConstructorForLearner(learner) if (is.null(features)) { features = setdiff(names(data), target) } else { not.ok = which(features %nin% names(data)) - if (length(not.ok)) + if (length(not.ok)) { stopf("Features for imputation not found in data: '%s'", collapse(features[not.ok])) + } not.ok = which.first(target %in% features) - if (length(not.ok)) + if (length(not.ok)) { stopf("Target column used as feature for imputation: '%s'", target[not.ok]) - if (col %nin% features) + } + if (col %nin% features) { features = c(col, features) + } } # features used for imputation might have NAs, but the learner might not support that # we need an extra check, otherwise this might not get noticed by checkLearnerBeforeTrain because @@ -307,6 +340,7 @@ imputeLearner = function(learner, features = NULL) { }, impute = function(data, target, col, model, features) { + x = data[[col]] ind = is.na(x) # if no NAs are present in data, we always return it unchanged diff --git a/R/ImputeWrapper.R b/R/ImputeWrapper.R index 42ad8e6567..e165725d72 100644 --- a/R/ImputeWrapper.R +++ b/R/ImputeWrapper.R @@ -14,6 +14,7 @@ #' @template ret_learner makeImputeWrapper = function(learner, classes = list(), cols = list(), dummy.classes = character(0L), dummy.cols = character(0L), dummy.type = "factor", force.dummies = FALSE, impute.new.levels = TRUE, recode.factor.levels = TRUE) { + learner = checkLearner(learner) args = list(classes = classes, cols = cols, dummy.classes = dummy.classes, dummy.cols = dummy.cols, dummy.type = dummy.type, force.dummies = force.dummies, @@ -21,10 +22,12 @@ makeImputeWrapper = function(learner, classes = list(), cols = list(), rm(list = names(args)) trainfun = function(data, target, args) { + setNames(do.call(impute, c(list(obj = data, target = target), args)), c("data", "control")) } predictfun = function(data, target, args, control) { + reimpute(data, control) } @@ -35,5 +38,6 @@ makeImputeWrapper = function(learner, classes = list(), cols = list(), #' @export getLearnerProperties.ImputeWrapper = function(learner) { + union(getLearnerProperties(learner$next.learner), "missings") } diff --git a/R/Learner.R b/R/Learner.R index 0f5b35c329..d94d98f48b 100644 --- 
a/R/Learner.R +++ b/R/Learner.R @@ -3,8 +3,10 @@ # in contains a minimal number of member vars, that every Learner object should have # derived constructors can of course add more member vars makeLearnerBaseConstructor = function(classes, id, type, package, properties, par.set, par.vals, predict.type, cache = FALSE) { - if (length(par.vals) == 0L) + + if (length(par.vals) == 0L) { names(par.vals) = character(0L) + } learner = makeS3Obj(c(classes, "Learner"), id = id, @@ -22,6 +24,7 @@ makeLearnerBaseConstructor = function(classes, id, type, package, properties, pa #' @export print.Learner = function(x, ...) { + cat( "Learner ", x$id, " from package ", collapse(cleanupPackageNames(x$package)), "\n", "Type: ", x$type, "\n", diff --git a/R/Learner_operators.R b/R/Learner_operators.R index 336a033dd5..88be708a3a 100644 --- a/R/Learner_operators.R +++ b/R/Learner_operators.R @@ -6,6 +6,7 @@ #' @export #' @family learner getLearnerType = function(learner) { + learner = checkLearner(learner) return(learner$type) } @@ -18,6 +19,7 @@ getLearnerType = function(learner) { #' @export #' @family learner getLearnerId = function(learner) { + learner = checkLearner(learner) return(learner$id) } @@ -30,6 +32,7 @@ getLearnerId = function(learner) { #' @export #' @family learner getLearnerPredictType = function(learner) { + learner = checkLearner(learner) return(learner$predict.type) } @@ -42,6 +45,7 @@ getLearnerPredictType = function(learner) { #' @export #' @family learner getLearnerPackages = function(learner) { + learner = checkLearner(learner) return(learner$package) } @@ -57,6 +61,7 @@ getLearnerPackages = function(learner) { #' @export #' @family learner getLearnerParamSet = function(learner) { + getParamSet(learner) } @@ -72,6 +77,7 @@ getLearnerParamSet = function(learner) { #' @export #' @family learner getLearnerParVals = function(learner, for.fun = c("train", "predict", "both")) { + learner = checkLearner(learner) getHyperPars(learner, for.fun) } @@ -86,6 +92,7 @@ getLearnerParVals = function(learner, for.fun = c("train", "predict", "both")) { #' @export #' @family learner setLearnerId = function(learner, id) { + learner = checkLearner(learner) assertString(id) learner$id = id @@ -102,6 +109,7 @@ setLearnerId = function(learner, id) { #' @export #' @family learner getLearnerShortName = function(learner) { + learner = checkLearner(learner) learner.short.name = learner$short.name @@ -118,4 +126,3 @@ getLearnerShortName = function(learner) { return(learner.short.name) } - diff --git a/R/Learner_properties.R b/R/Learner_properties.R index 89735b3566..7276a40a29 100644 --- a/R/Learner_properties.R +++ b/R/Learner_properties.R @@ -34,21 +34,25 @@ NULL #' @rdname LearnerProperties #' @export getLearnerProperties = function(learner) { + UseMethod("getLearnerProperties") } #' @export getLearnerProperties.Learner = function(learner) { + learner$properties } #' @export getLearnerProperties.character = function(learner) { + getLearnerProperties(checkLearner(learner)) } #' @export getLearnerProperties.ModelMultiplexer = function(learner) { + selected = learner$par.vals$selected.learner # NB: this is not set during construction if (is.null(selected)) learner$properties else getLearnerProperties(learner$base.learners[[selected]]) @@ -57,6 +61,7 @@ getLearnerProperties.ModelMultiplexer = function(learner) { #' @rdname LearnerProperties #' @export hasLearnerProperties = function(learner, props) { + learner = checkLearner(learner) assertSubset(props, listLearnerProperties()) props %in% 
getLearnerProperties(learner) @@ -67,6 +72,7 @@ hasLearnerProperties = function(learner, props) { #' @param props Deprecated. #' @export hasProperties = function(learner, props) { + .Deprecated("hasLearnerProperties") hasLearnerProperties(learner, props) } @@ -83,6 +89,7 @@ hasProperties = function(learner, props) { #' #' @export listLearnerProperties = function(type = "any") { + all.props = c(listTaskTypes(), "any") assertSubset(type, all.props) mlr$learner.properties[[type]] diff --git a/R/Measure.R b/R/Measure.R index c716dbd78c..929deb1970 100644 --- a/R/Measure.R +++ b/R/Measure.R @@ -90,20 +90,23 @@ #' makeMeasure(id = "my.sse", minimize = TRUE, properties = c("regr", "response"), fun = f) makeMeasure = function(id, minimize, properties = character(0L), fun, extra.args = list(), aggr = test.mean, best = NULL, worst = NULL, name = id, note = "") { + assertString(id) assertFlag(minimize) assertCharacter(properties, any.missing = FALSE) assertFunction(fun) assertList(extra.args) assertString(note) - if (is.null(best)) + if (is.null(best)) { best = ifelse(minimize, -Inf, Inf) - else + } else { assertNumber(best) - if (is.null(worst)) + } + if (is.null(worst)) { worst = ifelse(minimize, Inf, -Inf) - else + } else { assertNumber(worst) + } m = makeS3Obj("Measure", id = id, @@ -138,16 +141,18 @@ makeMeasure = function(id, minimize, properties = character(0L), #' @return ([Measure]). #' @export getDefaultMeasure = function(x) { - type = if (inherits(x, "TaskDesc")) + + type = if (inherits(x, "TaskDesc")) { x$type - else if (inherits(x, "Task")) + } else if (inherits(x, "Task")) { x$task.desc$type - else if (inherits(x, "Learner")) + } else if (inherits(x, "Learner")) { x$type - else if (x %in% listLearners()$class) + } else if (x %in% listLearners()$class) { stri_split_fixed(x, ".", simplify = TRUE)[1] - else + } else { x + } switch(type, classif = mmce, cluster = db, @@ -160,6 +165,7 @@ getDefaultMeasure = function(x) { #' @export print.Measure = function(x, ...) 
{ + catf("Name: %s", x$name) catf("Performance measure: %s", x$id) catf("Properties: %s", collapse(x$properties)) diff --git a/R/Measure_colAUC.R b/R/Measure_colAUC.R index bf8e9deeb6..a85da79d2c 100644 --- a/R/Measure_colAUC.R +++ b/R/Measure_colAUC.R @@ -5,21 +5,26 @@ # https://www.math.ucdavis.edu/~saito/data/roc/ferri-class-perf-metrics.pdf colAUC = function(samples, truth, maximum = TRUE) { + y = as.factor(truth) X = as.matrix(samples) - if (nrow(X) == 1) + if (nrow(X) == 1) { X = t(X) + } nr = nrow(X) nc = ncol(X) ny = table(y) ul = as.factor(rownames(ny)) nl = length(ny) - if (nl <= 1) + if (nl <= 1) { stop("colAUC: List of labels 'y' has to contain at least 2 class labels.") - if (!is.numeric(X)) + } + if (!is.numeric(X)) { stop("colAUC: 'X' must be numeric") - if (nr != length(y)) + } + if (nr != length(y)) { stop("colAUC: length(y) and nrow(X) must be the same") + } per = t(utils::combn(1:nl, 2)) np = nrow(per) auc = matrix(0.5, np, nc) diff --git a/R/Measure_custom_resampled.R b/R/Measure_custom_resampled.R index 4d4623f201..1d549905cf 100644 --- a/R/Measure_custom_resampled.R +++ b/R/Measure_custom_resampled.R @@ -69,7 +69,7 @@ makeCustomResampledMeasure = function(measure.id, aggregation.id, minimize = TRU fun1 = function(task, model, pred, feats, extra.args) NA_real_ # args are checked here custom = makeMeasure(id = measure.id, minimize, properties, fun1, extra.args, - best = best, worst = worst, name = measure.name, note = note) + best = best, worst = worst, name = measure.name, note = note) fun2 = function(task, perf.test, perf.train, measure, group, pred) fun(task, group, pred, extra.args) # we set the properties to "no requirements" here diff --git a/R/Measure_make_cost.R b/R/Measure_make_cost.R index 11110e1945..93d34e594d 100644 --- a/R/Measure_make_cost.R +++ b/R/Measure_make_cost.R @@ -23,7 +23,8 @@ #' @export #' @family performance makeCostMeasure = function(id = "costs", minimize = TRUE, costs, combine = mean, best = NULL, worst = NULL, - name = id, note = "") { + name = id, note = "") { + assertString(id) assertFlag(minimize) assertMatrix(costs) @@ -36,23 +37,28 @@ makeCostMeasure = function(id = "costs", minimize = TRUE, costs, combine = mean, properties = c("classif", "classif.multi", "req.pred", "req.truth", "predtype.response", "predtype.prob"), best = best, worst = worst, fun = function(task, model, pred, feats, extra.args) { - #check costs + + # check costs td = pred$task.desc levs = td$class.levels if (any(dim(costs))) { - if (any(dim(costs) != length(levs))) + if (any(dim(costs) != length(levs))) { stop("Dimensions of costs have to be the same as the number of class levels!") + } rns = rownames(costs) cns = colnames(costs) - if (!setequal(rns, levs) || !setequal(cns, levs)) + if (!setequal(rns, levs) || !setequal(cns, levs)) { stop("Row and column names of cost matrix have to equal class levels!") + } } costs = extra.args[[1L]] # cannot index with NA r = pred$data$response - if (anyMissing(r)) + if (anyMissing(r)) { return(NA_real_) + } cc = function(truth, pred) { + costs[truth, pred] } y = mapply(cc, as.character(pred$data$truth), as.character(r)) diff --git a/R/Measure_operators.R b/R/Measure_operators.R index 4cfe419e76..fa0ba752a9 100644 --- a/R/Measure_operators.R +++ b/R/Measure_operators.R @@ -15,6 +15,7 @@ #' @family performance #' @export setMeasurePars = function(measure, ..., par.vals = list()) { + args = list(...)
assertClass(measure, classes = "Measure") assertList(args, names = "unique", .var.name = "parameter settings") @@ -36,6 +37,7 @@ setMeasurePars = function(measure, ..., par.vals = list()) { #' @family performance #' @export setAggregation = function(measure, aggr) { + assertClass(measure, classes = "Measure") assertClass(aggr, classes = "Aggregation") measure$aggr = aggr diff --git a/R/Measure_properties.R b/R/Measure_properties.R index ffe6ff6208..1064f14d8b 100644 --- a/R/Measure_properties.R +++ b/R/Measure_properties.R @@ -18,6 +18,7 @@ NULL #' @rdname MeasureProperties #' @export getMeasureProperties = function(measure) { + assertClass(measure, classes = "Measure") measure$properties } @@ -25,6 +26,7 @@ getMeasureProperties = function(measure) { #' @rdname MeasureProperties #' @export hasMeasureProperties = function(measure, props) { + assertClass(measure, classes = "Measure") assertSubset(props, listMeasureProperties()) props %in% getMeasureProperties(measure) @@ -39,5 +41,6 @@ hasMeasureProperties = function(measure, props) { #' #' @export listMeasureProperties = function() { + mlr$measure.properties } diff --git a/R/ModelMultiplexer.R b/R/ModelMultiplexer.R index 410ab0108e..097b1416fa 100644 --- a/R/ModelMultiplexer.R +++ b/R/ModelMultiplexer.R @@ -50,10 +50,10 @@ #' ctrl = makeTuneControlRandom(maxit = 10L) #' res = tuneParams(lrn, iris.task, rdesc, par.set = ps, control = ctrl) #' print(res) -#' +#' #' df = as.data.frame(res$opt.path) #' print(head(df[, -ncol(df)])) -#' +#' #' # more unique and reliable way to construct the param set #' ps = makeModelMultiplexerParamSet(lrn, #' classif.ksvm = makeParamSet( #' makeNumericParam("sigma", lower = -10, upper = 10, trafo = function(x) 2^x) #' ), #' classif.randomForest = makeParamSet( #' makeIntegerParam("ntree", lower = 1L, upper = 500L) #' ) #' ) -#' +#' #' # this is how you would construct the param set manually, works too #' ps = makeParamSet( #' makeDiscreteParam("selected.learner", values = extractSubList(bls, "id")), #' makeNumericParam("classif.ksvm.sigma", lower = -10, upper = 10, trafo = function(x) 2^x, requires = quote(selected.learner == "classif.ksvm")), #' makeIntegerParam("classif.randomForest.ntree", lower = 1L, upper = 500L, #' requires = quote(selected.learner == "classif.randomForest")) #' ) -#' +#' #' # all three ps-objects are exactly the same internally. #' } makeModelMultiplexer = function(base.learners) { + lrn = makeBaseEnsemble( id = "ModelMultiplexer", base.learners = base.learners, @@ -96,6 +97,7 @@ makeModelMultiplexer = function(base.learners) { #' @export trainLearner.ModelMultiplexer = function(.learner, .task, .subset, .weights = NULL, selected.learner, ...) { + # train selected learner model and remove prefix from its param settings bl = .learner$base.learners[[selected.learner]] m = train(bl, task = .task, subset = .subset, weights = .weights) @@ -104,6 +106,7 @@ trainLearner.ModelMultiplexer = function(.learner, .task, .subset, .weights = NU #' @export predictLearner.ModelMultiplexer = function(.learner, .model, .newdata, ...)
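# The predict method below simply routes to whichever base learner is
# currently set via the `selected.learner` hyperparameter. A minimal usage
# sketch without tuning (illustrative only; reuses the multiplexer `lrn`
# and `iris.task` from the example above):
#
#   lrn2 = setHyperPars(lrn, selected.learner = "classif.ksvm")
#   mod = train(lrn2, iris.task)
#   pred = predict(mod, iris.task)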
{ #' @export makeWrappedModel.ModelMultiplexer = function(learner, learner.model, task.desc, subset, features, factor.levels, time) { + addClasses(NextMethod(), "ModelMultiplexerModel") } #' @export getLearnerModel.ModelMultiplexerModel = function(model, more.unwrap = FALSE) { + if (inherits(model$learner.model, "NoFeaturesModel")) { return(model$learner.model) } - if (more.unwrap) + if (more.unwrap) { model$learner.model$next.model$learner.model - else + } else { model$learner.model$next.model + } } #' @export isFailureModel.ModelMultiplexerModel = function(model) { + NextMethod() || (!inherits(model$learner.model, "NoFeaturesModel") && isFailureModel(model$learner.model$next.model)) } - diff --git a/R/ModelMultiplexerParamSet.R b/R/ModelMultiplexerParamSet.R index 00e7cc0b48..494b4789ac 100644 --- a/R/ModelMultiplexerParamSet.R +++ b/R/ModelMultiplexerParamSet.R @@ -30,6 +30,7 @@ #' @examples #' # See makeModelMultiplexer makeModelMultiplexerParamSet = function(multiplexer, ..., .check = TRUE) { + assertClass(multiplexer, classes = "ModelMultiplexer") assertFlag(.check) @@ -40,8 +41,9 @@ makeModelMultiplexerParamSet = function(multiplexer, ..., .check = TRUE) { new.ps = makeParamSet( makeDiscreteParam("selected.learner", values = bl.ids) ) - if (length(args) == 0L) + if (length(args) == 0L) { return(new.ps) + } # if basic param were passed we now group them into param sets # we match each param in the base learners and add it to the correct parset @@ -57,10 +59,12 @@ makeModelMultiplexerParamSet = function(multiplexer, ..., .check = TRUE) { # end of param name we need to find long.pid.end = sprintf("\\.%s$", pid) found = stri_subset_regex(all.par.ids, long.pid.end) - if (length(found) == 0L) + if (length(found) == 0L) { stopf("No param of id '%s' in any base learner!", pid) - if (length(found) > 1L) + } + if (length(found) > 1L) { stopf("Multiple params of id '%s' found in base learners, pass correctly grouped param sets!", pid) + } # get the learner that is referenced from prefix of found string + add param to correct parset for.learner = stri_replace(found, "", regex = long.pid.end) for.pars = pss[[for.learner]]$pars @@ -81,13 +85,15 @@ makeModelMultiplexerParamSet = function(multiplexer, ..., .check = TRUE) { ps = pss[[i]] ps.id = pss.ids[i] bl = bls[[ps.id]] - if (is.null(bl)) + if (is.null(bl)) { stopf("Passed param set for '%s', no base learner in multiplexer with this id!", ps.id) + } for (j in seq_along(ps$pars)) { p = ps$pars[[j]] pid = p$id - if (.check && (pid %nin% getParamIds(bl$par.set))) + if (.check && (pid %nin% getParamIds(bl$par.set))) { stopf("No param of id '%s' in base learner '%s'!", pid, bl$id) + } p$id = stri_paste(bl$id, pid, sep = ".") p$requires = asQuoted(sprintf("selected.learner == '%s'", bl$id)) ps$pars[[j]] = p diff --git a/R/MulticlassWrapper.R b/R/MulticlassWrapper.R index d8bb190bbc..06f4df0e05 100644 --- a/R/MulticlassWrapper.R +++ b/R/MulticlassWrapper.R @@ -29,6 +29,7 @@ #' @family wrapper #' @export makeMulticlassWrapper = function(learner, mcw.method = "onevsrest") { + learner = checkLearner(learner) ps = makeParamSet( makeUntypedLearnerParam(id = "mcw.method", default = "onevsrest") @@ -41,7 +42,7 @@ makeMulticlassWrapper = function(learner, mcw.method = "onevsrest") { id = stri_paste(learner$id, "multiclass", sep = ".") x = makeHomogeneousEnsemble(id = id, type = "classif", next.learner = learner, - package = learner$package, par.set = ps, par.vals = pv, + package = learner$package, par.set = ps, par.vals = pv, learner.subclass = 
"MulticlassWrapper", model.subclass = "MulticlassModel") x = setPredictType(x, predict.type = "response") return(x) @@ -49,6 +50,7 @@ makeMulticlassWrapper = function(learner, mcw.method = "onevsrest") { #' @export trainLearner.MulticlassWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, mcw.method, ...) { + .task = subsetTask(.task, .subset) y = getTaskTargets(.task) cm = buildCMatrix(mcw.method, .task) @@ -57,13 +59,14 @@ trainLearner.MulticlassWrapper = function(.learner, .task, .subset = NULL, .weig parallelLibrary("mlr", master = FALSE, level = "mlr.ensemble", show.info = FALSE) exportMlrOptions(level = "mlr.ensemble") models = parallelMap(i = seq_along(x$row.inds), doMulticlassTrainIteration, - more.args = args, level = "mlr.ensemble") + more.args = args, level = "mlr.ensemble") m = makeHomChainModel(.learner, models) m$cm = cm return(m) } doMulticlassTrainIteration = function(x, i, learner, task, weights) { + setSlaveOptions() d = getTaskData(task) tn = getTaskTargetNames(task) @@ -77,19 +80,23 @@ doMulticlassTrainIteration = function(x, i, learner, task, weights) { #' @export predictLearner.MulticlassWrapper = function(.learner, .model, .newdata, .subset = NULL, ...) { + models = .model$learner.model$next.model cm = .model$learner.model$cm # predict newdata with every binary model, get n x n.models matrix of +1,-1 # FIXME: this will break for length(models) == 1? do not use sapply! p = sapply(models, function(m) { + pred = predict(m, newdata = .newdata, subset = .subset, ...)$data$response - if (is.factor(pred)) + if (is.factor(pred)) { pred = as.numeric(pred == "1") * 2 - 1 + } pred }) rns = rownames(cm) # we use hamming decoding here, see http://jmlr.org/papers/volume11/escalera10a/escalera10a.pdf y = apply(p, 1L, function(v) { + d = apply(cm, 1L, function(z) sum((1 - sign(v * z)) / 2)) rns[getMinIndex(d)] }) @@ -97,7 +104,8 @@ predictLearner.MulticlassWrapper = function(.learner, .model, .newdata, .subset } #' @export -getLearnerProperties.MulticlassWrapper = function(learner){ +getLearnerProperties.MulticlassWrapper = function(learner) { + props = getLearnerProperties(learner$next.learner) props = union(props, "multiclass") setdiff(props, "prob") @@ -106,6 +114,7 @@ getLearnerProperties.MulticlassWrapper = function(learner){ ############################## helpers ############################## buildCMatrix = function(mcw.method, .task) { + if (is.function(mcw.method)) { meth = mcw.method } else { @@ -115,22 +124,27 @@ buildCMatrix = function(mcw.method, .task) { } levs = getTaskClassLevels(.task) cm = meth(.task) - if (!setequal(rownames(cm), levs)) + if (!setequal(rownames(cm), levs)) { stop("Rownames of codematrix must be class levels!") - if (!all(cm == 1 | cm == -1 | cm == 0)) + } + if (!all(cm == 1 | cm == -1 | cm == 0)) { stop("Codematrix must only contain: -1, 0, +1!") + } cm } # function for multi-to-binary problem conversion multi.to.binary = function(target, codematrix) { - if (anyMissing(codematrix)) + + if (anyMissing(codematrix)) { stop("Code matrix contains missing values!") + } levs = levels(target) rns = rownames(codematrix) - if (is.null(rns) || !setequal(rns, levs)) + if (is.null(rns) || !setequal(rns, levs)) { stop("Rownames of code matrix have to be the class levels!") + } binary.targets = as.data.frame(codematrix[target, , drop = FALSE]) row.inds = lapply(binary.targets, function(v) which(v != 0)) @@ -140,6 +154,7 @@ multi.to.binary = function(target, codematrix) { } cm.onevsrest = function(task) { + tcl = getTaskClassLevels(task) n = 
length(tcl) cm = matrix(-1, n, n) @@ -148,6 +163,7 @@ cm.onevsrest = function(task) { } cm.onevsone = function(task) { + tcl = getTaskClassLevels(task) n = length(tcl) cm = matrix(0, n, choose(n, 2)) diff --git a/R/MultilabelBinaryRelevanceWrapper.R b/R/MultilabelBinaryRelevanceWrapper.R index cb1a2db25a..49c34dd95d 100644 --- a/R/MultilabelBinaryRelevanceWrapper.R +++ b/R/MultilabelBinaryRelevanceWrapper.R @@ -26,6 +26,7 @@ #' @export #' @example inst/examples/MultilabelWrapper.R makeMultilabelBinaryRelevanceWrapper = function(learner) { + learner = checkLearner(learner, type = "classif") id = stri_paste("multilabel.binaryRelevance", getLearnerId(learner), sep = ".") packs = getLearnerPackages(learner) @@ -38,6 +39,7 @@ makeMultilabelBinaryRelevanceWrapper = function(learner) { #' @export trainLearner.MultilabelBinaryRelevanceWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, ...) { + targets = getTaskTargetNames(.task) .task = subsetTask(.task, subset = .subset) parallelLibrary("mlr", master = FALSE, level = "mlr.ensemble", show.info = FALSE) @@ -51,6 +53,7 @@ trainLearner.MultilabelBinaryRelevanceWrapper = function(.learner, .task, .subse } doMultilabelBinaryRelevanceTrainIteration = function(tn, learner, task, weights) { + setSlaveOptions() data = getTaskData(task) task = makeClassifTask(id = tn, data = dropNamed(data, setdiff(getTaskTargetNames(task), tn)), target = tn) @@ -60,11 +63,12 @@ doMultilabelBinaryRelevanceTrainIteration = function(tn, learner, task, weights) #' @export predictLearner.MultilabelBinaryRelevanceWrapper = function(.learner, .model, .newdata, .subset = NULL, ...) { + models = getLearnerModel(.model, more.unwrap = FALSE) - f = if (.learner$predict.type == "response") + f = if (.learner$predict.type == "response") { function(m) as.logical(getPredictionResponse(predict(m, newdata = .newdata, subset = .subset, ...))) - else + } else { function(m) getPredictionProbabilities(predict(m, newdata = .newdata, subset = .subset, ...), cl = "TRUE") + } asMatrixCols(lapply(models, f)) } - diff --git a/R/MultilabelClassifierChainsWrapper.R b/R/MultilabelClassifierChainsWrapper.R index e7142129db..3f66cee1cd 100644 --- a/R/MultilabelClassifierChainsWrapper.R +++ b/R/MultilabelClassifierChainsWrapper.R @@ -22,6 +22,7 @@ #' @export #' @example inst/examples/MultilabelWrapper.R makeMultilabelClassifierChainsWrapper = function(learner, order = NULL) { + learner = checkLearner(learner, type = "classif", props = "twoclass") id = stri_paste("multilabel.classifierChains", getLearnerId(learner), sep = ".") packs = getLearnerPackages(learner) @@ -35,9 +36,10 @@ makeMultilabelClassifierChainsWrapper = function(learner, order = NULL) { } #' @export -trainLearner.MultilabelClassifierChainsWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, ...){ +trainLearner.MultilabelClassifierChainsWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, ...) 
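# Classifier chains fit one binary model per label in the given (by default
# random) order; each model additionally sees the earlier labels of the
# chain as 0/1 features. A minimal usage sketch (illustrative only; uses
# the multilabel `yeast.task` shipped with mlr):
#
#   lrn = makeMultilabelClassifierChainsWrapper(makeLearner("classif.rpart"))
#   mod = train(lrn, yeast.task)
#   pred = predict(mod, yeast.task)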
{ + if (is.null(.learner$order)) { - order = sample(getTaskTargetNames(.task)) #random order + order = sample(getTaskTargetNames(.task)) # random order } else { order = .learner$order } @@ -51,7 +53,7 @@ trainLearner.MultilabelClassifierChainsWrapper = function(.learner, .task, .subs chained.targets = setdiff(chained.targets, tn) data2 = dropNamed(data, chained.targets) index = which(names(data2) %in% setdiff(targets, tn)) - if (length(index) != 0) { #convert augmented features into 0/1 variables, since boolean doesn't work + if (length(index) != 0) { # convert augmented features into 0/1 variables, since boolean doesn't work data2[, index] = sapply(data2[, index], as.numeric) } ctask = makeClassifTask(id = tn, data = data2, target = tn) @@ -62,6 +64,7 @@ trainLearner.MultilabelClassifierChainsWrapper = function(.learner, .task, .subs #' @export predictLearner.MultilabelClassifierChainsWrapper = function(.learner, .model, .newdata, .subset = NULL, ...) { + models = getLearnerModel(.model, more.unwrap = FALSE) predmatrix = matrix(ncol = length(models), nrow = nrow(.newdata), dimnames = list(NULL, names(models))) if (.learner$predict.type == "response") { @@ -75,6 +78,5 @@ predictLearner.MultilabelClassifierChainsWrapper = function(.learner, .model, .n .newdata[tn] = predmatrix[, tn] } } - predmatrix[, .model$task.desc$class.levels] #bring labels back in original order + predmatrix[, .model$task.desc$class.levels] # bring labels back in original order } - diff --git a/R/MultilabelDBRWrapper.R b/R/MultilabelDBRWrapper.R index f9c627cffb..31e6d913fa 100644 --- a/R/MultilabelDBRWrapper.R +++ b/R/MultilabelDBRWrapper.R @@ -21,6 +21,7 @@ #' @export #' @example inst/examples/MultilabelWrapper.R makeMultilabelDBRWrapper = function(learner) { + learner = checkLearner(learner, type = "classif", props = "twoclass") id = stri_paste("multilabel.DBR", getLearnerId(learner), sep = ".") packs = getLearnerPackages(learner) @@ -34,6 +35,7 @@ makeMultilabelDBRWrapper = function(learner) { #' @export trainLearner.MultilabelDBRWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, ...) { + targets = getTaskTargetNames(.task) .task = subsetTask(.task, subset = .subset) data = getTaskData(.task) @@ -54,6 +56,7 @@ trainLearner.MultilabelDBRWrapper = function(.learner, .task, .subset = NULL, .w #' @export predictLearner.MultilabelDBRWrapper = function(.learner, .model, .newdata, .subset = NULL, ...) { + models = getLearnerModel(.model, more.unwrap = FALSE) # Level 1 prediction (binary relevance) models.lvl1 = models[seq_along(.model$task.desc$target)] diff --git a/R/MultilabelNestedStackingWrapper.R b/R/MultilabelNestedStackingWrapper.R index 2c8f8529ce..4ec838f16b 100644 --- a/R/MultilabelNestedStackingWrapper.R +++ b/R/MultilabelNestedStackingWrapper.R @@ -24,6 +24,7 @@ #' @export #' @example inst/examples/MultilabelWrapper.R makeMultilabelNestedStackingWrapper = function(learner, order = NULL, cv.folds = 2) { + learner = checkLearner(learner, type = "classif", props = "twoclass") id = stri_paste("multilabel.nestedStacking", getLearnerId(learner), sep = ".") packs = getLearnerPackages(learner) @@ -39,8 +40,9 @@ makeMultilabelNestedStackingWrapper = function(learner, order = NULL, cv.folds = #' @export trainLearner.MultilabelNestedStackingWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, ...) 
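# Nested stacking works like classifier chains, but the feature added for
# each label is not the true value of the preceding label: it is an
# out-of-fold prediction of it from an inner CV with `cv.folds` folds,
# which avoids training on the true label values. A minimal usage sketch
# (illustrative only):
#
#   lrn = makeMultilabelNestedStackingWrapper(makeLearner("classif.rpart"), cv.folds = 3)
#   mod = train(lrn, yeast.task)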
{ + if (is.null(.learner$order)) { - order = sample(getTaskTargetNames(.task)) #random order + order = sample(getTaskTargetNames(.task)) # random order } else { order = .learner$order } @@ -55,11 +57,11 @@ trainLearner.MultilabelNestedStackingWrapper = function(.learner, .task, .subset tn = order[i] if (i >= 2) { tnprevious = order[i - 1] - data2 = data.frame(data.nst, data[tnprevious]) #for inner resampling to produce predicted labels + data2 = data.frame(data.nst, data[tnprevious]) # for inner resampling to produce predicted labels innertask = makeClassifTask(id = tnprevious, data = data2, target = tnprevious) rdesc = makeResampleDesc("CV", iters = .learner$cv.folds) r = resample(.learner$next.learner, innertask, rdesc, weights = .weights, show.info = FALSE) - predlabel = as.numeric(as.logical(r$pred$data[order(r$pred$data$id), ]$response)) #did not use getPredictionResponse, because of ordering + predlabel = as.numeric(as.logical(r$pred$data[order(r$pred$data$id), ]$response)) # did not use getPredictionResponse, because of ordering data2 = data.frame(data.nst, data[tn]) data2[[tnprevious]] = predlabel data.nst[[tnprevious]] = predlabel @@ -74,6 +76,7 @@ trainLearner.MultilabelNestedStackingWrapper = function(.learner, .task, .subset #' @export predictLearner.MultilabelNestedStackingWrapper = function(.learner, .model, .newdata, .subset = NULL, ...) { + models = getLearnerModel(.model, more.unwrap = FALSE) predmatrix = matrix(ncol = length(models), nrow = nrow(.newdata), dimnames = list(NULL, names(models))) if (.learner$predict.type == "response") { @@ -87,6 +90,5 @@ predictLearner.MultilabelNestedStackingWrapper = function(.learner, .model, .new .newdata[tn] = predmatrix[, tn] } } - predmatrix[, .model$task.desc$class.levels] #bring labels back in original order + predmatrix[, .model$task.desc$class.levels] # bring labels back in original order } - diff --git a/R/MultilabelStackingWrapper.R b/R/MultilabelStackingWrapper.R index 052f8ea7b9..6d359e53db 100644 --- a/R/MultilabelStackingWrapper.R +++ b/R/MultilabelStackingWrapper.R @@ -21,6 +21,7 @@ #' @export #' @example inst/examples/MultilabelWrapper.R makeMultilabelStackingWrapper = function(learner, cv.folds = 2) { + learner = checkLearner(learner, type = "classif", props = "twoclass") id = stri_paste("multilabel.stacking", getLearnerId(learner), sep = ".") packs = getLearnerPackages(learner) @@ -33,6 +34,7 @@ makeMultilabelStackingWrapper = function(learner, cv.folds = 2) { #' @export trainLearner.MultilabelStackingWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, ...) 
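# Stacking (2BR) first fits one binary relevance model per label (level 1),
# then refits one meta model per label on the original features augmented
# with inner-CV predictions for all labels. A minimal usage sketch
# (illustrative only):
#
#   lrn = makeMultilabelStackingWrapper(makeLearner("classif.rpart"), cv.folds = 3)
#   mod = train(lrn, yeast.task)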
{ + targets = getTaskTargetNames(.task) .task = subsetTask(.task, subset = .subset) data = getTaskData(.task) @@ -40,15 +42,17 @@ trainLearner.MultilabelStackingWrapper = function(.learner, .task, .subset = NUL models.lvl1 = getLearnerModel(train(makeMultilabelBinaryRelevanceWrapper(.learner$next.learner), .task, weights = .weights)) # predict labels f = function(tn) { + data2 = dropNamed(data, setdiff(targets, tn)) ctask = makeClassifTask(id = tn, data = data2, target = tn) rdesc = makeResampleDesc("CV", iters = .learner$cv.folds) r = resample(.learner$next.learner, ctask, rdesc, weights = .weights, show.info = FALSE) as.numeric(as.logical(r$pred$data[order(r$pred$data$id), ]$response)) # did not use getPredictionResponse, because of ordering } pred.labels = sapply(targets, f) # train meta level learners g = function(tn) { + data.meta = dropNamed(data.frame(data, pred.labels), setdiff(targets, tn)) ctask = makeClassifTask(id = tn, data = data.meta, target = tn) train(.learner$next.learner, ctask, weights = .weights) @@ -59,6 +63,7 @@ trainLearner.MultilabelStackingWrapper = function(.learner, .task, .subset = NUL #' @export predictLearner.MultilabelStackingWrapper = function(.learner, .model, .newdata, .subset = NULL, ...) { + models = getLearnerModel(.model, more.unwrap = FALSE) # Level 1 prediction (binary relevance) models.lvl1 = models[seq_along(.model$task.desc$target)] diff --git a/R/MultilabelTask.R b/R/MultilabelTask.R index a37bbfb40e..ee8dc193d6 100644 --- a/R/MultilabelTask.R +++ b/R/MultilabelTask.R @@ -2,6 +2,7 @@ #' @rdname Task makeMultilabelTask = function(id = deparse(substitute(data)), data, target, weights = NULL, blocking = NULL, coordinates = NULL, fixup.data = "warn", check.data = TRUE) { + assertString(id) assertCharacter(target, any.missing = FALSE, min.len = 2L) assertDataFrame(data) @@ -9,9 +10,9 @@ makeMultilabelTask = function(id = deparse(substitute(data)), data, target, weig assertFlag(check.data) task = makeSupervisedTask("multilabel", data = data, target = target, - weights = weights, blocking = blocking, - coordinates = coordinates, fixup.data = fixup.data, - check.data = check.data) + weights = weights, blocking = blocking, + coordinates = coordinates, fixup.data = fixup.data, + check.data = check.data) # currently we don't do any fixup here if (check.data) { for (cn in target) @@ -23,6 +24,7 @@ #' @export print.MultilabelTask = function(x, ...) { + y = getTaskTargets(x) sums = colSums(y) print.SupervisedTask(x) @@ -33,6 +35,7 @@ print.MultilabelTask = function(x, ...)
{ #' @export #' @rdname makeTaskDesc makeMultilabelTaskDesc = function(id, data, target, weights, blocking, coordinates) { + levs = target td = makeTaskDescInternal("multilabel", id, data, target, weights, blocking, coordinates) td$class.levels = levs diff --git a/R/NoFeaturesModel.R b/R/NoFeaturesModel.R index 020474c7d9..9f1fa7db12 100644 --- a/R/NoFeaturesModel.R +++ b/R/NoFeaturesModel.R @@ -1,10 +1,12 @@ makeNoFeaturesModel = function(targets, task.desc) { + setClasses(list(targets = targets, task.desc = task.desc), "NoFeaturesModel") } predictNofeatures = function(model, newdata) { + y = getLearnerModel(model)$targets type = model$learner$type # for regression return constant mean @@ -14,15 +16,17 @@ predictNofeatures = function(model, newdata) { if (type == "classif") { tab = prop.table(table(y)) probs = as.numeric(tab) - if (model$learner$predict.type == "response") + if (model$learner$predict.type == "response") { return(sample(as.factor(names(tab)), nrow(newdata), prob = probs, replace = TRUE)) + } probs = t(replicate(nrow(newdata), probs)) colnames(probs) = names(tab) return(probs) } if (type == "surv") { - if (model$learner$predict.type == "response") + if (model$learner$predict.type == "response") { return(runif(nrow(newdata))) + } # FIXME: probs / brier for survival should use something like median survival time } stopf("NoFeaturesModel for learner type '%s' not implemented", type) diff --git a/R/OptControl.R b/R/OptControl.R index 469890a433..4f55a682a0 100644 --- a/R/OptControl.R +++ b/R/OptControl.R @@ -2,8 +2,9 @@ makeOptControl = function(same.resampling.instance, impute.val = NULL, tune.thre tune.threshold.args = list(), log.fun = "default", final.dw.perc = NULL, ...) { assertFlag(same.resampling.instance) - if (!is.null(impute.val)) + if (!is.null(impute.val)) { assertNumeric(impute.val) + } assertFunction(log.fun, args = c("learner", "task", "resampling", "measures", "par.set", "control", "opt.path", "dob", "x", "y", "remove.nas", "stage", "prev.stage")) assertFlag(tune.threshold) diff --git a/R/OptResult.R b/R/OptResult.R index d9008daecf..2bb0554786 100644 --- a/R/OptResult.R +++ b/R/OptResult.R @@ -1,4 +1,5 @@ makeOptResult = function(learner, control, x, y, resampling, threshold, opt.path, cl, ...) { + res = list( learner = learner, control = control, diff --git a/R/OptWrapper.R b/R/OptWrapper.R index 6a5b36269c..c7773f0641 100644 --- a/R/OptWrapper.R +++ b/R/OptWrapper.R @@ -16,6 +16,7 @@ makeOptWrapper = function(id, learner, resampling, measures, par.set, bit.names, #' @export print.OptModel = function(x, ...) 
{ + print.WrappedModel(x) cat("\nOptimization result:\n") print(x$learner.model$opt.result) diff --git a/R/OverBaggingWrapper.R b/R/OverBaggingWrapper.R index 37c0d48b64..40fb8cf572 100644 --- a/R/OverBaggingWrapper.R +++ b/R/OverBaggingWrapper.R @@ -40,6 +40,7 @@ #' @family wrapper #' @export makeOverBaggingWrapper = function(learner, obw.iters = 10L, obw.rate = 1, obw.maxcl = "boot", obw.cl = NULL) { + learner = checkLearner(learner, "classif") pv = list() if (!missing(obw.iters)) { @@ -59,8 +60,9 @@ makeOverBaggingWrapper = function(learner, obw.iters = 10L, obw.rate = 1, obw.ma pv$obw.cl = obw.cl } - if (learner$predict.type != "response") + if (learner$predict.type != "response") { stop("Predict type of the basic learner must be response.") + } id = stri_paste(learner$id, "overbagged", sep = ".") packs = learner$package ps = makeParamSet( @@ -70,12 +72,12 @@ makeOverBaggingWrapper = function(learner, obw.iters = 10L, obw.rate = 1, obw.ma makeUntypedLearnerParam(id = "obw.cl", default = NULL, tunable = FALSE) ) makeHomogeneousEnsemble(id, "classif", learner, packs, par.set = ps, par.vals = pv, - learner.subclass = c("OverBaggingWrapper", "BaggingWrapper"), model.subclass = "BaggingModel") + learner.subclass = c("OverBaggingWrapper", "BaggingWrapper"), model.subclass = "BaggingModel") } #' @export trainLearner.OverBaggingWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, - obw.iters = 10L, obw.rate = 1, obw.maxcl = "boot", obw.cl = NULL, ...) { + obw.iters = 10L, obw.rate = 1, obw.maxcl = "boot", obw.cl = NULL, ...) { .task = subsetTask(.task, subset = .subset) y = getTaskTargets(.task) @@ -91,6 +93,7 @@ trainLearner.OverBaggingWrapper = function(.learner, .task, .subset = NULL, .wei } doOverBaggingTrainIteration = function(i, y, obw.rate, obw.cl, obw.maxcl, learner, task, weights) { + setSlaveOptions() bag = sampleBinaryClass(y, rate = obw.rate, cl = obw.cl, resample.other.class = (obw.maxcl == "boot")) train(learner$next.learner, task, subset = bag, weights = weights) @@ -99,5 +102,6 @@ doOverBaggingTrainIteration = function(i, y, obw.rate, obw.cl, obw.maxcl, learne #' @export getLearnerProperties.OverBaggingWrapper = function(learner) { + union(getLearnerProperties(learner$next.learner), "prob") } diff --git a/R/OverUnderSampling.R b/R/OverUnderSampling.R index 63251f007b..d963367410 100644 --- a/R/OverUnderSampling.R +++ b/R/OverUnderSampling.R @@ -23,6 +23,7 @@ #' @family imbalancy #' @export oversample = function(task, rate, cl = NULL) { + checkTask(task, "ClassifTask", binary = TRUE) assertNumber(rate, lower = 1) y = getTaskTargets(task) @@ -38,6 +39,7 @@ oversample = function(task, rate, cl = NULL) { #' @rdname oversample #' @export undersample = function(task, rate, cl = NULL) { + checkTask(task, "ClassifTask", binary = TRUE) assertNumber(rate, lower = 0, upper = 1) y = getTaskTargets(task) @@ -49,4 +51,3 @@ undersample = function(task, rate, cl = NULL) { j = sampleBinaryClass(y, rate = rate, cl = cl, resample.other.class = FALSE) subsetTask(task, j) } - diff --git a/R/OverUndersampleWrapper.R b/R/OverUndersampleWrapper.R index d95f485da9..9cdf21a978 100644 --- a/R/OverUndersampleWrapper.R +++ b/R/OverUndersampleWrapper.R @@ -29,6 +29,7 @@ #' @family wrapper #' @export makeUndersampleWrapper = function(learner, usw.rate = 1, usw.cl = NULL) { + learner = checkLearner(learner, "classif") pv = list() if (!missing(usw.rate)) { @@ -51,6 +52,7 @@ makeUndersampleWrapper = function(learner, usw.rate = 1, usw.cl = NULL) { #' @rdname makeUndersampleWrapper #' @export 
makeOversampleWrapper = function(learner, osw.rate = 1, osw.cl = NULL) { + learner = checkLearner(learner, "classif") pv = list() if (!missing(osw.rate)) { @@ -72,11 +74,12 @@ makeOversampleWrapper = function(learner, osw.rate = 1, osw.cl = NULL) { #' @export trainLearner.UndersampleWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, usw.rate = 1, usw.cl = NULL, ...) { + # If weights vector length fits to task size, set weights before subsetting (Issue #838) if (length(.weights) == getTaskSize(.task)) { .task$weights = .weights .task = subsetTask(.task, .subset) - # otherwise subset first and then set weights + # otherwise subset first and then set weights } else { .task = subsetTask(.task, .subset) .task$weights = .weights @@ -89,11 +92,12 @@ trainLearner.UndersampleWrapper = function(.learner, .task, .subset = NULL, .wei #' @export trainLearner.OversampleWrapper = function(.learner, .task, .subset = NULL, .weights = NULL, osw.rate = 1, osw.cl = NULL, ...) { + # If weights vector length fits to task size, set weights before subsetting (Issue #838) if (length(.weights) == getTaskSize(.task)) { .task$weights = .weights .task = subsetTask(.task, .subset) - # otherwise subset first and then set weights + # otherwise subset first and then set weights } else { .task = subsetTask(.task, .subset) .task$weights = .weights @@ -103,4 +107,3 @@ trainLearner.OversampleWrapper = function(.learner, .task, .subset = NULL, .weig m$train.task = .task makeChainModel(next.model = m, cl = "OversampleModel") } - diff --git a/R/Prediction.R b/R/Prediction.R index 4b7f07c03f..ea0d0cad4c 100644 --- a/R/Prediction.R +++ b/R/Prediction.R @@ -33,11 +33,13 @@ NULL #' Internal, do not use! #' @export makePrediction = function(task.desc, row.names, id, truth, predict.type, predict.threshold = NULL, y, time, error = NA_character_, dump = NULL) { + UseMethod("makePrediction") } #' @export makePrediction.RegrTaskDesc = function(task.desc, row.names, id, truth, predict.type, predict.threshold = NULL, y, time, error = NA_character_, dump = NULL) { + data = namedList(c("id", "truth", "response", "se")) data$id = id data$truth = truth @@ -61,11 +63,13 @@ makePrediction.RegrTaskDesc = function(task.desc, row.names, id, truth, predict. #' @export makePrediction.ClassifTaskDesc = function(task.desc, row.names, id, truth, predict.type, predict.threshold = NULL, y, time, error = NA_character_, dump = NULL) { + data = namedList(c("id", "truth", "response", "prob")) data$id = id # truth can come from a simple "newdata" df. 
then there might not be all factor levels present - if (!is.null(truth)) + if (!is.null(truth)) { levels(truth) = union(levels(truth), task.desc$class.levels) + } data$truth = truth if (predict.type == "response") { data$response = y @@ -75,8 +79,9 @@ makePrediction.ClassifTaskDesc = function(task.desc, row.names, id, truth, predi data = as.data.frame(filterNull(data)) # fix column names for prob if strange chars are in factor levels indices = stri_detect_fixed(names(data), "prob.") - if (sum(indices) > 0) + if (sum(indices) > 0) { names(data)[indices] = stri_paste("prob.", colnames(y)) + } } p = makeS3Obj(c("PredictionClassif", "Prediction"), @@ -102,6 +107,7 @@ makePrediction.ClassifTaskDesc = function(task.desc, row.names, id, truth, predi #' @export makePrediction.MultilabelTaskDesc = function(task.desc, row.names, id, truth, predict.type, predict.threshold = NULL, y, time, error = NA_character_, dump = NULL) { + data = namedList(c("id", "truth", "response", "prob")) data$id = id data$truth = truth @@ -133,6 +139,7 @@ makePrediction.MultilabelTaskDesc = function(task.desc, row.names, id, truth, pr #' @export makePrediction.SurvTaskDesc = function(task.desc, row.names, id, truth, predict.type, predict.threshold = NULL, y, time, error = NA_character_, dump = NULL) { + data = namedList(c("id", "truth.time", "truth.event", "response")) data$id = id # FIXME: recode times @@ -153,6 +160,7 @@ makePrediction.SurvTaskDesc = function(task.desc, row.names, id, truth, predict. #' @export makePrediction.ClusterTaskDesc = function(task.desc, row.names, id, truth, predict.type, predict.threshold = NULL, y, time, error = NA_character_, dump = NULL) { + data = namedList(c("id", "response", "prob")) data$id = id if (predict.type == "response") { @@ -179,6 +187,7 @@ makePrediction.ClusterTaskDesc = function(task.desc, row.names, id, truth, predi #' @export makePrediction.CostSensTaskDesc = function(task.desc, row.names, id, truth, predict.type, predict.threshold = NULL, y, time, error = NA_character_, dump = NULL) { + data = namedList(c("id", "response")) data$id = id data$response = y @@ -196,6 +205,7 @@ makePrediction.CostSensTaskDesc = function(task.desc, row.names, id, truth, pred #' @export print.Prediction = function(x, ...) { + catf("Prediction: %i observations", nrow(x$data)) catf("predict.type: %s", x$predict.type) catf("threshold: %s", collapse(sprintf("%s=%.2f", names(x$threshold), x$threshold))) @@ -203,4 +213,3 @@ print.Prediction = function(x, ...) { if (!is.na(x$error)) catf("errors: %s", x$error) printHead(as.data.frame(x), ...) } - diff --git a/R/Prediction_operators.R b/R/Prediction_operators.R index ffeba4d6d6..76392d9e58 100644 --- a/R/Prediction_operators.R +++ b/R/Prediction_operators.R @@ -1,5 +1,6 @@ #' @export as.data.frame.Prediction = function(x, row.names = NULL, optional = FALSE, ...) { + x$data } @@ -19,42 +20,49 @@ as.data.frame.Prediction = function(x, row.names = NULL, optional = FALSE, ...)
#' mod = train(lrn, task) #' # predict probabilities #' pred = predict(mod, newdata = iris) -#' +#' #' # Get probabilities for all classes #' head(getPredictionProbabilities(pred)) -#' +#' #' # Get probabilities for a subset of classes #' head(getPredictionProbabilities(pred, c("setosa", "virginica"))) getPredictionProbabilities = function(pred, cl) { + assertClass(pred, classes = "Prediction") ttype = pred$task.desc$type - if (ttype %nin% c("classif", "cluster", "multilabel")) + if (ttype %nin% c("classif", "cluster", "multilabel")) { stop("Prediction was not generated from a ClassifTask, MultilabelTask or ClusterTask!") + } if (missing(cl)) { if (ttype == "classif") { - if (length(pred$task.desc$class.levels) == 2L) + if (length(pred$task.desc$class.levels) == 2L) { cl = pred$task.desc$positive - else + } else { cl = pred$task.desc$class.levels + } } else if (ttype == "multilabel") { cl = pred$task.desc$class.levels } } else { - if (ttype == "cluster") + if (ttype == "cluster") { stopf("For clustering you can currently only ask for the probabilities of all classes!") - else + } else { assertCharacter(cl, any.missing = FALSE) + } } - if (pred$predict.type != "prob") + if (pred$predict.type != "prob") { stop("Probabilities not present in Prediction object!") + } cns = colnames(pred$data) if (ttype %in% c("classif", "multilabel")) { cl2 = stri_paste("prob", cl, sep = ".") - if (!all(cl2 %in% cns)) + if (!all(cl2 %in% cns)) { stopf("Trying to get probabilities for nonexistent classes: %s", collapse(cl)) + } y = pred$data[, cl2] - if (length(cl) > 1L) + if (length(cl) > 1L) { colnames(y) = cl + } } else if (ttype == "cluster") { y = pred$data[, stri_detect_regex(cns, "prob\\.")] colnames(y) = seq_col(y) @@ -71,6 +79,7 @@ getPredictionProbabilities = function(pred, cl) { #' @export #' @family predict getPredictionTaskDesc = function(pred) { + assertClass(pred, "Prediction") pred$task.desc } @@ -80,11 +89,12 @@ getPredictionTaskDesc = function(pred) { #' @param cl Deprecated. #' @export getProbabilities = function(pred, cl) { + .Deprecated("getPredictionProbabilities") getPredictionProbabilities(pred, cl) } -#c.Prediction = function(...) { +# c.Prediction = function(...) { # preds = list(...) # id = Reduce(c, lapply(preds, function(x) x@id)) # response = Reduce(c, lapply(preds, function(x) x@response)) # target = Reduce(c, lapply(preds, function(x) x@target)) # weights = Reduce(c, lapply(preds, function(x) x@weights)) # prob = Reduce(rbind, lapply(preds, function(x) x@prob)) # return(new("Prediction", task.desc = preds[[1]]@desc, id = id, response = response, target = target, weights = weights, prob = prob)); -#} +# } #' @title Get response / truth from prediction object.
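# A quick sketch of this accessor family on a regression prediction
# (illustrative only; uses the `bh.task` example task shipped with mlr):
#
#   mod = train(makeLearner("regr.lm"), bh.task)
#   pred = predict(mod, bh.task)
#   head(getPredictionResponse(pred))
#   head(getPredictionTruth(pred))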
@@ -113,17 +123,20 @@ getProbabilities = function(pred, cl) { #' @export #' @family predict getPredictionResponse = function(pred) { + UseMethod("getPredictionResponse") } #' @export getPredictionResponse.default = function(pred) { + # this should work for classif, regr and cluster and surv pred$data[["response"]] } #' @export getPredictionResponse.PredictionMultilabel = function(pred) { + i = stri_detect_regex(colnames(pred$data), "^response\\.") m = as.matrix(pred$data[, i]) setColNames(m, pred$task.desc$class.levels) @@ -132,37 +145,44 @@ getPredictionResponse.PredictionMultilabel = function(pred) { #' @rdname getPredictionResponse #' @export getPredictionSE = function(pred) { + UseMethod("getPredictionSE") } #' @export getPredictionSE.default = function(pred) { + pred$data[["se"]] } #' @rdname getPredictionResponse #' @export getPredictionTruth = function(pred) { + UseMethod("getPredictionTruth") } #' @export getPredictionTruth.default = function(pred) { + pred$data[["truth"]] } #' @export getPredictionTruth.PredictionCluster = function(pred) { + stop("There is no truth for cluster tasks") } #' @export getPredictionTruth.PredictionSurv = function(pred) { + Surv(pred$data$truth.time, pred$data$truth.event, type = "right") } #' @export getPredictionTruth.PredictionMultilabel = function(pred) { + i = stri_detect_regex(colnames(pred$data), "^truth\\.") m = as.matrix(pred$data[, i]) setColNames(m, pred$task.desc$class.levels) @@ -180,5 +200,6 @@ getPredictionTruth.PredictionMultilabel = function(pred) { #' @family debug #' @export getPredictionDump = function(pred) { + pred$dump } diff --git a/R/PreprocWrapper.R b/R/PreprocWrapper.R index e3e836dd12..12181c299b 100644 --- a/R/PreprocWrapper.R +++ b/R/PreprocWrapper.R @@ -1,4 +1,4 @@ -#FIXME: use learnerparam or ordinary params? +# FIXME: use learnerparam or ordinary params? #' Fuse learner with preprocessing. #' @@ -32,13 +32,15 @@ #' @family wrapper #' @export makePreprocWrapper = function(learner, train, predict, par.set = makeParamSet(), par.vals = list()) { + learner = checkLearner(learner) assertFunction(train, args = c("data", "target", "args")) assertFunction(predict, args = c("data", "target", "args", "control")) assertClass(par.set, classes = "ParamSet") checkList(par.vals) - if (!isProperlyNamed(par.vals)) + if (!isProperlyNamed(par.vals)) { stop("'par.vals' must be a properly named list!") + } id = stri_paste(learner$id, "preproc", sep = ".") x = makeBaseWrapper(id, type = learner$type, next.learner = learner, par.set = par.set, @@ -50,13 +52,15 @@ makePreprocWrapper = function(learner, train, predict, par.set = makeParamSet(), #' @export trainLearner.PreprocWrapper = function(.learner, .task, .subset = NULL, ...) { + pvs = .learner$par.vals pp = .learner$train(data = getTaskData(.task, .subset, functionals.as = "matrix"), target = getTaskTargetNames(.task), args = pvs) # FIXME: why is the order important? if (!(is.list(pp) && length(pp) == 2L && all(names(pp) == c("data", "control")) && - is.data.frame(pp$data) && is.list(pp$control))) + is.data.frame(pp$data) && is.list(pp$control))) { stop("Preprocessing train must result in a list with elements data[data.frame] and control[list]!") + } .task = changeData(.task, pp$data) # we have already subsetted! m = train(.learner$next.learner, .task) @@ -70,9 +74,11 @@ trainLearner.PreprocWrapper = function(.learner, .task, .subset = NULL, ...) { #' @export predictLearner.PreprocWrapper = function(.learner, .model, .newdata, ...)
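# The predict step below applies the user-supplied `predict` function with
# the `control` object produced by the train step. A minimal sketch of the
# contract with a hypothetical centering preprocessor (illustrative only;
# assumes all non-target columns are numeric):
#
#   trainfun = function(data, target, args) {
#     cns = setdiff(names(data), target)
#     ctrl = list(means = sapply(data[cns], mean))
#     data[cns] = sweep(data[cns], 2, ctrl$means)
#     list(data = data, control = ctrl)
#   }
#   predictfun = function(data, target, args, control) {
#     cns = names(control$means)
#     data[cns] = sweep(data[cns], 2, control$means)
#     data
#   }
#   lrn = makePreprocWrapper(makeLearner("regr.lm"), train = trainfun, predict = predictfun)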
{ + .newdata = .learner$predict(.newdata, .model$task.desc$target, .learner$par.vals, .model$learner.model$control) - if (!is.data.frame(.newdata)) + if (!is.data.frame(.newdata)) { stop("Preprocessing must result in a data.frame!") + } NextMethod(.newdata = .newdata) } diff --git a/R/PreprocWrapperCaret.R b/R/PreprocWrapperCaret.R index a4e9f9b0e9..4dead00178 100644 --- a/R/PreprocWrapperCaret.R +++ b/R/PreprocWrapperCaret.R @@ -50,17 +50,18 @@ makePreprocWrapperCaret = function(learner, ...) { par.vals = insert(par.vals, list(...)) trainfun = function(data, target, args) { + all.methods = c( - "BoxCox", "YeoJohnson", "expoTrans", "center", - "scale", "range", "knnImpute", "bagImpute", - "medianImpute", "pca", "ica", "spatialSign", - "zv", "nzv", "corr" + "BoxCox", "YeoJohnson", "expoTrans", "center", + "scale", "range", "knnImpute", "bagImpute", + "medianImpute", "pca", "ica", "spatialSign", + "zv", "nzv", "corr" ) logindex = c( - args$ppc.BoxCox, args$ppc.YeoJohnson, args$ppc.expoTrans, args$ppc.center, - args$ppc.scale, args$ppc.range, args$ppc.knnImpute, args$ppc.bagImpute, - args$ppc.medianImpute, args$ppc.pca, args$ppc.ica, args$ppc.spatialSign, - args$ppc.zv, args$ppc.nzv, args$ppc.corr + args$ppc.BoxCox, args$ppc.YeoJohnson, args$ppc.expoTrans, args$ppc.center, + args$ppc.scale, args$ppc.range, args$ppc.knnImpute, args$ppc.bagImpute, + args$ppc.medianImpute, args$ppc.pca, args$ppc.ica, args$ppc.spatialSign, + args$ppc.zv, args$ppc.nzv, args$ppc.corr ) cargs = list( @@ -87,6 +88,7 @@ makePreprocWrapperCaret = function(learner, ...) { } predictfun = function(data, target, args, control) { + data.frame(predict(control, data)) } @@ -96,6 +98,7 @@ makePreprocWrapperCaret = function(learner, ...) { #' @export getLearnerProperties.PreprocWrapperCaret = function(learner) { + props = getLearnerProperties(learner$next.learner) par.vals = getHyperPars(learner) if (par.vals$ppc.bagImpute | par.vals$ppc.knnImpute | par.vals$ppc.medianImpute) { diff --git a/R/RLearner.R b/R/RLearner.R index d45f286c58..9c156c1803 100644 --- a/R/RLearner.R +++ b/R/RLearner.R @@ -55,6 +55,7 @@ NULL #' @export #' @rdname RLearner makeRLearner = function() { + UseMethod("makeRLearner") } @@ -72,8 +73,9 @@ makeRLearnerInternal = function(id, type, package, par.set, par.vals, properties assertClass(par.set, classes = "ParamSet") checkListElementClass(par.set$pars, "LearnerParam") assertList(par.vals) - if (!isProperlyNamed(par.vals)) + if (!isProperlyNamed(par.vals)) { stop("Argument par.vals must be a properly named list!") + } assertString(name) assertString(short.name) assertString(note) @@ -109,10 +111,11 @@ makeRLearnerClassif = function(cl, package, par.set, par.vals = list(), properti # include the class.weights.param if ("class.weights" %in% getLearnerProperties(lrn)) { assertString(class.weights.param) - if (!is.null(par.set$pars[[class.weights.param]])) + if (!is.null(par.set$pars[[class.weights.param]])) { lrn$class.weights.param = class.weights.param - else + } else { stopf("'%s' needs to be defined in the parameter set as well.", class.weights.param) + } } return(lrn) } @@ -120,6 +123,7 @@ makeRLearnerClassif = function(cl, package, par.set, par.vals = list(), properti #' @export #' @rdname RLearner makeRLearnerMultilabel = function(cl, package, par.set, par.vals = list(), properties = character(0L), name = cl, short.name = cl, note = "", callees = character(0L)) { + addClasses( makeRLearnerInternal(cl, "multilabel", package, par.set, par.vals, properties, name, short.name, note, callees), c(cl, 
"RLearnerMultilabel") @@ -129,6 +133,7 @@ makeRLearnerMultilabel = function(cl, package, par.set, par.vals = list(), prope #' @export #' @rdname RLearner makeRLearnerRegr = function(cl, package, par.set, par.vals = list(), properties = character(0L), name = cl, short.name = cl, note = "", callees = character(0L)) { + addClasses( makeRLearnerInternal(cl, "regr", package, par.set, par.vals, properties, name, short.name, note, callees), c(cl, "RLearnerRegr") @@ -138,6 +143,7 @@ makeRLearnerRegr = function(cl, package, par.set, par.vals = list(), properties #' @export #' @rdname RLearner makeRLearnerSurv = function(cl, package, par.set, par.vals = list(), properties = character(0L), name = cl, short.name = cl, note = "", callees = character(0L)) { + addClasses( makeRLearnerInternal(cl, "surv", package, par.set, par.vals, properties, name, short.name, note, callees), c(cl, "RLearnerSurv") @@ -147,6 +153,7 @@ makeRLearnerSurv = function(cl, package, par.set, par.vals = list(), properties #' @export #' @rdname RLearner makeRLearnerCluster = function(cl, package, par.set, par.vals = list(), properties = character(0L), name = cl, short.name = cl, note = "", callees = character(0L)) { + addClasses( makeRLearnerInternal(cl, "cluster", package, par.set, par.vals, properties, name, short.name, note, callees), c(cl, "RLearnerCluster") diff --git a/R/RLearner_classif_C50.R b/R/RLearner_classif_C50.R index 96f1b15548..d322f61354 100644 --- a/R/RLearner_classif_C50.R +++ b/R/RLearner_classif_C50.R @@ -1,26 +1,27 @@ #' @export makeRLearner.classif.C50 = function() { + makeRLearnerClassif( cl = "classif.C50", package = "C50", par.set = makeParamSet( - makeIntegerLearnerParam(id = "trials", lower = 1L, default = 1L), - makeLogicalLearnerParam(id = "rules", default = FALSE, tunable = FALSE), - makeLogicalLearnerParam(id = "subset", default = FALSE), - # FIXME: Default = 0 throws error because 'lower' = 2L is above default. - makeIntegerLearnerParam(id = "bands", lower = 2L, upper = 1000L, - tunable = FALSE, requires = quote(rules == TRUE)), - makeLogicalLearnerParam(id = "winnow", default = FALSE), - makeLogicalLearnerParam(id = "noGlobalPruning", default = FALSE), - makeNumericLearnerParam(id = "CF", lower = 0, upper = 1, default = 0.25), - # FIXME: upper limit is data dependent - makeIntegerLearnerParam(id = "minCases", lower = 0L, upper = Inf, default = 2L), - makeLogicalLearnerParam(id = "fuzzyThreshold", default = FALSE), - makeNumericLearnerParam(id = "sample", lower = 0, upper = .999, default = 0, tunable = TRUE), - makeIntegerLearnerParam(id = "seed", lower = -Inf, upper = Inf, tunable = FALSE), - makeLogicalLearnerParam(id = "earlyStopping", default = TRUE), - # label just changes the word 'outcome' to something else in the output - makeUntypedLearnerParam(id = "label", default = "outcome", tunable = FALSE) + makeIntegerLearnerParam(id = "trials", lower = 1L, default = 1L), + makeLogicalLearnerParam(id = "rules", default = FALSE, tunable = FALSE), + makeLogicalLearnerParam(id = "subset", default = FALSE), + # FIXME: Default = 0 throws error because 'lower' = 2L is above default. 
+ makeIntegerLearnerParam(id = "bands", lower = 2L, upper = 1000L, + tunable = FALSE, requires = quote(rules == TRUE)), + makeLogicalLearnerParam(id = "winnow", default = FALSE), + makeLogicalLearnerParam(id = "noGlobalPruning", default = FALSE), + makeNumericLearnerParam(id = "CF", lower = 0, upper = 1, default = 0.25), + # FIXME: upper limit is data dependent + makeIntegerLearnerParam(id = "minCases", lower = 0L, upper = Inf, default = 2L), + makeLogicalLearnerParam(id = "fuzzyThreshold", default = FALSE), + makeNumericLearnerParam(id = "sample", lower = 0, upper = .999, default = 0, tunable = TRUE), + makeIntegerLearnerParam(id = "seed", lower = -Inf, upper = Inf, tunable = FALSE), + makeLogicalLearnerParam(id = "earlyStopping", default = TRUE), + # label just changes the word 'outcome' to something else in the output + makeUntypedLearnerParam(id = "label", default = "outcome", tunable = FALSE) ), properties = c("twoclass", "multiclass", "numerics", "factors", "prob", "missings", "weights"), name = "C50", @@ -39,15 +40,17 @@ trainLearner.classif.C50 = function(.learner, .task, .subset, .weights = NULL, label) d = getTaskData(.task, .subset, target.extra = TRUE) - C50::C5.0(x = d$data, y = d$target, control = ctrl, weights = .weights, ...) + C50::C5.0(x = d$data, y = d$target, control = ctrl, weights = .weights, ...) } #' @export predictLearner.classif.C50 = function(.learner, .model, .newdata, ...) { + m = .model$learner.model pred.type = .learner$predict.type - if (pred.type == "response") + if (pred.type == "response") { pred.type = "class" + } preds = predict(m, newdata = .newdata, type = pred.type, ...) return(preds) } diff --git a/R/RLearner_classif_IBk.R b/R/RLearner_classif_IBk.R index 4f9621ffa7..0317746913 100644 --- a/R/RLearner_classif_IBk.R +++ b/R/RLearner_classif_IBk.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.IBk = function() { + makeRLearnerClassif( cl = "classif.IBk", package = "RWeka", @@ -21,13 +22,15 @@ makeRLearner.classif.IBk = function() { } #' @export -trainLearner.classif.IBk = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.IBk = function(.learner, .task, .subset, .weights = NULL, ...) { + ctrl = RWeka::Weka_control(...) RWeka::IBk(getTaskFormula(.task), data = getTaskData(.task, .subset), control = ctrl, na.action = na.pass) } #' @export predictLearner.classif.IBk = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, prob = "prob", "class") predict(.model$learner.model, newdata = .newdata, type = type, ...) } diff --git a/R/RLearner_classif_J48.R b/R/RLearner_classif_J48.R index be7ff281de..322674f71d 100644 --- a/R/RLearner_classif_J48.R +++ b/R/RLearner_classif_J48.R @@ -1,6 +1,7 @@ # checked props #' @export makeRLearner.classif.J48 = function() { + makeRLearnerClassif( cl = "classif.J48", package = "RWeka", @@ -28,7 +29,8 @@ makeRLearner.classif.J48 = function() { } #' @export -trainLearner.classif.J48 = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.J48 = function(.learner, .task, .subset, .weights = NULL, ...) { + ctrl = RWeka::Weka_control(..., Q = as.integer(runif(1, min = -.Machine$integer.max, max = .Machine$integer.max))) f = getTaskFormula(.task) RWeka::J48(f, data = getTaskData(.task, .subset), control = ctrl, na.action = na.pass) @@ -36,6 +38,7 @@ trainLearner.classif.J48 = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.J48 = function(.learner, .model, .newdata, ...) 
{ + type = switch(.learner$predict.type, prob = "prob", "class") predict(.model$learner.model, newdata = .newdata, type = type, ...) } diff --git a/R/RLearner_classif_JRip.R b/R/RLearner_classif_JRip.R index 1abdbc8198..7998f79272 100644 --- a/R/RLearner_classif_JRip.R +++ b/R/RLearner_classif_JRip.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.JRip = function() { + makeRLearnerClassif( cl = "classif.JRip", package = "RWeka", @@ -22,7 +23,8 @@ makeRLearner.classif.JRip = function() { } #' @export -trainLearner.classif.JRip = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.JRip = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) ctrl = RWeka::Weka_control(..., S = as.integer(runif(1, min = -.Machine$integer.max, max = .Machine$integer.max))) RWeka::JRip(f, data = getTaskData(.task, .subset), control = ctrl, na.action = na.pass) @@ -30,6 +32,7 @@ trainLearner.classif.JRip = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.JRip = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, prob = "prob", "class") predict(.model$learner.model, newdata = .newdata, type = type, ...) } diff --git a/R/RLearner_classif_LiblineaRL1L2SVC.R b/R/RLearner_classif_LiblineaRL1L2SVC.R index aab3650056..1d8b7d6b8f 100644 --- a/R/RLearner_classif_LiblineaRL1L2SVC.R +++ b/R/RLearner_classif_LiblineaRL1L2SVC.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.LiblineaRL1L2SVC = function() { + makeRLearnerClassif( cl = "classif.LiblineaRL1L2SVC", package = "LiblineaR", @@ -21,11 +22,13 @@ makeRLearner.classif.LiblineaRL1L2SVC = function() { #' @export trainLearner.classif.LiblineaRL1L2SVC = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) LiblineaR::LiblineaR(data = d$data, target = d$target, type = 5L, ...) } #' @export predictLearner.classif.LiblineaRL1L2SVC = function(.learner, .model, .newdata, ...) { - as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) + + as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) } diff --git a/R/RLearner_classif_LiblineaRL1LogReg.R b/R/RLearner_classif_LiblineaRL1LogReg.R index 6386ff3948..56e026b41e 100644 --- a/R/RLearner_classif_LiblineaRL1LogReg.R +++ b/R/RLearner_classif_LiblineaRL1LogReg.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.LiblineaRL1LogReg = function() { + makeRLearnerClassif( cl = "classif.LiblineaRL1LogReg", package = "LiblineaR", @@ -21,14 +22,17 @@ makeRLearner.classif.LiblineaRL1LogReg = function() { #' @export trainLearner.classif.LiblineaRL1LogReg = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) LiblineaR::LiblineaR(data = d$data, target = d$target, type = 6L, ...) } #' @export predictLearner.classif.LiblineaRL1LogReg = function(.learner, .model, .newdata, ...) 
{ - if (.learner$predict.type == "response") + + if (.learner$predict.type == "response") { as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) - else + } else { predict(.model$learner.model, newx = .newdata, proba = TRUE, ...)$probabilities + } } diff --git a/R/RLearner_classif_LiblineaRL2L1SVC.R b/R/RLearner_classif_LiblineaRL2L1SVC.R index bd98808351..35e5180b36 100644 --- a/R/RLearner_classif_LiblineaRL2L1SVC.R +++ b/R/RLearner_classif_LiblineaRL2L1SVC.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.LiblineaRL2L1SVC = function() { + makeRLearnerClassif( cl = "classif.LiblineaRL2L1SVC", package = "LiblineaR", @@ -21,11 +22,13 @@ makeRLearner.classif.LiblineaRL2L1SVC = function() { #' @export trainLearner.classif.LiblineaRL2L1SVC = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) LiblineaR::LiblineaR(data = d$data, target = d$target, type = 3L, ...) } #' @export predictLearner.classif.LiblineaRL2L1SVC = function(.learner, .model, .newdata, ...) { - as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) + + as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) } diff --git a/R/RLearner_classif_LiblineaRL2LogReg.R b/R/RLearner_classif_LiblineaRL2LogReg.R index d175ba3425..81dad83d79 100644 --- a/R/RLearner_classif_LiblineaRL2LogReg.R +++ b/R/RLearner_classif_LiblineaRL2LogReg.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.LiblineaRL2LogReg = function() { + makeRLearnerClassif( cl = "classif.LiblineaRL2LogReg", package = "LiblineaR", @@ -27,14 +28,17 @@ makeRLearner.classif.LiblineaRL2LogReg = function() { #' @export trainLearner.classif.LiblineaRL2LogReg = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) LiblineaR::LiblineaR(data = d$data, target = d$target, ...) } #' @export predictLearner.classif.LiblineaRL2LogReg = function(.learner, .model, .newdata, ...) { - if (.learner$predict.type == "response") + + if (.learner$predict.type == "response") { as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) - else + } else { predict(.model$learner.model, newx = .newdata, proba = TRUE, ...)$probabilities + } } diff --git a/R/RLearner_classif_LiblineaRL2SVC.R b/R/RLearner_classif_LiblineaRL2SVC.R index 6df7ade314..71e7ec8e97 100644 --- a/R/RLearner_classif_LiblineaRL2SVC.R +++ b/R/RLearner_classif_LiblineaRL2SVC.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.LiblineaRL2SVC = function() { + makeRLearnerClassif( cl = "classif.LiblineaRL2SVC", package = "LiblineaR", @@ -27,11 +28,13 @@ makeRLearner.classif.LiblineaRL2SVC = function() { #' @export trainLearner.classif.LiblineaRL2SVC = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) LiblineaR::LiblineaR(data = d$data, target = d$target, ...) } #' @export predictLearner.classif.LiblineaRL2SVC = function(.learner, .model, .newdata, ...) 
{ - as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) + + as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) } diff --git a/R/RLearner_classif_LiblineaRMultiClassSVC.R b/R/RLearner_classif_LiblineaRMultiClassSVC.R index ca13d50de4..282284f0a0 100644 --- a/R/RLearner_classif_LiblineaRMultiClassSVC.R +++ b/R/RLearner_classif_LiblineaRMultiClassSVC.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.LiblineaRMultiClassSVC = function() { + makeRLearnerClassif( cl = "classif.LiblineaRMultiClassSVC", package = "LiblineaR", @@ -21,11 +22,13 @@ makeRLearner.classif.LiblineaRMultiClassSVC = function() { #' @export trainLearner.classif.LiblineaRMultiClassSVC = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) LiblineaR::LiblineaR(data = d$data, target = d$target, type = 4L, ...) } #' @export predictLearner.classif.LiblineaRMultiClassSVC = function(.learner, .model, .newdata, ...) { - as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) + + as.factor(predict(.model$learner.model, newx = .newdata, ...)$predictions) } diff --git a/R/RLearner_classif_OneR.R b/R/RLearner_classif_OneR.R index 50b7b64227..93f31bd910 100644 --- a/R/RLearner_classif_OneR.R +++ b/R/RLearner_classif_OneR.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.OneR = function() { + makeRLearnerClassif( cl = "classif.OneR", package = "RWeka", @@ -16,7 +17,8 @@ makeRLearner.classif.OneR = function() { } #' @export -trainLearner.classif.OneR = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.OneR = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) ctrl = RWeka::Weka_control(...) RWeka::OneR(f, data = getTaskData(.task, .subset), control = ctrl, na.action = na.pass) @@ -24,6 +26,7 @@ trainLearner.classif.OneR = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.OneR = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, prob = "prob", "class") predict(.model$learner.model, newdata = .newdata, type = type, ...) } diff --git a/R/RLearner_classif_PART.R b/R/RLearner_classif_PART.R index 486ad4dbda..d9cde20605 100644 --- a/R/RLearner_classif_PART.R +++ b/R/RLearner_classif_PART.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.PART = function() { + makeRLearnerClassif( cl = "classif.PART", package = "RWeka", @@ -23,7 +24,8 @@ makeRLearner.classif.PART = function() { } #' @export -trainLearner.classif.PART = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.PART = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) ctrl = RWeka::Weka_control(..., Q = as.integer(runif(1L, min = -.Machine$integer.max, max = .Machine$integer.max))) RWeka::PART(f, data = getTaskData(.task, .subset), control = ctrl, na.action = na.pass) @@ -31,6 +33,7 @@ trainLearner.classif.PART = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.PART = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, prob = "prob", "class") predict(.model$learner.model, newdata = .newdata, type = type, ...) 
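  # The switch() above falls through to its unnamed default, "class", whenever
  # predict.type is not "prob", so RWeka's predict method is always handed a
  # type it understands. A minimal usage sketch (assuming a fitted RWeka model
  # `m` and a data.frame `nd` of new observations, both hypothetical):
  # predict(m, newdata = nd, type = "class")  # factor of predicted labels
  # predict(m, newdata = nd, type = "prob")   # matrix of class probabilities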
} diff --git a/R/RLearner_classif_RRF.R b/R/RLearner_classif_RRF.R index 212961ca6c..e4389bcabb 100644 --- a/R/RLearner_classif_RRF.R +++ b/R/RLearner_classif_RRF.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.RRF = function() { + makeRLearnerClassif( cl = "classif.RRF", package = "RRF", @@ -12,9 +13,9 @@ makeRLearner.classif.RRF = function() { makeLogicalLearnerParam(id = "replace", default = TRUE), makeIntegerLearnerParam(id = "flagReg", default = 1L, lower = 0), makeNumericLearnerParam(id = "coefReg", default = 0.8, - requires = quote(flagReg == 1L)), + requires = quote(flagReg == 1L)), makeIntegerVectorLearnerParam(id = "feaIni", lower = 0, upper = Inf, - requires = quote(flagReg == 1L)), + requires = quote(flagReg == 1L)), makeNumericVectorLearnerParam(id = "classwt", lower = 0, upper = 1L), makeNumericVectorLearnerParam(id = "cutoff", lower = 0, upper = 1L), makeIntegerLearnerParam(id = "maxnodes", lower = 1L), @@ -39,12 +40,14 @@ makeRLearner.classif.RRF = function() { #' @export trainLearner.classif.RRF = function(.learner, .task, .subset, .weights, ...) { + RRF::RRF(formula = getTaskFormula(.task), data = getTaskData(.task, .subset), - keep.forest = TRUE, ...) + keep.forest = TRUE, ...) } #' @export predictLearner.classif.RRF = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "response", "prob") p = predict(object = .model$learner.model, newdata = .newdata, type = type, ...) return(p) @@ -52,14 +55,16 @@ predictLearner.classif.RRF = function(.learner, .model, .newdata, ...) { #' @export getFeatureImportanceLearner.classif.RRF = function(.learner, .model, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) ctrl = list(...) if (is.null(ctrl$type)) { ctrl$type = 2L } else if (ctrl$type == 1L) { has.fiv = .learner$par.vals$importance - if (is.null(has.fiv) || has.fiv != TRUE) + if (is.null(has.fiv) || has.fiv != TRUE) { stop("You need to train the learner with parameter 'importance' set to TRUE") + } } RRF::importance(mod, ctrl$type)[, 1] diff --git a/R/RLearner_classif_ada.R b/R/RLearner_classif_ada.R index a04cb101b4..da5fb73a3e 100644 --- a/R/RLearner_classif_ada.R +++ b/R/RLearner_classif_ada.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.ada = function() { + makeRLearnerClassif( cl = "classif.ada", package = c("ada", "rpart"), @@ -35,7 +36,8 @@ makeRLearner.classif.ada = function() { } #' @export -trainLearner.classif.ada = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.ada = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) dots = list(...) # get names of rpart.control args @@ -46,6 +48,7 @@ trainLearner.classif.ada = function(.learner, .task, .subset, .weights = NULL, # execute ada with proper args ada.args = c(dots, control = list(ctrl.args)) ada.fun = function(...) { + ada::ada(f, getTaskData(.task, .subset), ...) } do.call(ada.fun, ada.args) @@ -53,10 +56,12 @@ trainLearner.classif.ada = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.ada = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "vector", "probs") mod = getLearnerModel(.model) p = predict(mod, newdata = .newdata, type = type, ...) 
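  # ada's predict method appears to return the probability matrix without
  # column names for type = "probs"; the branch below restores them from the
  # row names of the training confusion matrix, which follow the class levels.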
- if (type == "probs") + if (type == "probs") { colnames(p) = rownames(mod$confusion) + } return(p) } diff --git a/R/RLearner_classif_adaboostm1.R b/R/RLearner_classif_adaboostm1.R index f02b57b118..3f8327b930 100644 --- a/R/RLearner_classif_adaboostm1.R +++ b/R/RLearner_classif_adaboostm1.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.adaboostm1 = function() { + makeRLearnerClassif( cl = "classif.adaboostm1", package = "RWeka", @@ -21,7 +22,8 @@ makeRLearner.classif.adaboostm1 = function() { } #' @export -trainLearner.classif.adaboostm1 = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.adaboostm1 = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) ctrl = RWeka::Weka_control(...) RWeka::AdaBoostM1(f, data = getTaskData(.task, .subset), control = ctrl, na.action = na.pass) @@ -29,6 +31,7 @@ trainLearner.classif.adaboostm1 = function(.learner, .task, .subset, .weights = #' @export predictLearner.classif.adaboostm1 = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, prob = "prob", "class") predict(.model$learner.model, newdata = .newdata, type = type, ...) } diff --git a/R/RLearner_classif_bartMachine.R b/R/RLearner_classif_bartMachine.R index cd6ec1c8ff..4a1db20cdf 100644 --- a/R/RLearner_classif_bartMachine.R +++ b/R/RLearner_classif_bartMachine.R @@ -1,9 +1,10 @@ -#FIXME: I have no idea which routine internally prints to which fucking stream +# FIXME: I have no idea which routine internally prints to which fucking stream # but neither verbose=FALSE can sicth off the iteration output in all case, nor # can I suppress it with capture.output or suppressMessages #' @export makeRLearner.classif.bartMachine = function() { + makeRLearnerClassif( cl = "classif.bartMachine", package = "bartMachine", @@ -42,6 +43,7 @@ makeRLearner.classif.bartMachine = function() { #' @export trainLearner.classif.bartMachine = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) y = d$target td = getTaskDesc(.task) @@ -52,9 +54,10 @@ trainLearner.classif.bartMachine = function(.learner, .task, .subset, .weights = #' @export predictLearner.classif.bartMachine = function(.learner, .model, .newdata, ...) { + td = .model$task.desc levs = c(td$positive, td$negative) - if (.learner$predict.type == "prob"){ + if (.learner$predict.type == "prob") { p = predict(.model$learner.model, new_data = .newdata, type = "prob", ...) y = propVectorToMatrix(1 - p, levs) } else { diff --git a/R/RLearner_classif_binomial.R b/R/RLearner_classif_binomial.R index a148fa3663..281c2951de 100644 --- a/R/RLearner_classif_binomial.R +++ b/R/RLearner_classif_binomial.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.binomial = function() { + makeRLearnerClassif( cl = "classif.binomial", package = "stats", @@ -21,12 +22,14 @@ makeRLearner.classif.binomial = function() { #' @export trainLearner.classif.binomial = function(.learner, .task, .subset, .weights = NULL, link = "logit", ...) { + f = getTaskFormula(.task) stats::glm(f, data = getTaskData(.task, .subset), family = stats::binomial(link = link), weights = .weights, ...) } #' @export predictLearner.classif.binomial = function(.learner, .model, .newdata, ...) { + x = predict(.model$learner.model, newdata = .newdata, type = "response", ...) levs = .model$task.desc$class.levels if (.learner$predict.type == "prob") { @@ -37,5 +40,3 @@ predictLearner.classif.binomial = function(.learner, .model, .newdata, ...) 
{ unname(p) } } - - diff --git a/R/RLearner_classif_boosting.R b/R/RLearner_classif_boosting.R index d656cddda6..0bb45a3c71 100644 --- a/R/RLearner_classif_boosting.R +++ b/R/RLearner_classif_boosting.R @@ -1,6 +1,7 @@ # FIXME: interface was changed, read page, pars, maybe rename #' @export makeRLearner.classif.boosting = function() { + makeRLearnerClassif( cl = "classif.boosting", package = c("adabag", "rpart"), @@ -31,6 +32,7 @@ makeRLearner.classif.boosting = function() { #' @export trainLearner.classif.boosting = function(.learner, .task, .subset, .weights = NULL, minsplit, minbucket, cp, maxcompete, maxsurrogate, usesurrogate, surrogatestyle, maxdepth, xval, ...) { + f = getTaskFormula(.task) ctrl = learnerArgsToControl(rpart::rpart.control, minsplit, minbucket, cp, maxcompete, maxsurrogate, usesurrogate, surrogatestyle, maxdepth, xval) adabag::boosting(f, data = getTaskData(.task, .subset), control = ctrl, ...) @@ -38,6 +40,7 @@ trainLearner.classif.boosting = function(.learner, .task, .subset, .weights = NU #' @export predictLearner.classif.boosting = function(.learner, .model, .newdata, ...) { + levs = .model$task.desc$class.levels # stupid adaboost .newdata[, .model$task.desc$target] = factor(rep(1, nrow(.newdata)), levels = levs) @@ -51,6 +54,7 @@ predictLearner.classif.boosting = function(.learner, .model, .newdata, ...) { #' @export getFeatureImportanceLearner.classif.boosting = function(.learner, .model, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) mod$importance[.model$features] } diff --git a/R/RLearner_classif_bst.R b/R/RLearner_classif_bst.R index 500bab2e7f..d77a309bda 100644 --- a/R/RLearner_classif_bst.R +++ b/R/RLearner_classif_bst.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.bst = function() { + makeRLearnerClassif( cl = "classif.bst", package = c("bst", "rpart"), @@ -41,14 +42,16 @@ makeRLearner.classif.bst = function() { trainLearner.classif.bst = function(.learner, .task, .subset, .weights = NULL, mstop, nu, twinboost, f.init, xselect.init, center, trace, numsample, df, minsplit, minbucket, cp, maxsurrogate, usesurrogate, surrogatestyle, maxdepth, xval, Learner, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "-1+1") ctrl = learnerArgsToControl(bst::bst_control, mstop, nu, twinboost, f.init, xselect.init, center, trace, numsample, df) - control.tree = learnerArgsToControl(list, minsplit, minbucket, cp, maxsurrogate, usesurrogate, surrogatestyle, maxdepth, xval) + control.tree = learnerArgsToControl(list, minsplit, minbucket, cp, maxsurrogate, usesurrogate, surrogatestyle, maxdepth, xval) bst::bst(x = d$data, y = d$target, ctrl = ctrl, control.tree = control.tree, learner = Learner, ...) } #' @export predictLearner.classif.bst = function(.learner, .model, .newdata, ...) { + levs = c(.model$task.desc$negative, .model$task.desc$positive) p = predict(.model$learner.model, .newdata, ...) as.factor(ifelse(p > 0, levs[2L], levs[1L])) diff --git a/R/RLearner_classif_cforest.R b/R/RLearner_classif_cforest.R index a085f23dd1..9183025b40 100644 --- a/R/RLearner_classif_cforest.R +++ b/R/RLearner_classif_cforest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.cforest = function() { + makeRLearnerClassif( cl = "classif.cforest", package = "party", @@ -37,6 +38,7 @@ trainLearner.classif.cforest = function(.learner, .task, .subset, .weights = NULL, ntree, mtry, replace, fraction, trace, teststat, testtype, mincriterion, minsplit, minbucket, stump, nresample, maxsurrogate, maxdepth, savesplitstats, ...) 
{ + f = getTaskFormula(.task) d = getTaskData(.task, .subset) defaults = getDefaults(getParamSet(.learner)) @@ -54,6 +56,7 @@ trainLearner.classif.cforest = function(.learner, .task, .subset, #' @export predictLearner.classif.cforest = function(.learner, .model, .newdata, ...) { + if (.learner$predict.type == "prob") { p = predict(.model$learner.model, newdata = .newdata, type = "prob", ...) # FIXME: this will break for nrow(.newdata) == 1? do not use sapply! @@ -67,6 +70,7 @@ predictLearner.classif.cforest = function(.learner, .model, .newdata, ...) { #' @export getFeatureImportanceLearner.classif.cforest = function(.learner, .model, auc = FALSE, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) if (auc) { party::varimpAUC(mod, ...) diff --git a/R/RLearner_classif_clusterSVM.R b/R/RLearner_classif_clusterSVM.R index b8ef2d0577..a921abdab1 100644 --- a/R/RLearner_classif_clusterSVM.R +++ b/R/RLearner_classif_clusterSVM.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.clusterSVM = function() { + makeRLearnerClassif( cl = "classif.clusterSVM", package = c("SwarmSVM", "LiblineaR"), @@ -35,11 +36,13 @@ makeRLearner.classif.clusterSVM = function() { #' @export trainLearner.classif.clusterSVM = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) SwarmSVM::clusterSVM(x = d$data, y = d$target, ...) } #' @export predictLearner.classif.clusterSVM = function(.learner, .model, .newdata, ...) { + as.factor(predict(.model$learner.model, newdata = .newdata, ...)$predictions) } diff --git a/R/RLearner_classif_ctree.R b/R/RLearner_classif_ctree.R index eea9b78d9b..d0790c2691 100644 --- a/R/RLearner_classif_ctree.R +++ b/R/RLearner_classif_ctree.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.ctree = function() { + makeRLearnerClassif( cl = "classif.ctree", package = "party", @@ -37,6 +38,7 @@ trainLearner.classif.ctree = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.ctree = function(.learner, .model, .newdata, ...) { + if (.learner$predict.type == "prob") { m = .model$learner.model p = party::treeresponse(m, newdata = .newdata, ...) diff --git a/R/RLearner_classif_cvglmnet.R b/R/RLearner_classif_cvglmnet.R index 56d1c113b0..af2ae2456c 100644 --- a/R/RLearner_classif_cvglmnet.R +++ b/R/RLearner_classif_cvglmnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.cvglmnet = function() { + makeRLearnerClassif( cl = "classif.cvglmnet", package = "glmnet", @@ -46,12 +47,14 @@ makeRLearner.classif.cvglmnet = function() { #' @export trainLearner.classif.cvglmnet = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "drop.levels") info = getFixDataInfo(d$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) args = c(list(x = as.matrix(fixDataForLearner(d$data, info)), y = d$target), list(...)) rm(d) - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } td = getTaskDesc(.task) args$family = ifelse(length(td$class.levels) == 2L, "binomial", "multinomial") @@ -70,6 +73,7 @@ trainLearner.classif.cvglmnet = function(.learner, .task, .subset, .weights = NU #' @export predictLearner.classif.cvglmnet = function(.learner, .model, .newdata, ...) 
{ + info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) if (.learner$predict.type == "prob") { diff --git a/R/RLearner_classif_dbnDNN.R b/R/RLearner_classif_dbnDNN.R index 451b124987..db3e2b8c0f 100644 --- a/R/RLearner_classif_dbnDNN.R +++ b/R/RLearner_classif_dbnDNN.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.dbnDNN = function() { + makeRLearnerClassif( cl = "classif.dbnDNN", package = "deepnet", @@ -26,7 +27,8 @@ makeRLearner.classif.dbnDNN = function() { } #' @export -trainLearner.classif.dbnDNN = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.dbnDNN = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) y = as.numeric(d$target) dict = sort(unique(y)) @@ -40,6 +42,7 @@ trainLearner.classif.dbnDNN = function(.learner, .task, .subset, .weights = NULL #' @export predictLearner.classif.dbnDNN = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, response = "class", prob = "raw") pred = deepnet::nn.predict(.model$learner.model, data.matrix(.newdata)) colnames(pred) = .model$factor.levels[[1]] diff --git a/R/RLearner_classif_dcSVM.R b/R/RLearner_classif_dcSVM.R index 95a9aefc5c..7d0b06cda7 100644 --- a/R/RLearner_classif_dcSVM.R +++ b/R/RLearner_classif_dcSVM.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.dcSVM = function() { + makeRLearnerClassif( cl = "classif.dcSVM", package = c("SwarmSVM", "e1071"), @@ -29,6 +30,7 @@ makeRLearner.classif.dcSVM = function() { #' @export trainLearner.classif.dcSVM = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) pars = list(...) m.flag = FALSE @@ -79,8 +81,10 @@ trainLearner.classif.dcSVM = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.dcSVM = function(.learner, .model, .newdata, ...) { + prediction = predict(.model$learner.model, newdata = .newdata, ...) - if (!is.factor(prediction)) # depends on parameters AND data + if (!is.factor(prediction)) { # depends on parameters AND data prediction = factor(prediction, levels = c(1, 2), labels = .model$factor.levels[[1]]) + } prediction } diff --git a/R/RLearner_classif_earth.R b/R/RLearner_classif_earth.R index 6ace4454e7..2dea0fbc3c 100644 --- a/R/RLearner_classif_earth.R +++ b/R/RLearner_classif_earth.R @@ -1,5 +1,6 @@ - #' @export +#' @export makeRLearner.classif.earth = function() { + makeRLearnerClassif( cl = "classif.earth", package = c("!earth", "stats"), @@ -43,12 +44,14 @@ makeRLearner.classif.earth = function() { #' @export trainLearner.classif.earth = function(.learner, .task, .subset, .weights = NULL, link = "logit", maxit = 25L, ...) { + f = getTaskFormula(.task) earth::earth(f, data = getTaskData(.task, .subset), weights = .weights, glm = list(family = binomial(link = link), maxit = maxit), ...) } #' @export predictLearner.classif.earth = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata, type = "response", ...) 
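  # With the binomial glm sub-model fitted above, type = "response" should
  # yield the probability of the second class level; the code below expands
  # that single column into the two-column probability matrix mlr expects
  # (a sketch of the intent, not verified against every earth version).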
levs = .model$task.desc$class.levels if (.learner$predict.type == "prob") { diff --git a/R/RLearner_classif_evtree.R b/R/RLearner_classif_evtree.R index ee330fb5aa..4c0f484179 100644 --- a/R/RLearner_classif_evtree.R +++ b/R/RLearner_classif_evtree.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.evtree = function() { + makeRLearnerClassif( cl = "classif.evtree", package = "evtree", @@ -30,6 +31,7 @@ makeRLearner.classif.evtree = function() { trainLearner.classif.evtree = function(.learner, .task, .subset, .weights = NULL, pmutatemajor, pmutateminor, pcrossover, psplit, pprune, seed, ...) { + f = getTaskFormula(.task) d = getTaskData(.task, .subset) defaults = getDefaults(getParamSet(.learner)) @@ -46,6 +48,7 @@ trainLearner.classif.evtree = function(.learner, .task, .subset, #' @export predictLearner.classif.evtree = function(.learner, .model, .newdata, ...) { + colnames(.newdata) = attr(.model$learner.model$terms, "term.labels") if (.learner$predict.type == "prob") { p = predict(.model$learner.model, newdata = .newdata, type = "prob", ...) diff --git a/R/RLearner_classif_extraTrees.R b/R/RLearner_classif_extraTrees.R index b39cda1f19..05c6589ba0 100644 --- a/R/RLearner_classif_extraTrees.R +++ b/R/RLearner_classif_extraTrees.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.extraTrees = function() { + makeRLearnerClassif( cl = "classif.extraTrees", package = "extraTrees", @@ -26,15 +27,18 @@ makeRLearner.classif.extraTrees = function() { #' @export trainLearner.classif.extraTrees = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) args = c(list(x = as.matrix(d$data), y = d$target), list(...)) - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } do.call(extraTrees::extraTrees, args) } #' @export predictLearner.classif.extraTrees = function(.learner, .model, .newdata, ...) { + is.prob = .learner$predict.type == "prob" predict(.model$learner.model, as.matrix(.newdata), probability = is.prob, ...) } diff --git a/R/RLearner_classif_fdausc.glm.R b/R/RLearner_classif_fdausc.glm.R index b1c9792605..b543d3dfe8 100644 --- a/R/RLearner_classif_fdausc.glm.R +++ b/R/RLearner_classif_fdausc.glm.R @@ -5,6 +5,7 @@ #' #' @export makeRLearner.classif.fdausc.glm = function() { + makeRLearnerClassif( cl = "classif.fdausc.glm", package = "fda.usc", @@ -40,6 +41,7 @@ trainLearner.classif.fdausc.glm = function(.learner, .task, .subset, .weights = #' @export predictLearner.classif.fdausc.glm = function(.learner, .model, .newdata, ...) { + # transform the data into fda.usc:fdata class type. fd = getFunctionalFeatures(.newdata) nd = list(x = fda.usc::fdata(mdata = fd)) diff --git a/R/RLearner_classif_fdausc.kernel.R b/R/RLearner_classif_fdausc.kernel.R index 6e63053fa0..b53ef1fa7f 100644 --- a/R/RLearner_classif_fdausc.kernel.R +++ b/R/RLearner_classif_fdausc.kernel.R @@ -5,6 +5,7 @@ #' #' @export makeRLearner.classif.fdausc.kernel = function() { + makeRLearnerClassif( cl = "classif.fdausc.kernel", package = "fda.usc", @@ -28,6 +29,7 @@ makeRLearner.classif.fdausc.kernel = function() { #' @export trainLearner.classif.fdausc.kernel = function(.learner, .task, .subset, .weights = NULL, trim, draw, ...) 
{ + # Get and transform functional data d = getTaskData(.task, subset = .subset, target.extra = TRUE, functionals.as = "matrix") fd = getFunctionalFeatures(d$data) diff --git a/R/RLearner_classif_fdausc.knn.R b/R/RLearner_classif_fdausc.knn.R index 88f78f7d21..15572c135f 100644 --- a/R/RLearner_classif_fdausc.knn.R +++ b/R/RLearner_classif_fdausc.knn.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.fdausc.knn = function() { + makeRLearnerClassif( cl = "classif.fdausc.knn", package = "fda.usc", @@ -32,7 +33,7 @@ trainLearner.classif.fdausc.knn = function(.learner, .task, .subset, .weights = par.cv = learnerArgsToControl(list, trim, draw) fda.usc::classif.knn(group = d$target, fdataobj = data.fdclass, par.CV = par.cv, par.S = list(w = .weights), ...) - } +} #' @export predictLearner.classif.fdausc.knn = function(.learner, .model, .newdata, ...) { diff --git a/R/RLearner_classif_fdausc.np.R b/R/RLearner_classif_fdausc.np.R index 243798ede9..e73cd5a991 100644 --- a/R/RLearner_classif_fdausc.np.R +++ b/R/RLearner_classif_fdausc.np.R @@ -5,6 +5,7 @@ #' #' @export makeRLearner.classif.fdausc.np = function() { + makeRLearnerClassif( cl = "classif.fdausc.np", package = "fda.usc", diff --git a/R/RLearner_classif_featureless.R b/R/RLearner_classif_featureless.R index 435e936085..3402d63fcd 100644 --- a/R/RLearner_classif_featureless.R +++ b/R/RLearner_classif_featureless.R @@ -26,6 +26,7 @@ NULL #' @export makeRLearner.classif.featureless = function() { + makeRLearnerClassif( cl = "classif.featureless", package = "mlr", @@ -41,6 +42,7 @@ makeRLearner.classif.featureless = function() { #' @export trainLearner.classif.featureless = function(.learner, .task, .subset, .weights = NULL, method = "majority", ...) { + y = getTaskTargets(.task) if (!is.null(.subset)) { y = y[.subset] @@ -53,6 +55,7 @@ trainLearner.classif.featureless = function(.learner, .task, .subset, .weights = #' @export predictLearner.classif.featureless = function(.learner, .model, .newdata, ...) { + # extract some shortcuts n = nrow(.newdata) ptype = .learner$predict.type diff --git a/R/RLearner_classif_fnn.R b/R/RLearner_classif_fnn.R index aa7e5b5296..2271c784bb 100644 --- a/R/RLearner_classif_fnn.R +++ b/R/RLearner_classif_fnn.R @@ -1,6 +1,7 @@ -#FIXME: probs can only be predicted for two class problems (winning class) +# FIXME: probs can only be predicted for two class problems (winning class) #' @export makeRLearner.classif.fnn = function() { + makeRLearnerClassif( cl = "classif.fnn", package = "FNN", @@ -18,13 +19,15 @@ makeRLearner.classif.fnn = function() { } #' @export -trainLearner.classif.fnn = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.fnn = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) list(train = d, parset = list(...)) } #' @export predictLearner.classif.fnn = function(.learner, .model, .newdata, ...) 
{ + m = .model$learner.model pars = list(train = m$train$data, test = .newdata, cl = m$train$target) pars = c(pars, m$parset, list(...)) diff --git a/R/RLearner_classif_gamboost.R b/R/RLearner_classif_gamboost.R index 70b6d3c0ae..59472ead8c 100644 --- a/R/RLearner_classif_gamboost.R +++ b/R/RLearner_classif_gamboost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.gamboost = function() { + makeRLearnerClassif( cl = "classif.gamboost", package = "mboost", @@ -13,8 +14,8 @@ makeRLearner.classif.gamboost = function() { makeUntypedLearnerParam(id = "custom.family.definition", requires = quote(family == "custom.family")), makeDiscreteLearnerParam(id = "Binomial.link", default = "logit", values = c("logit", "probit"), requires = quote(family == "Binomial")), - #makeNumericVectorLearnerParam(id = "nuirange", default = c(-0.5, -1), requires = quote(family == "PropOdds")), - #makeNumericVectorLearnerParam(id = "offrange", default = c(-5,5), requires = quote(family == "PropOdds")), + # makeNumericVectorLearnerParam(id = "nuirange", default = c(-0.5, -1), requires = quote(family == "PropOdds")), + # makeNumericVectorLearnerParam(id = "offrange", default = c(-5,5), requires = quote(family == "PropOdds")), makeIntegerLearnerParam(id = "mstop", default = 100L, lower = 1L), makeNumericLearnerParam(id = "nu", default = 0.1, lower = 0, upper = 1), makeDiscreteLearnerParam(id = "risk", values = c("inbag", "oobag", "none")), @@ -32,14 +33,15 @@ makeRLearner.classif.gamboost = function() { } #' @export -trainLearner.classif.gamboost = function(.learner, .task, .subset, .weights = NULL, Binomial.link = "logit", mstop, nu, risk, stopintern, trace, family, custom.family.definition, ...) { +trainLearner.classif.gamboost = function(.learner, .task, .subset, .weights = NULL, Binomial.link = "logit", mstop, nu, risk, stopintern, trace, family, custom.family.definition, ...) { + requirePackages("mboost", why = "argument 'baselearner' requires package", suppress.warnings = TRUE) ctrl = learnerArgsToControl(mboost::boost_control, mstop, nu, risk, stopintern, trace) family = switch(family, Binomial = mboost::Binomial(link = Binomial.link), AdaExp = mboost::AdaExp(), AUC = mboost::AUC(), - #PropOdds = mboost::PropOdds(nuirange = nuirange, offrange = offrange), + # PropOdds = mboost::PropOdds(nuirange = nuirange, offrange = offrange), custom.family = custom.family.definition) d = getTaskData(.task, .subset) if (.learner$predict.type == "prob") { @@ -50,7 +52,7 @@ trainLearner.classif.gamboost = function(.learner, .task, .subset, .weights = NU f = getTaskFormula(.task) if (is.null(.weights)) { model = mboost::gamboost(f, data = d, control = ctrl, family = family, ...) - } else { + } else { model = mboost::gamboost(f, data = d, control = ctrl, weights = .weights, family = family, ...) } model @@ -58,10 +60,11 @@ trainLearner.classif.gamboost = function(.learner, .task, .subset, .weights = NU #' @export predictLearner.classif.gamboost = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "class", "response") p = predict(.model$learner.model, newdata = .newdata, type = type, ...) 
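  # Families that do not model probabilities (the param set above offers e.g.
  # AdaExp and AUC) return NA here instead of a probability matrix; the check
  # below turns that into an informative error rather than letting NAs
  # propagate.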
- if (.learner$predict.type == "prob") { - if (!is.matrix(p) && is.na(p)){ + if (.learner$predict.type == "prob") { + if (!is.matrix(p) && is.na(p)) { stopf("The selected family %s does not support probabilities", getHyperPars(.learner)$family) } else { td = .model$task.desc diff --git a/R/RLearner_classif_gaterSVM.R b/R/RLearner_classif_gaterSVM.R index 79427342a0..d18ba1de3a 100644 --- a/R/RLearner_classif_gaterSVM.R +++ b/R/RLearner_classif_gaterSVM.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.gaterSVM = function() { + makeRLearnerClassif( cl = "classif.gaterSVM", package = "SwarmSVM", @@ -29,12 +30,14 @@ makeRLearner.classif.gaterSVM = function() { #' @export trainLearner.classif.gaterSVM = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) SwarmSVM::gaterSVM(x = d$data, y = d$target, ...) } #' @export predictLearner.classif.gaterSVM = function(.learner, .model, .newdata, ...) { + factor(predict(.model$learner.model, newdata = .newdata, ...), levels = c(-1, 1), labels = .model$factor.levels[[1]]) diff --git a/R/RLearner_classif_gausspr.R b/R/RLearner_classif_gausspr.R index 7ce41a55c3..74db02834f 100644 --- a/R/RLearner_classif_gausspr.R +++ b/R/RLearner_classif_gausspr.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.gausspr = function() { + makeRLearnerClassif( cl = "classif.gausspr", package = "kernlab", @@ -34,18 +35,21 @@ makeRLearner.classif.gausspr = function() { #' @export trainLearner.classif.gausspr = function(.learner, .task, .subset, .weights = NULL, - degree, offset, scale, sigma, order, length, lambda, normalized, ...) { + degree, offset, scale, sigma, order, length, lambda, normalized, ...) { + kpar = learnerArgsToControl(list, degree, offset, scale, sigma, order, length, lambda, normalized) f = getTaskFormula(.task) pm = .learner$predict.type == "prob" - if (base::length(kpar) > 0L) + if (base::length(kpar) > 0L) { kernlab::gausspr(f, data = getTaskData(.task, .subset), kpar = kpar, prob.model = pm, ...) - else + } else { kernlab::gausspr(f, data = getTaskData(.task, .subset), prob.model = pm, ...) + } } #' @export predictLearner.classif.gausspr = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, prob = "probabilities", "response") kernlab::predict(.model$learner.model, newdata = .newdata, type = type, ...) } diff --git a/R/RLearner_classif_gbm.R b/R/RLearner_classif_gbm.R index 1ddb7b50b3..7d811c81ff 100644 --- a/R/RLearner_classif_gbm.R +++ b/R/RLearner_classif_gbm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.gbm = function() { + makeRLearnerClassif( cl = "classif.gbm", package = "gbm", @@ -26,16 +27,18 @@ makeRLearner.classif.gbm = function() { } #' @export -trainLearner.classif.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { + td = getTaskDesc(.task) - if (length(td$class.levels) == 2L) + if (length(td$class.levels) == 2L) { d = getTaskData(.task, .subset, recode.target = "01") - else + } else { d = getTaskData(.task, .subset) + } if (is.null(.weights)) { f = getTaskFormula(.task) gbm::gbm(f, data = d, ...) - } else { + } else { f = getTaskFormula(.task) gbm::gbm(f, data = d, weights = .weights, ...) } @@ -43,6 +46,7 @@ trainLearner.classif.gbm = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.gbm = function(.learner, .model, .newdata, ...) 
{ + td = .model$task.desc m = .model$learner.model p = gbm::predict.gbm(m, newdata = .newdata, type = "response", n.trees = m$n.trees, single.tree = FALSE, ...) @@ -73,6 +77,7 @@ predictLearner.classif.gbm = function(.learner, .model, .newdata, ...) { #' @export getFeatureImportanceLearner.classif.gbm = function(.learner, .model, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) gbm::relative.influence(mod, mod$n.trees, ...) } diff --git a/R/RLearner_classif_geoDA.R b/R/RLearner_classif_geoDA.R index 153de9629d..23b83f2beb 100644 --- a/R/RLearner_classif_geoDA.R +++ b/R/RLearner_classif_geoDA.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.geoDA = function() { + makeRLearnerClassif( cl = "classif.geoDA", package = "DiscriMiner", @@ -16,15 +17,17 @@ makeRLearner.classif.geoDA = function() { } #' @export -trainLearner.classif.geoDA = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.geoDA = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "drop.levels") DiscriMiner::geoDA(variables = d$data, group = d$target, ...) } #' @export predictLearner.classif.geoDA = function(.learner, .model, .newdata, ...) { + m = .model$learner.model p = DiscriMiner::classify(m, newdata = .newdata) - #p$scores #we loose this information + # p$scores #we loose this information p$pred_class } diff --git a/R/RLearner_classif_glmboost.R b/R/RLearner_classif_glmboost.R index a4f5961538..b6cdd718e3 100644 --- a/R/RLearner_classif_glmboost.R +++ b/R/RLearner_classif_glmboost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.glmboost = function() { + makeRLearnerClassif( cl = "classif.glmboost", package = "mboost", @@ -8,8 +9,8 @@ makeRLearner.classif.glmboost = function() { makeDiscreteLearnerParam(id = "family", default = "Binomial", values = c("Binomial", "AdaExp", "AUC", "custom.family")), makeUntypedLearnerParam(id = "custom.family.definition", requires = quote(family == "custom.family")), - #makeNumericVectorLearnerParam(id = "nuirange", default = c(-0.5,-1), requires = quote(family == "PropOdds")), - #makeNumericVectorLearnerParam(id = "offrange", default = c(-5,5), requires = quote(family == "PropOdds")), + # makeNumericVectorLearnerParam(id = "nuirange", default = c(-0.5,-1), requires = quote(family == "PropOdds")), + # makeNumericVectorLearnerParam(id = "offrange", default = c(-5,5), requires = quote(family == "PropOdds")), makeDiscreteLearnerParam(id = "Binomial.link", default = "logit", values = c("logit", "probit")), makeIntegerLearnerParam(id = "mstop", default = 100L, lower = 1L), makeNumericLearnerParam(id = "nu", default = 0.1, lower = 0, upper = 1), @@ -29,13 +30,14 @@ makeRLearner.classif.glmboost = function() { } #' @export -trainLearner.classif.glmboost = function(.learner, .task, .subset, .weights = NULL, Binomial.link = "logit", custom.family.definition, mstop, nu, risk, stopintern, trace, family, ...) { +trainLearner.classif.glmboost = function(.learner, .task, .subset, .weights = NULL, Binomial.link = "logit", custom.family.definition, mstop, nu, risk, stopintern, trace, family, ...) 
{ + ctrl = learnerArgsToControl(mboost::boost_control, mstop, nu, risk, stopintern, trace) family = switch(family, Binomial = mboost::Binomial(link = Binomial.link), AdaExp = mboost::AdaExp(), AUC = mboost::AUC(), - #PropOdds = mboost::PropOdds(nuirange = nuirange, offrange = offrange), + # PropOdds = mboost::PropOdds(nuirange = nuirange, offrange = offrange), custom.family = custom.family.definition) d = getTaskData(.task, .subset) if (.learner$predict.type == "prob") { @@ -46,7 +48,7 @@ trainLearner.classif.glmboost = function(.learner, .task, .subset, .weights = NU f = getTaskFormula(.task) if (is.null(.weights)) { model = mboost::glmboost(f, data = d, control = ctrl, family = family, ...) - } else { + } else { model = mboost::glmboost(f, data = d, control = ctrl, weights = .weights, family = family, ...) } model @@ -54,10 +56,11 @@ trainLearner.classif.glmboost = function(.learner, .task, .subset, .weights = NU #' @export predictLearner.classif.glmboost = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "class", "response") p = predict(.model$learner.model, newdata = .newdata, type = type, ...) - if (.learner$predict.type == "prob") { - if (!is.matrix(p) && is.na(p)){ + if (.learner$predict.type == "prob") { + if (!is.matrix(p) && is.na(p)) { stopf("The selected family %s does not support probabilities", getHyperPars(.learner)$family) } else { td = .model$task.desc diff --git a/R/RLearner_classif_glmnet.R b/R/RLearner_classif_glmnet.R index 4bfd41f015..518b7564bc 100644 --- a/R/RLearner_classif_glmnet.R +++ b/R/RLearner_classif_glmnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.glmnet = function() { + makeRLearnerClassif( cl = "classif.glmnet", package = "glmnet", @@ -51,12 +52,14 @@ makeRLearner.classif.glmnet = function() { #' @export trainLearner.classif.glmnet = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "drop.levels") info = getFixDataInfo(d$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) args = c(list(x = as.matrix(fixDataForLearner(d$data, info)), y = d$target), list(...)) rm(d) - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } td = getTaskDesc(.task) args$family = ifelse(length(td$class.levels) == 2L, "binomial", "multinomial") @@ -75,6 +78,7 @@ trainLearner.classif.glmnet = function(.learner, .task, .subset, .weights = NULL #' @export predictLearner.classif.glmnet = function(.learner, .model, .newdata, ...) 
{ + info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) if (.learner$predict.type == "prob") { diff --git a/R/RLearner_classif_h2odeeplearning.R b/R/RLearner_classif_h2odeeplearning.R index 11fe1bebc3..4620fab790 100644 --- a/R/RLearner_classif_h2odeeplearning.R +++ b/R/RLearner_classif_h2odeeplearning.R @@ -151,8 +151,9 @@ # Details: https://leanpub.com/deeplearning/read -#'@export +#' @export makeRLearner.classif.h2o.deeplearning = function() { + makeRLearnerClassif( cl = "classif.h2o.deeplearning", package = "h2o", @@ -161,7 +162,7 @@ makeRLearner.classif.h2o.deeplearning = function() { makeLogicalLearnerParam("use_all_factor_level", default = TRUE), makeDiscreteLearnerParam("activation", values = c("Rectifier", "Tanh", "TanhWithDropout", "RectifierWithDropout", "Maxout", "MaxoutWithDropout"), - default = "Rectifier"), + default = "Rectifier"), # FIXME: hidden can also be a list of integer vectors for grid search makeIntegerVectorLearnerParam("hidden", default = c(200L, 200L), len = NA_integer_, lower = 1L), @@ -183,7 +184,7 @@ makeRLearner.classif.h2o.deeplearning = function() { makeNumericLearnerParam("l1", default = 0), makeNumericLearnerParam("l2", default = 0), makeNumericLearnerParam("max_w2", default = Inf, allow.inf = TRUE), - #makeNumericLearnerParam("max_w2", default = 1e+06), + # makeNumericLearnerParam("max_w2", default = 1e+06), makeDiscreteLearnerParam("initial_weight_distribution", values = c("UniformAdaptive", "Uniform", "Normal"), default = "UniformAdaptive"), makeNumericLearnerParam("initial_weight_scale", default = 1), @@ -215,7 +216,7 @@ makeRLearner.classif.h2o.deeplearning = function() { makeLogicalLearnerParam("sparse", default = FALSE, tunable = FALSE), makeLogicalLearnerParam("col_major", default = FALSE, tunable = FALSE), makeLogicalLearnerParam("average_activation", tunable = FALSE), - #makeLogicalLearnerParam("sparsity_beta", tunable = FALSE), + # makeLogicalLearnerParam("sparsity_beta", tunable = FALSE), makeLogicalLearnerParam("reproducible", default = FALSE, tunable = FALSE), makeLogicalLearnerParam("export_weights_and_biases", default = FALSE, tunable = FALSE) ), @@ -228,7 +229,8 @@ makeRLearner.classif.h2o.deeplearning = function() { } #' @export -trainLearner.classif.h2o.deeplearning = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.h2o.deeplearning = function(.learner, .task, .subset, .weights = NULL, ...) { + # check if h2o connection already exists, otherwise start one conn.up = tryCatch(h2o::h2o.getConnection(), error = function(err) return(FALSE)) if (!inherits(conn.up, "H2OConnection")) { @@ -248,6 +250,7 @@ trainLearner.classif.h2o.deeplearning = function(.learner, .task, .subset, .weig #' @export predictLearner.classif.h2o.deeplearning = function(.learner, .model, .newdata, ...) { + m = .model$learner.model h2of = h2o::as.h2o(.newdata) p = h2o::h2o.predict(m, newdata = h2of, ...) @@ -256,8 +259,9 @@ predictLearner.classif.h2o.deeplearning = function(.learner, .model, .newdata, . # check if class names are integers. 
if yes, colnames of p.df need to be adapted int = stri_detect_regex(p.df$predict, "^[[:digit:]]+$") pcol = stri_detect_regex(colnames(p.df), "^p[[:digit:]]+$") - if (any(int) && any(pcol)) + if (any(int) && any(pcol)) { colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L) + } if (.learner$predict.type == "response") { return(p.df$predict) diff --git a/R/RLearner_classif_h2ogbm.R b/R/RLearner_classif_h2ogbm.R index 61e54c513c..2fb5511838 100644 --- a/R/RLearner_classif_h2ogbm.R +++ b/R/RLearner_classif_h2ogbm.R @@ -1,12 +1,13 @@ #' @export makeRLearner.classif.h2o.gbm = function() { + makeRLearnerClassif( cl = "classif.h2o.gbm", package = "h2o", par.set = makeParamSet( makeIntegerLearnerParam("ntrees", lower = 1L, default = 50L), makeIntegerLearnerParam("max_depth", lower = 1L, default = 5L), - makeIntegerLearnerParam("min_rows", lower = 1L, default = 10L), + makeIntegerLearnerParam("min_rows", lower = 1L, default = 10L), makeNumericLearnerParam("learn_rate", lower = 0, upper = 1, default = 0.1), makeIntegerLearnerParam("nbins", lower = 1L, default = 20L), makeIntegerLearnerParam("nbins_cats", lower = 1L, default = 1024L), @@ -23,7 +24,8 @@ makeRLearner.classif.h2o.gbm = function() { } #' @export -trainLearner.classif.h2o.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.h2o.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { + # check if h2o connection already exists, otherwise start one conn.up = tryCatch(h2o::h2o.getConnection(), error = function(err) return(FALSE)) if (!inherits(conn.up, "H2OConnection")) { @@ -39,6 +41,7 @@ trainLearner.classif.h2o.gbm = function(.learner, .task, .subset, .weights = NUL #' @export predictLearner.classif.h2o.gbm = function(.learner, .model, .newdata, ...) { + m = .model$learner.model h2of = h2o::as.h2o(.newdata) p = h2o::h2o.predict(m, newdata = h2of, ...) @@ -47,8 +50,9 @@ predictLearner.classif.h2o.gbm = function(.learner, .model, .newdata, ...) { # check if class names are integers. if yes, colnames of p.df need to be adapted int = stri_detect_regex(p.df$predict, "^[[:digit:]]+$") pcol = stri_detect_regex(colnames(p.df), "^p[[:digit:]]+$") - if (any(int) && any(pcol)) + if (any(int) && any(pcol)) { colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L) + } if (.learner$predict.type == "response") { return(p.df$predict) diff --git a/R/RLearner_classif_h2oglm.R b/R/RLearner_classif_h2oglm.R index a37c269aa0..1b60ea45df 100644 --- a/R/RLearner_classif_h2oglm.R +++ b/R/RLearner_classif_h2oglm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.h2o.glm = function() { + makeRLearnerClassif( cl = "classif.h2o.glm", package = "h2o", @@ -30,7 +31,8 @@ makeRLearner.classif.h2o.glm = function() { } #' @export -trainLearner.classif.h2o.glm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.h2o.glm = function(.learner, .task, .subset, .weights = NULL, ...) { + # check if h2o connection already exists, otherwise start one conn.up = tryCatch(h2o::h2o.getConnection(), error = function(err) return(FALSE)) if (!inherits(conn.up, "H2OConnection")) { @@ -50,6 +52,7 @@ trainLearner.classif.h2o.glm = function(.learner, .task, .subset, .weights = NUL #' @export predictLearner.classif.h2o.glm = function(.learner, .model, .newdata, ...) { + m = .model$learner.model h2of = h2o::as.h2o(.newdata) p = h2o::h2o.predict(m, newdata = h2of, ...) @@ -58,8 +61,9 @@ predictLearner.classif.h2o.glm = function(.learner, .model, .newdata, ...) { # check if class names are integers. 
if yes, colnames of p.df need to be adapted int = stri_detect_regex(p.df$predict, "^[[:digit:]]+$") pcol = stri_detect_regex(colnames(p.df), "^p[[:digit:]]+$") - if (any(int) && any(pcol)) + if (any(int) && any(pcol)) { colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L) + } if (.learner$predict.type == "response") { return(p.df$predict) diff --git a/R/RLearner_classif_h2orandomForest.R b/R/RLearner_classif_h2orandomForest.R index 1e65b9b55f..e476b57767 100644 --- a/R/RLearner_classif_h2orandomForest.R +++ b/R/RLearner_classif_h2orandomForest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.h2o.randomForest = function() { + makeRLearnerClassif( cl = "classif.h2o.randomForest", package = "h2o", @@ -9,7 +10,7 @@ makeRLearner.classif.h2o.randomForest = function() { makeLogicalLearnerParam("build_tree_one_node", default = FALSE, tunable = FALSE), makeIntegerLearnerParam("ntrees", lower = 1L, default = 50L), makeIntegerLearnerParam("max_depth", lower = 1L, default = 20L), - makeIntegerLearnerParam("min_rows", lower = 1L, default = 1L), + makeIntegerLearnerParam("min_rows", lower = 1L, default = 1L), makeIntegerLearnerParam("nbins", lower = 1L, default = 20L), makeIntegerLearnerParam("nbins_cats", lower = 1L, default = 1024L), makeLogicalLearnerParam("binomial_double_trees", default = TRUE), @@ -25,7 +26,8 @@ makeRLearner.classif.h2o.randomForest = function() { } #' @export -trainLearner.classif.h2o.randomForest = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.h2o.randomForest = function(.learner, .task, .subset, .weights = NULL, ...) { + # check if h2o connection already exists, otherwise start one conn.up = tryCatch(h2o::h2o.getConnection(), error = function(err) return(FALSE)) if (!inherits(conn.up, "H2OConnection")) { @@ -40,6 +42,7 @@ trainLearner.classif.h2o.randomForest = function(.learner, .task, .subset, .weig #' @export predictLearner.classif.h2o.randomForest = function(.learner, .model, .newdata, ...) { + m = .model$learner.model h2of = h2o::as.h2o(.newdata) p = h2o::h2o.predict(m, newdata = h2of, ...) @@ -48,8 +51,9 @@ predictLearner.classif.h2o.randomForest = function(.learner, .model, .newdata, . # check if class names are integers. if yes, colnames of p.df need to be adapted int = stri_detect_regex(p.df$predict, "^[[:digit:]]+$") pcol = stri_detect_regex(colnames(p.df), "^p[[:digit:]]+$") - if (any(int) && any(pcol)) + if (any(int) && any(pcol)) { colnames(p.df)[pcol] = stri_sub(colnames(p.df)[pcol], 2L) + } if (.learner$predict.type == "response") { return(p.df$predict) diff --git a/R/RLearner_classif_kknn.R b/R/RLearner_classif_kknn.R index d0b6a9f45e..613df3fff7 100644 --- a/R/RLearner_classif_kknn.R +++ b/R/RLearner_classif_kknn.R @@ -1,11 +1,12 @@ #' @export makeRLearner.classif.kknn = function() { + makeRLearnerClassif( cl = "classif.kknn", # FIXME: kknn set its own contr.dummy function, if we requireNamespace, # this is not found, see issue 226 package = "!kknn", - #FIXME: find out what ykernel and contrasts really do + # FIXME: find out what ykernel and contrasts really do par.set = makeParamSet( makeIntegerLearnerParam(id = "k", default = 7L, lower = 1L), makeNumericLearnerParam(id = "distance", default = 2, lower = 0), @@ -22,20 +23,22 @@ makeRLearner.classif.kknn = function() { } #' @export -trainLearner.classif.kknn = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.kknn = function(.learner, .task, .subset, .weights = NULL, ...) 
{ + list(td = getTaskDesc(.task), data = getTaskData(.task, .subset), parset = list(...)) } #' @export predictLearner.classif.kknn = function(.learner, .model, .newdata, ...) { + m = .model$learner.model f = getTaskFormula(.model$task.desc) pars = list(formula = f, train = m$data, test = .newdata) pars = c(pars, m$parset, list(...)) m = do.call(kknn::kknn, pars) - if (.learner$predict.type == "response") + if (.learner$predict.type == "response") { return(m$fitted.values) - else + } else { return(m$prob) + } } - diff --git a/R/RLearner_classif_knn.R b/R/RLearner_classif_knn.R index 234a80db1d..6eab9dd810 100644 --- a/R/RLearner_classif_knn.R +++ b/R/RLearner_classif_knn.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.knn = function() { + makeRLearnerClassif( cl = "classif.knn", package = "class", @@ -19,15 +20,16 @@ makeRLearner.classif.knn = function() { } #' @export -trainLearner.classif.knn = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.knn = function(.learner, .task, .subset, .weights = NULL, ...) { + z = getTaskData(.task, .subset, target.extra = TRUE) c(list(train = z$data, cl = z$target), list(...)) } #' @export predictLearner.classif.knn = function(.learner, .model, .newdata, ...) { + args = .model$learner.model args$test = .newdata do.call(class::knn, args) } - diff --git a/R/RLearner_classif_ksvm.R b/R/RLearner_classif_ksvm.R index daf176db15..7dc4b04031 100644 --- a/R/RLearner_classif_ksvm.R +++ b/R/RLearner_classif_ksvm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.ksvm = function() { + makeRLearnerClassif( cl = "classif.ksvm", package = "kernlab", @@ -42,26 +43,28 @@ makeRLearner.classif.ksvm = function() { } #' @export -trainLearner.classif.ksvm = function(.learner, .task, .subset, .weights = NULL, degree, offset, scale, sigma, order, length, lambda, normalized, ...) { +trainLearner.classif.ksvm = function(.learner, .task, .subset, .weights = NULL, degree, offset, scale, sigma, order, length, lambda, normalized, ...) { # FIXME: custom kernel. freezes? check mailing list # FIXME: unify cla + regr, test all sigma stuff -# # there's a strange behaviour in r semantics here wgich forces this, see do.call and the comment about substitute -# if (!is.null(args$kernel) && is.function(args$kernel) && !is(args$kernel,"kernel")) { -# args$kernel = do.call(args$kernel, kpar) -# } + # # there's a strange behaviour in r semantics here wgich forces this, see do.call and the comment about substitute + # if (!is.null(args$kernel) && is.function(args$kernel) && !is(args$kernel,"kernel")) { + # args$kernel = do.call(args$kernel, kpar) + # } kpar = learnerArgsToControl(list, degree, offset, scale, sigma, order, length, lambda, normalized) f = getTaskFormula(.task) pm = .learner$predict.type == "prob" - if (base::length(kpar) > 0L) + if (base::length(kpar) > 0L) { kernlab::ksvm(f, data = getTaskData(.task, .subset), kpar = kpar, prob.model = pm, ...) - else + } else { kernlab::ksvm(f, data = getTaskData(.task, .subset), prob.model = pm, ...) + } } #' @export predictLearner.classif.ksvm = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, prob = "probabilities", "response") kernlab::predict(.model$learner.model, newdata = .newdata, type = type, ...) 
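  # kernlab only returns "probabilities" when the model was fitted with
  # prob.model = TRUE, which trainLearner.classif.ksvm above derives from the
  # learner's predict.type; for predict.type = "response" a plain factor of
  # class labels comes back.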
} diff --git a/R/RLearner_classif_lda.R b/R/RLearner_classif_lda.R index 6d9b7b75db..e12aa871f9 100644 --- a/R/RLearner_classif_lda.R +++ b/R/RLearner_classif_lda.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.lda = function() { + makeRLearnerClassif( cl = "classif.lda", package = "MASS", @@ -21,17 +22,19 @@ makeRLearner.classif.lda = function() { } #' @export -trainLearner.classif.lda = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.lda = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) MASS::lda(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.classif.lda = function(.learner, .model, .newdata, predict.method = "plug-in", ...) { + p = predict(.model$learner.model, newdata = .newdata, method = predict.method, ...) - if (.learner$predict.type == "response") + if (.learner$predict.type == "response") { return(p$class) - else + } else { return(p$posterior) + } } - diff --git a/R/RLearner_classif_linDA.R b/R/RLearner_classif_linDA.R index d7503a89ba..58e691a813 100644 --- a/R/RLearner_classif_linDA.R +++ b/R/RLearner_classif_linDA.R @@ -1,10 +1,11 @@ #' @export makeRLearner.classif.linDA = function() { + makeRLearnerClassif( cl = "classif.linDA", package = "DiscriMiner", par.set = makeParamSet( - #makeNumericVectorLearnerParam(id = "prior", lower = 0, upper = 1, default = NULL), + # makeNumericVectorLearnerParam(id = "prior", lower = 0, upper = 1, default = NULL), makeDiscreteLearnerParam(id = "validation", values = list(crossval = "crossval", learntest = "learntest", NULL = NULL), default = NULL, tunable = FALSE) ), properties = c("twoclass", "multiclass", "numerics"), @@ -16,15 +17,17 @@ makeRLearner.classif.linDA = function() { } #' @export -trainLearner.classif.linDA = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.linDA = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "drop.levels") DiscriMiner::linDA(variables = d$data, group = d$target, ...) } #' @export predictLearner.classif.linDA = function(.learner, .model, .newdata, ...) 
{ + m = .model$learner.model p = DiscriMiner::classify(m, newdata = .newdata) - #p$scores #we loose this information + # p$scores # we lose this information p$pred_class } diff --git a/R/RLearner_classif_liquidSVM.R b/R/RLearner_classif_liquidSVM.R index 186ad8530e..c9099faf52 100644 --- a/R/RLearner_classif_liquidSVM.R +++ b/R/RLearner_classif_liquidSVM.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.liquidSVM = function() { + makeRLearnerClassif( cl = "classif.liquidSVM", package = "liquidSVM", @@ -7,7 +8,7 @@ makeRLearner.classif.liquidSVM = function() { makeIntegerLearnerParam(id = "d", lower = 0L, upper = 7L, tunable = FALSE), makeLogicalLearnerParam(id = "scale", default = TRUE), makeIntegerLearnerParam(id = "threads", lower = -1L, default = 0), - makeDiscreteLearnerParam(id = "kernel", default = "gauss_rbf", values = c("gauss_rbf","poisson")), + makeDiscreteLearnerParam(id = "kernel", default = "gauss_rbf", values = c("gauss_rbf", "poisson")), makeIntegerLearnerParam(id = "partition_choice", lower = 0L, upper = 6L, default = 0), makeIntegerLearnerParam(id = "grid_choice", lower = -2L, upper = 2L), makeIntegerLearnerParam(id = "adaptivity_control", lower = 0L, upper = 2L, default = 0), @@ -33,14 +34,16 @@ makeRLearner.classif.liquidSVM = function() { } #' @export -trainLearner.classif.liquidSVM = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.liquidSVM = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) - liquidSVM::svm(f, getTaskData(.task, .subset), ...) - #liquidSVM::svm(f, getTaskData(.task, .subset), predict.prob = .learner$predict.type == "prob", ...) + liquidSVM::svm(f, getTaskData(.task, .subset), ...) + # liquidSVM::svm(f, getTaskData(.task, .subset), predict.prob = .learner$predict.type == "prob", ...) } #' @export predictLearner.classif.liquidSVM = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...) # res = as.matrix(predict(.model$learner.model, newdata = .newdata, ...)) # res = res/rowSums(res) diff --git a/R/RLearner_classif_logreg.R b/R/RLearner_classif_logreg.R index 04272bc0e4..c22d4552da 100644 --- a/R/RLearner_classif_logreg.R +++ b/R/RLearner_classif_logreg.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.logreg = function() { + makeRLearnerClassif( cl = "classif.logreg", package = "stats", @@ -18,13 +19,15 @@ makeRLearner.classif.logreg = function() { } #' @export -trainLearner.classif.logreg = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.logreg = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) stats::glm(f, data = getTaskData(.task, .subset), family = "binomial", weights = .weights, ...) } #' @export predictLearner.classif.logreg = function(.learner, .model, .newdata, ...) { + x = predict(.model$learner.model, newdata = .newdata, type = "response", ...) levs = .model$task.desc$class.levels if (.learner$predict.type == "prob") { diff --git a/R/RLearner_classif_lssvm.R b/R/RLearner_classif_lssvm.R index 401beb162f..658c33033b 100644 --- a/R/RLearner_classif_lssvm.R +++ b/R/RLearner_classif_lssvm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.lssvm = function() { + makeRLearnerClassif( cl = "classif.lssvm", package = "kernlab", @@ -34,20 +35,23 @@ makeRLearner.classif.lssvm = function() { #' @export trainLearner.classif.lssvm = function(.learner, .task, .subset, .weights = NULL, degree, offset, scale, sigma, order, length, lambda, normalized, ...)
{ -# FIXME: custom kernel. freezes? check mailing list -# FIXME: unify cla + regr, test all sigma stuff + + # FIXME: custom kernel. freezes? check mailing list + # FIXME: unify cla + regr, test all sigma stuff kpar = learnerArgsToControl(list, degree, offset, scale, sigma, order, length, lambda, normalized) f = getTaskFormula(.task) - if (base::length(kpar)) + if (base::length(kpar)) { kernlab::lssvm(f, data = getTaskData(.task, .subset), kpar = kpar, ...) - else + } else { kernlab::lssvm(f, data = getTaskData(.task, .subset), ...) + } } #' @export predictLearner.classif.lssvm = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, "response") kernlab::predict(.model$learner.model, newdata = .newdata, type = type, ...) } diff --git a/R/RLearner_classif_lvq1.R b/R/RLearner_classif_lvq1.R index 835748c480..e14ddec69e 100644 --- a/R/RLearner_classif_lvq1.R +++ b/R/RLearner_classif_lvq1.R @@ -1,6 +1,7 @@ # FIXME: parset #' @export makeRLearner.classif.lvq1 = function() { + makeRLearnerClassif( cl = "classif.lvq1", package = "class", @@ -13,7 +14,8 @@ makeRLearner.classif.lvq1 = function() { } #' @export -trainLearner.classif.lvq1 = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.lvq1 = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) cdbk.args = insert(list(), list(...), c("size", "k", "prior")) cdbk.args$x = d$data @@ -29,5 +31,6 @@ trainLearner.classif.lvq1 = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.lvq1 = function(.learner, .model, .newdata, ...) { + class::lvqtest(.model$learner.model, test = .newdata, ...) } diff --git a/R/RLearner_classif_mda.R b/R/RLearner_classif_mda.R index f3fa9990a9..f24125cacf 100644 --- a/R/RLearner_classif_mda.R +++ b/R/RLearner_classif_mda.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.mda = function() { + makeRLearnerClassif( cl = "classif.mda", package = "!mda", @@ -29,13 +30,15 @@ makeRLearner.classif.mda = function() { } #' @export -trainLearner.classif.mda = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.mda = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) mda::mda(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.classif.mda = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "class", "posterior") predict(.model$learner.model, newdata = .newdata, type = type, ...) } diff --git a/R/RLearner_classif_mlp.R b/R/RLearner_classif_mlp.R index 78943bce15..ebb578be92 100644 --- a/R/RLearner_classif_mlp.R +++ b/R/RLearner_classif_mlp.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.mlp = function() { + makeRLearnerClassif( cl = "classif.mlp", package = "RSNNS", @@ -28,7 +29,8 @@ makeRLearner.classif.mlp = function() { } #' @export -trainLearner.classif.mlp = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.mlp = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) onehot = RSNNS::decodeClassLabels(d$target) RSNNS::mlp(x = d$data, y = onehot, ...) @@ -36,6 +38,7 @@ trainLearner.classif.mlp = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.mlp = function(.learner, .model, .newdata, ...) 
{ + type = switch(.learner$predict.type, response = "class", prob = "raw") pred = predict(.model$learner.model, .newdata) colnames(pred) = .model$factor.levels[[1]] diff --git a/R/RLearner_classif_multinom.R b/R/RLearner_classif_multinom.R index de65b936b3..e9d689be62 100644 --- a/R/RLearner_classif_multinom.R +++ b/R/RLearner_classif_multinom.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.multinom = function() { + makeRLearnerClassif( cl = "classif.multinom", package = "nnet", @@ -23,11 +24,12 @@ makeRLearner.classif.multinom = function() { } #' @export -trainLearner.classif.multinom = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.multinom = function(.learner, .task, .subset, .weights = NULL, ...) { + if (is.null(.weights)) { f = getTaskFormula(.task) nnet::multinom(f, data = getTaskData(.task, .subset), ...) - } else { + } else { f = getTaskFormula(.task) nnet::multinom(f, data = getTaskData(.task, .subset), weights = .weights, ...) } @@ -35,6 +37,7 @@ trainLearner.classif.multinom = function(.learner, .task, .subset, .weights = NU #' @export predictLearner.classif.multinom = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "class", "probs") levs = .model$task.desc$class.levels p = predict(.model$learner.model, newdata = .newdata, type = type, ...) diff --git a/R/RLearner_classif_naiveBayes.R b/R/RLearner_classif_naiveBayes.R index da56c281a4..4ce2a34193 100644 --- a/R/RLearner_classif_naiveBayes.R +++ b/R/RLearner_classif_naiveBayes.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.naiveBayes = function() { + makeRLearnerClassif( cl = "classif.naiveBayes", package = "e1071", @@ -15,13 +16,15 @@ makeRLearner.classif.naiveBayes = function() { } #' @export -trainLearner.classif.naiveBayes = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.naiveBayes = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) e1071::naiveBayes(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.classif.naiveBayes = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "class", "raw") predict(.model$learner.model, newdata = .newdata, type = type, ...) 
} diff --git a/R/RLearner_classif_neuralnet.R b/R/RLearner_classif_neuralnet.R index ff154a4b6d..214c7635e9 100644 --- a/R/RLearner_classif_neuralnet.R +++ b/R/RLearner_classif_neuralnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.neuralnet = function() { + makeRLearnerClassif( cl = "classif.neuralnet", package = "neuralnet", @@ -11,18 +12,18 @@ makeRLearner.classif.neuralnet = function() { makeNumericVectorLearnerParam(id = "startweights"), makeNumericVectorLearnerParam(id = "learningrate.limit"), makeUntypedLearnerParam(id = "learningrate.factor", - default = list(minus = 0.5, plus = 1.2)), + default = list(minus = 0.5, plus = 1.2)), makeNumericLearnerParam(id = "learningrate"), makeDiscreteLearnerParam(id = "lifesign", default = "none", - values = c("none", "minimal", "full")), + values = c("none", "minimal", "full")), makeIntegerLearnerParam(id = "lifesign.step", default = 1000L), makeDiscreteLearnerParam(id = "algorithm", default = "rprop+", - values = c("backprop", "rprop+", "rprop-", "sag", "slr")), + values = c("backprop", "rprop+", "rprop-", "sag", "slr")), makeDiscreteLearnerParam(id = "err.fct", default = "ce", - values = c("sse", "ce")), + values = c("sse", "ce")), # FIXME: default for err.fct in neuralnet() is "sse" makeDiscreteLearnerParam(id = "act.fct", default = "logistic", - values = c("logistic", "tanh")), + values = c("logistic", "tanh")), makeNumericVectorLearnerParam(id = "exclude"), makeNumericVectorLearnerParam(id = "constant.weights"), makeLogicalLearnerParam(id = "likelihood", default = FALSE) @@ -37,31 +38,33 @@ makeRLearner.classif.neuralnet = function() { } #' @export -trainLearner.classif.neuralnet = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.neuralnet = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) cf = as.character(f) taskdat = getTaskData(.task, .subset) nms = names(taskdat) formula.head = as.character(f)[2] - if (is.character(taskdat[[formula.head]])){ + if (is.character(taskdat[[formula.head]])) { taskdat[[formula.head]] = as.factor(taskdat[[formula.head]]) taskdat[[formula.head]] = as.numeric(taskdat[[formula.head]]) } - if (is.factor(taskdat[[formula.head]])){ + if (is.factor(taskdat[[formula.head]])) { taskdat[[formula.head]] = as.numeric(taskdat[[formula.head]]) } lvls = length(unique(taskdat[[formula.head]])) - if (length(lvls) > 2) + if (lvls > 2) { stop("Use neuralnet to do binary classification") - if (!all(taskdat[[formula.head]] == 0 | taskdat[[formula.head]] == 1)){ + } + if (!all(taskdat[[formula.head]] == 0 | taskdat[[formula.head]] == 1)) { taskdat[[formula.head]] = taskdat[[formula.head]] - 1 } - if (sum(stri_detect_regex(cf, "\\.")) > 0){ + if (sum(stri_detect_regex(cf, "\\.")) > 0) { varnames = nms[nms != formula.head] formula.head = stri_paste("as.numeric(", formula.head, ")~", sep = " ") formula.expand = stri_paste(formula.head, - stri_paste(varnames, collapse = "+", sep = " "), - sep = " ") + stri_paste(varnames, collapse = "+", sep = " "), + sep = " ") formula.expand = as.formula(formula.expand) f = formula.expand } @@ -71,6 +74,7 @@ trainLearner.classif.neuralnet = function(.learner, .task, .subset, .weights = N #' @export predictLearner.classif.neuralnet = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, response = "class", prob = "raw") p = neuralnet::compute(x = .model$learner.model, covariate = .newdata, ...)
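Every learner file touched by this patch implements the same three-method S3 contract: a makeRLearner.<id> constructor that describes the learner, a trainLearner.<id> method that fits (or simply stores) the model, and a predictLearner.<id> method that turns the stored model into predictions. The following minimal sketch illustrates that contract for orientation only; it is not part of the patch. The learner id "classif.mostfreq" and its majority-class internals are hypothetical, while makeRLearnerClassif, makeParamSet, and getTaskData are the same mlr constructors used in the hunks above.

#' @export
makeRLearner.classif.mostfreq = function() {

  makeRLearnerClassif(
    cl = "classif.mostfreq", # id used for S3 dispatch of the train/predict methods
    package = "stats",
    par.set = makeParamSet(),
    properties = c("twoclass", "multiclass", "numerics", "factors"),
    name = "Most Frequent Class",
    short.name = "mostfreq"
  )
}

#' @export
trainLearner.classif.mostfreq = function(.learner, .task, .subset, .weights = NULL, ...) {

  # no real fitting: just remember the class distribution of the training data
  d = getTaskData(.task, .subset, target.extra = TRUE)
  table(d$target)
}

#' @export
predictLearner.classif.mostfreq = function(.learner, .model, .newdata, ...) {

  # .model$learner.model is whatever trainLearner returned (here: the class table)
  tab = .model$learner.model
  factor(rep(names(which.max(tab)), nrow(.newdata)), levels = names(tab))
}
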
diff --git a/R/RLearner_classif_nnTrain.R b/R/RLearner_classif_nnTrain.R index 1b20be0070..52ecfc3a08 100644 --- a/R/RLearner_classif_nnTrain.R +++ b/R/RLearner_classif_nnTrain.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.nnTrain = function() { + makeRLearnerClassif( cl = "classif.nnTrain", package = "deepnet", @@ -29,6 +30,7 @@ makeRLearner.classif.nnTrain = function() { #' @export trainLearner.classif.nnTrain = function(.learner, .task, .subset, .weights = NULL, max.number.of.layers = Inf, hidden = 10, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) y = as.numeric(d$target) dict = sort(unique(y)) @@ -42,6 +44,7 @@ trainLearner.classif.nnTrain = function(.learner, .task, .subset, .weights = NUL #' @export predictLearner.classif.nnTrain = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, response = "class", prob = "raw") pred = deepnet::nn.predict(.model$learner.model, data.matrix(.newdata)) colnames(pred) = .model$factor.levels[[1]] diff --git a/R/RLearner_classif_nnet.R b/R/RLearner_classif_nnet.R index b7687dd53d..cc9bda52dc 100644 --- a/R/RLearner_classif_nnet.R +++ b/R/RLearner_classif_nnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.nnet = function() { + makeRLearnerClassif( cl = "classif.nnet", package = "nnet", @@ -8,10 +9,10 @@ makeRLearner.classif.nnet = function() { # FIXME size seems to have no default in nnet(). If it has, par.vals is redundant makeIntegerLearnerParam(id = "maxit", default = 100L, lower = 1L), # nnet seems to set these manually and hard for classification..... -# makeLogicalLearnerParam(id = "linout", default = FALSE, requires = quote(entropy == FALSE && softmax == FALSE && censored == FALSE)), -# makeLogicalLearnerParam(id = "entropy", default = FALSE, requires = quote(linout == FALSE && softmax == FALSE && censored == FALSE)), -# makeLogicalLearnerParam(id = "softmax", default = FALSE, requires = quote(entropy == FALSE && linout == FALSE && censored == FALSE)), -# makeLogicalLearnerParam(id = "censored", default = FALSE, requires = quote(linout == FALSE && softmax == FALSE && entropy == FALSE)), + # makeLogicalLearnerParam(id = "linout", default = FALSE, requires = quote(entropy == FALSE && softmax == FALSE && censored == FALSE)), + # makeLogicalLearnerParam(id = "entropy", default = FALSE, requires = quote(linout == FALSE && softmax == FALSE && censored == FALSE)), + # makeLogicalLearnerParam(id = "softmax", default = FALSE, requires = quote(entropy == FALSE && linout == FALSE && censored == FALSE)), + # makeLogicalLearnerParam(id = "censored", default = FALSE, requires = quote(linout == FALSE && softmax == FALSE && entropy == FALSE)), makeLogicalLearnerParam(id = "skip", default = FALSE), makeNumericLearnerParam(id = "rang", default = 0.7), makeNumericLearnerParam(id = "decay", default = 0), @@ -31,11 +32,12 @@ makeRLearner.classif.nnet = function() { } #' @export -trainLearner.classif.nnet = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.nnet = function(.learner, .task, .subset, .weights = NULL, ...) { + if (is.null(.weights)) { f = getTaskFormula(.task) nnet::nnet(f, data = getTaskData(.task, .subset), ...) - } else { + } else { f = getTaskFormula(.task) nnet::nnet(f, data = getTaskData(.task, .subset), weights = .weights, ...) } @@ -43,16 +45,18 @@ trainLearner.classif.nnet = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.nnet = function(.learner, .model, .newdata, ...) 
{ + type = switch(.learner$predict.type, response = "class", prob = "raw") p = predict(.model$learner.model, newdata = .newdata, type = type, ...) - if (type == "class") + if (type == "class") { return(as.factor(p)) - else { + } else { if (length(.model$task.desc$class.levels) == 2L) { y = cbind(1 - p, p) colnames(y) = .model$learner.model$lev return(y) - } else + } else { return(p) + } } } diff --git a/R/RLearner_classif_nodeHarvest.R b/R/RLearner_classif_nodeHarvest.R index 42b522a426..8a5348e23e 100644 --- a/R/RLearner_classif_nodeHarvest.R +++ b/R/RLearner_classif_nodeHarvest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.nodeHarvest = function() { + makeRLearnerClassif( cl = "classif.nodeHarvest", package = "nodeHarvest", @@ -23,12 +24,14 @@ makeRLearner.classif.nodeHarvest = function() { #' @export trainLearner.classif.nodeHarvest = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "01") nodeHarvest::nodeHarvest(X = d$data, Y = d$target, ...) } #' @export predictLearner.classif.nodeHarvest = function(.learner, .model, .newdata, ...) { + levs = c(.model$task.desc$negative, .model$task.desc$positive) p = predict(.model$learner.model, .newdata, ...) if (.learner$predict.type == "prob") { diff --git a/R/RLearner_classif_pamr.R b/R/RLearner_classif_pamr.R index d83fd772f3..dbac3b59ce 100644 --- a/R/RLearner_classif_pamr.R +++ b/R/RLearner_classif_pamr.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.pamr = function() { + makeRLearnerClassif( cl = "classif.pamr", package = "pamr", @@ -14,7 +15,7 @@ makeRLearner.classif.pamr = function() { makeNumericVectorLearnerParam(id = "prior", lower = 0, upper = 1), makeLogicalLearnerParam(id = "remove.zeros", default = TRUE), makeDiscreteLearnerParam(id = "sign.contrast", default = "both", values = c("both", "negative", "positive")), - #we cannot the use the orginal argument name 'threshold', because it's already used + # we cannot use the original argument name 'threshold', because it's already used makeNumericLearnerParam(id = "threshold.predict", default = 1, when = "predict") # FIXME threshold in pamr.predict() seems to have no default. If it has 1 as default, par.vals is redundant ), @@ -29,12 +30,14 @@ makeRLearner.classif.pamr = function() { #' @export trainLearner.classif.pamr = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) pamr::pamr.train(data = list(x = t(d$data), y = d$target), ...) } #' @export predictLearner.classif.pamr = function(.learner, .model, .newdata, threshold.predict, ...) { + type = ifelse(.learner$predict.type == "prob", "posterior", "class") pamr::pamr.predict(.model$learner.model, t(.newdata), threshold = threshold.predict, type = type, ...) } diff --git a/R/RLearner_classif_penalized.R b/R/RLearner_classif_penalized.R index f72407d217..d0339e812b 100644 --- a/R/RLearner_classif_penalized.R +++ b/R/RLearner_classif_penalized.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.penalized = function() { + makeRLearnerClassif( cl = "classif.penalized", package = "!penalized", @@ -28,19 +29,21 @@ makeRLearner.classif.penalized = function() { } #' @export -trainLearner.classif.penalized = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.penalized = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) penalized::penalized(f, data = getTaskData(.task, .subset), model = "logistic", ...)
} #' @export predictLearner.classif.penalized = function(.learner, .model, .newdata, ...) { + m = .model$learner.model levs = .model$task.desc$class.levels # FIXME: should be removed, reported in issue 840 m@formula$unpenalized[[2L]] = as.symbol(.model$task.desc$target) .newdata[, .model$task.desc$target] = 0 - pred = penalized::predict(m, data = .newdata, ...) + pred = penalized::predict(m, data = .newdata, ...) if (.learner$predict.type == "prob") { propVectorToMatrix(pred, levs) } else { diff --git a/R/RLearner_classif_plr.R b/R/RLearner_classif_plr.R index 0d765aca68..447eca6f64 100644 --- a/R/RLearner_classif_plr.R +++ b/R/RLearner_classif_plr.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.plr = function() { + makeRLearnerClassif( cl = "classif.plr", package = "stepPlr", @@ -17,28 +18,32 @@ makeRLearner.classif.plr = function() { } #' @export -trainLearner.classif.plr = function(.learner, .task, .subset, .weights = NULL, cp.type, cp, ...) { +trainLearner.classif.plr = function(.learner, .task, .subset, .weights = NULL, cp.type, cp, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "01") # cp.type has preference - if (!missing(cp.type)) + if (!missing(cp.type)) { cp2 = cp.type - else if (!missing(cp)) + } else if (!missing(cp)) { cp2 = cp - else + } else { cp2 = NULL + } args = list(x = d$data, y = d$target) args$cp = cp2 - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } args = c(args, list(...)) do.call(stepPlr::plr, args) } #' @export predictLearner.classif.plr = function(.learner, .model, .newdata, ...) { + p = stepPlr::predict.plr(.model$learner.model, newx = .newdata, type = "response", ...) levs = c(.model$task.desc$negative, .model$task.desc$positive) - if (.learner$predict.type == "prob"){ + if (.learner$predict.type == "prob") { y = propVectorToMatrix(p, levs) return(y) } else { diff --git a/R/RLearner_classif_plsdaCaret.R b/R/RLearner_classif_plsdaCaret.R index 237382a121..dbdc7ab070 100644 --- a/R/RLearner_classif_plsdaCaret.R +++ b/R/RLearner_classif_plsdaCaret.R @@ -1,12 +1,13 @@ #' @export makeRLearner.classif.plsdaCaret = function() { + makeRLearnerClassif(cl = "classif.plsdaCaret", package = c("caret", "pls"), par.set = makeParamSet( makeIntegerLearnerParam(id = "ncomp", default = 2, lower = 1), makeDiscreteLearnerParam(id = "probMethod", values = c("softmax", "Bayes"), default = "softmax"), makeDiscreteLearnerParam(id = "method", default = "kernelpls", - values = c("kernelpls", "widekernelpls", "simpls", "oscorespls")) + values = c("kernelpls", "widekernelpls", "simpls", "oscorespls")) ), properties = c("numerics", "prob", "twoclass"), name = "Partial Least Squares (PLS) Discriminant Analysis", @@ -17,15 +18,17 @@ makeRLearner.classif.plsdaCaret = function() { #' @export trainLearner.classif.plsdaCaret = function(.learner, .task, .subset, .weights, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) caret::plsda(d$data, d$target, ...) } #' @export predictLearner.classif.plsdaCaret = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "class", "prob") p = predict(.model$learner.model, newdata = .newdata, type = type, ...) 
- if (type == "prob"){ + if (type == "prob") { p = p[, , 1] } return(p) diff --git a/R/RLearner_classif_probit.R b/R/RLearner_classif_probit.R index 5eab99eeff..f4779aa830 100644 --- a/R/RLearner_classif_probit.R +++ b/R/RLearner_classif_probit.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.probit = function() { + makeRLearnerClassif( cl = "classif.probit", package = "stats", @@ -18,7 +19,8 @@ makeRLearner.classif.probit = function() { } #' @export -trainLearner.classif.probit = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.probit = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) stats::glm(f, data = getTaskData(.task, .subset), family = binomial(link = "probit"), weights = .weights, ...) @@ -26,6 +28,7 @@ trainLearner.classif.probit = function(.learner, .task, .subset, .weights = NULL #' @export predictLearner.classif.probit = function(.learner, .model, .newdata, ...) { + x = predict(.model$learner.model, newdata = .newdata, type = "response", ...) levs = .model$task.desc$class.levels if (.learner$predict.type == "prob") { @@ -36,4 +39,3 @@ predictLearner.classif.probit = function(.learner, .model, .newdata, ...) { unname(p) } } - diff --git a/R/RLearner_classif_qda.R b/R/RLearner_classif_qda.R index 3e526276a7..c54356b7ba 100644 --- a/R/RLearner_classif_qda.R +++ b/R/RLearner_classif_qda.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.qda = function() { + makeRLearnerClassif( cl = "classif.qda", package = "MASS", @@ -18,16 +19,19 @@ makeRLearner.classif.qda = function() { } #' @export -trainLearner.classif.qda = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.qda = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) MASS::qda(f, data = getTaskData(.task, .subset, recode.target = "drop.levels"), ...) } #' @export predictLearner.classif.qda = function(.learner, .model, .newdata, predict.method = "plug-in", ...) { + p = predict(.model$learner.model, newdata = .newdata, method = predict.method, ...) - if (.learner$predict.type == "response") + if (.learner$predict.type == "response") { return(p$class) - else + } else { return(p$posterior) + } } diff --git a/R/RLearner_classif_quaDA.R b/R/RLearner_classif_quaDA.R index 5e0b7879a0..9cd92996b7 100644 --- a/R/RLearner_classif_quaDA.R +++ b/R/RLearner_classif_quaDA.R @@ -1,10 +1,11 @@ #' @export makeRLearner.classif.quaDA = function() { + makeRLearnerClassif( cl = "classif.quaDA", package = "DiscriMiner", par.set = makeParamSet( - #makeNumericVectorLearnerParam(id = "prior", lower = 0, upper = 1, default = NULL), + # makeNumericVectorLearnerParam(id = "prior", lower = 0, upper = 1, default = NULL), makeDiscreteLearnerParam(id = "validation", values = list(crossval = "crossval", learntest = "learntest", NULL = NULL), default = NULL, tunable = FALSE) ), properties = c("twoclass", "multiclass", "numerics"), @@ -15,15 +16,17 @@ makeRLearner.classif.quaDA = function() { } #' @export -trainLearner.classif.quaDA = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.quaDA = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "drop.levels") DiscriMiner::quaDA(variables = d$data, group = d$target, ...) } #' @export predictLearner.classif.quaDA = function(.learner, .model, .newdata, ...) 
{ + m = .model$learner.model p = DiscriMiner::classify(m, newdata = .newdata) - #p$scores #we loose this information + # p$scores # we lose this information p$pred_class } diff --git a/R/RLearner_classif_rFerns.R b/R/RLearner_classif_rFerns.R index dd14bbd813..6866a9e399 100644 --- a/R/RLearner_classif_rFerns.R +++ b/R/RLearner_classif_rFerns.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.rFerns = function() { + makeRLearnerClassif( cl = "classif.rFerns", package = "rFerns", @@ -20,16 +21,19 @@ makeRLearner.classif.rFerns = function() { #' @export trainLearner.classif.rFerns = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) rFerns::rFerns(x = d$data, y = d$target, ...) } #' @export predictLearner.classif.rFerns = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, .newdata, ...) } #' @export getOOBPredsLearner.classif.rFerns = function(.learner, .model) { + getLearnerModel(.model, more.unwrap = TRUE)$oobPreds } diff --git a/R/RLearner_classif_randomForest.R b/R/RLearner_classif_randomForest.R index ee53b4d0b2..3c8567eebd 100644 --- a/R/RLearner_classif_randomForest.R +++ b/R/RLearner_classif_randomForest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.randomForest = function() { + makeRLearnerClassif( cl = "classif.randomForest", package = "randomForest", @@ -33,27 +34,33 @@ makeRLearner.classif.randomForest = function() { #' @export trainLearner.classif.randomForest = function(.learner, .task, .subset, .weights = NULL, classwt = NULL, cutoff, ...) { + f = getTaskFormula(.task) data = getTaskData(.task, .subset, recode.target = "drop.levels") levs = levels(data[, getTaskTargetNames(.task)]) n = length(levs) - if (missing(cutoff)) + if (missing(cutoff)) { cutoff = rep(1 / n, n) - if (!missing(classwt) && is.numeric(classwt) && length(classwt) == n && is.null(names(classwt))) + } + if (!missing(classwt) && is.numeric(classwt) && length(classwt) == n && is.null(names(classwt))) { names(classwt) = levs - if (is.numeric(cutoff) && length(cutoff) == n && is.null(names(cutoff))) + } + if (is.numeric(cutoff) && length(cutoff) == n && is.null(names(cutoff))) { names(cutoff) = levs + } randomForest::randomForest(f, data = data, classwt = classwt, cutoff = cutoff, ...) } #' @export predictLearner.classif.randomForest = function(.learner, .model, .newdata, ...) { + type = ifelse(.learner$predict.type == "response", "response", "prob") predict(.model$learner.model, newdata = .newdata, type = type, ...) } #' @export getOOBPredsLearner.classif.randomForest = function(.learner, .model) { + if (.learner$predict.type == "response") { m = getLearnerModel(.model, more.unwrap = TRUE) unname(m$predicted) @@ -64,6 +71,7 @@ getOOBPredsLearner.classif.randomForest = function(.learner, .model) { #' @export getFeatureImportanceLearner.classif.randomForest = function(.learner, .model, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) ctrl = list(...) if (is.null(ctrl$type)) { @@ -71,8 +79,9 @@ getFeatureImportanceLearner.classif.randomForest = function(.learner, .model, ..
} else { if (ctrl$type == 1L) { has.fiv = .learner$par.vals$importance - if (is.null(has.fiv) || has.fiv != TRUE) + if (is.null(has.fiv) || has.fiv != TRUE) { stop("You need to train the learner with parameter 'importance' set to TRUE") + } } } diff --git a/R/RLearner_classif_randomForestSRC.R b/R/RLearner_classif_randomForestSRC.R index 98e6e4e49b..cc92856046 100644 --- a/R/RLearner_classif_randomForestSRC.R +++ b/R/RLearner_classif_randomForestSRC.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.randomForestSRC = function() { + makeRLearnerClassif( cl = "classif.randomForestSRC", package = "randomForestSRC", @@ -51,12 +52,14 @@ makeRLearner.classif.randomForestSRC = function() { #' @export trainLearner.classif.randomForestSRC = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) randomForestSRC::rfsrc(f, data = getTaskData(.task, .subset, recode.target = "drop.levels"), case.wt = .weights, ...) } #' @export predictLearner.classif.randomForestSRC = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata, membership = FALSE, ...) if (.learner$predict.type == "prob") { return(p$predicted) @@ -67,6 +70,7 @@ predictLearner.classif.randomForestSRC = function(.learner, .model, .newdata, .. #' @export getOOBPredsLearner.classif.randomForestSRC = function(.learner, .model) { + preds = getLearnerModel(.model, more.unwrap = TRUE)$predicted.oob if (.learner$predict.type == "response") { factor(colnames(preds)[max.col(preds)], levels = colnames(preds)) @@ -77,6 +81,7 @@ getOOBPredsLearner.classif.randomForestSRC = function(.learner, .model) { #' @export getFeatureImportanceLearner.classif.randomForestSRC = function(.learner, .model, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) randomForestSRC::vimp(mod, ...)$importance[, "all"] } diff --git a/R/RLearner_classif_ranger.R b/R/RLearner_classif_ranger.R index 5f88dcaa74..4ed363881a 100644 --- a/R/RLearner_classif_ranger.R +++ b/R/RLearner_classif_ranger.R @@ -1,6 +1,7 @@ #' @export makeRLearner.classif.ranger = function() { + makeRLearnerClassif( cl = "classif.ranger", package = "ranger", @@ -35,6 +36,7 @@ makeRLearner.classif.ranger = function() { #' @export trainLearner.classif.ranger = function(.learner, .task, .subset, .weights = NULL, ...) { + tn = getTaskTargetNames(.task) ranger::ranger(formula = NULL, dependent.variable = tn, data = getTaskData(.task, .subset), probability = (.learner$predict.type == "prob"), case.weights = .weights, ...) @@ -42,17 +44,20 @@ trainLearner.classif.ranger = function(.learner, .task, .subset, .weights = NULL #' @export predictLearner.classif.ranger = function(.learner, .model, .newdata, ...) { + p = predict(object = .model$learner.model, data = .newdata, ...) return(p$predictions) } #' @export getOOBPredsLearner.classif.ranger = function(.learner, .model) { + getLearnerModel(.model, more.unwrap = TRUE)$predictions } #' @export getFeatureImportanceLearner.classif.ranger = function(.learner, .model, ...) 
{ + has.fiv = .learner$par.vals$importance if (is.null(has.fiv) || has.fiv == "none") { stop("You must set the learners parameter value for importance to diff --git a/R/RLearner_classif_rda.R b/R/RLearner_classif_rda.R index 720bb1348c..c6232c7a35 100644 --- a/R/RLearner_classif_rda.R +++ b/R/RLearner_classif_rda.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.rda = function() { + makeRLearnerClassif( cl = "classif.rda", package = "klaR", @@ -30,15 +31,18 @@ makeRLearner.classif.rda = function() { } #' @export -trainLearner.classif.rda = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.rda = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) klaR::rda(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.classif.rda = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata, ...) - if (.learner$predict.type == "response") + if (.learner$predict.type == "response") { return(p$class) + } return(p$posterior) } diff --git a/R/RLearner_classif_rknn.R b/R/RLearner_classif_rknn.R index fcae1500cd..213277a4d3 100644 --- a/R/RLearner_classif_rknn.R +++ b/R/RLearner_classif_rknn.R @@ -1,5 +1,6 @@ #' @export -makeRLearner.classif.rknn = function(){ +makeRLearner.classif.rknn = function() { + makeRLearnerClassif( cl = "classif.rknn", package = "rknn", @@ -20,13 +21,15 @@ makeRLearner.classif.rknn = function(){ } #' @export -trainLearner.classif.rknn = function(.learner, .task, .subset, .weights = NULL, ...){ +trainLearner.classif.rknn = function(.learner, .task, .subset, .weights = NULL, ...) { + z = getTaskData(.task, .subset, target.extra = TRUE) c(list(data = z$data, y = z$target), list(...)) } #' @export -predictLearner.classif.rknn = function(.learner, .model, .newdata, ...){ +predictLearner.classif.rknn = function(.learner, .model, .newdata, ...) { + args = .model$learner.model args$newdata = .newdata do.call(rknn::rknn, args)$pred diff --git a/R/RLearner_classif_rotationForest.R b/R/RLearner_classif_rotationForest.R index 1cb99c7f42..5eba59af45 100644 --- a/R/RLearner_classif_rotationForest.R +++ b/R/RLearner_classif_rotationForest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.rotationForest = function() { + makeRLearnerClassif( cl = "classif.rotationForest", package = "rotationForest", @@ -11,26 +12,28 @@ makeRLearner.classif.rotationForest = function() { name = "Rotation Forest", short.name = "rotationForest", callees = "rotationForest" - ) + ) } #' @export trainLearner.classif.rotationForest = function(.learner, .task, .subset, .weights = NULL, ...) { + df = getTaskData(.task, .subset, target.extra = TRUE) features = df$data - #rotationForest needs 0-1 coding + # rotationForest needs 0-1 coding target = as.factor(ifelse(df$target == .task$task.desc$positive, 1L, 0L)) rotationForest::rotationForest(x = features, y = target, ...) } #' @export predictLearner.classif.rotationForest = function(.learner, .model, .newdata, ...) { + features = .newdata[, names(.newdata) == .model$features] p = predict(.model$learner.model, newdata = features, all = FALSE, ...) 
- if (.learner$predict.type == "prob"){ + if (.learner$predict.type == "prob") { levs = c(.model$task.desc$positive, .model$task.desc$negative) propVectorToMatrix(1 - p, levs) - }else{ + } else { as.factor(ifelse(p > 0.5, .model$task.desc$positive, .model$task.desc$negative)) } } diff --git a/R/RLearner_classif_rpart.R b/R/RLearner_classif_rpart.R index ebc56bc535..596aaa219e 100644 --- a/R/RLearner_classif_rpart.R +++ b/R/RLearner_classif_rpart.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.rpart = function() { + makeRLearnerClassif( cl = "classif.rpart", package = "rpart", @@ -27,11 +28,12 @@ makeRLearner.classif.rpart = function() { #' @export trainLearner.classif.rpart = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset) if (is.null(.weights)) { f = getTaskFormula(.task) rpart::rpart(f, data = d, ...) - } else { + } else { f = getTaskFormula(.task) rpart::rpart(f, data = d, weights = .weights, ...) } @@ -39,12 +41,14 @@ trainLearner.classif.rpart = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.classif.rpart = function(.learner, .model, .newdata, ...) { + type = switch(.learner$predict.type, prob = "prob", "class") predict(.model$learner.model, newdata = .newdata, type = type, ...) } #' @export getFeatureImportanceLearner.classif.rpart = function(.learner, .model, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) mod$variable.importance } diff --git a/R/RLearner_classif_rrlda.R b/R/RLearner_classif_rrlda.R index c5a8979fae..d3a3969fd1 100644 --- a/R/RLearner_classif_rrlda.R +++ b/R/RLearner_classif_rrlda.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.rrlda = function() { + makeRLearnerClassif( cl = "classif.rrlda", package = "!rrlda", @@ -19,12 +20,14 @@ makeRLearner.classif.rrlda = function() { } #' @export -trainLearner.classif.rrlda = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.rrlda = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE, recode.target = "drop.levels") rrlda::rrlda(x = d$data, grouping = d$target, ...) } #' @export predictLearner.classif.rrlda = function(.learner, .model, .newdata, ...) { + as.factor(predict(.model$learner.model, x = .newdata, ...)$class) } diff --git a/R/RLearner_classif_saeDNN.R b/R/RLearner_classif_saeDNN.R index 1b82431063..acc18cf517 100644 --- a/R/RLearner_classif_saeDNN.R +++ b/R/RLearner_classif_saeDNN.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.saeDNN = function() { + makeRLearnerClassif( cl = "classif.saeDNN", package = "deepnet", @@ -26,7 +27,8 @@ makeRLearner.classif.saeDNN = function() { } #' @export -trainLearner.classif.saeDNN = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.saeDNN = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) y = as.numeric(d$target) dict = sort(unique(y)) @@ -40,6 +42,7 @@ trainLearner.classif.saeDNN = function(.learner, .task, .subset, .weights = NULL #' @export predictLearner.classif.saeDNN = function(.learner, .model, .newdata, ...) 
{ + type = switch(.learner$predict.type, response = "class", prob = "raw") pred = deepnet::nn.predict(.model$learner.model, data.matrix(.newdata)) colnames(pred) = .model$factor.levels[[1]] diff --git a/R/RLearner_classif_sda.R b/R/RLearner_classif_sda.R index 06ebf7003d..07782dcbfb 100644 --- a/R/RLearner_classif_sda.R +++ b/R/RLearner_classif_sda.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.sda = function() { + makeRLearnerClassif( cl = "classif.sda", package = "sda", @@ -18,16 +19,19 @@ makeRLearner.classif.sda = function() { } #' @export -trainLearner.classif.sda = function(.learner, .task, .subset, ...) { +trainLearner.classif.sda = function(.learner, .task, .subset, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) sda::sda(Xtrain = as.matrix(d$data), L = d$target, ...) } #' @export predictLearner.classif.sda = function(.learner, .model, .newdata, ...) { + p = sda::predict.sda(.model$learner.model, as.matrix(.newdata)) - if (.learner$predict.type == "response") + if (.learner$predict.type == "response") { return(p$class) - else + } else { return(p$posterior) + } } diff --git a/R/RLearner_classif_sparseLDA.R b/R/RLearner_classif_sparseLDA.R index d4c7490a01..2e363d54da 100644 --- a/R/RLearner_classif_sparseLDA.R +++ b/R/RLearner_classif_sparseLDA.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.sparseLDA = function() { + makeRLearnerClassif( cl = "classif.sparseLDA", # FIXME: maybe again broken NAMESPACE / import in package, if we dont use !, solvebeta is not found @@ -19,7 +20,8 @@ makeRLearner.classif.sparseLDA = function() { } #' @export -trainLearner.classif.sparseLDA = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.sparseLDA = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) y = d$target lvls = levels(y) @@ -29,10 +31,12 @@ trainLearner.classif.sparseLDA = function(.learner, .task, .subset, .weights = N #' @export predictLearner.classif.sparseLDA = function(.learner, .model, .newdata, ...) { + p = sparseLDA::predict.sda(.model$learner.model, newdata = subset(.newdata, select = .model$features), ...) - if (.learner$predict.type == "response") + if (.learner$predict.type == "response") { return(p$class) - else + } else { return(p$posterior) + } } diff --git a/R/RLearner_classif_svm.R b/R/RLearner_classif_svm.R index e5d2ce43a4..de9f19ebb1 100644 --- a/R/RLearner_classif_svm.R +++ b/R/RLearner_classif_svm.R @@ -1,11 +1,12 @@ #' @export makeRLearner.classif.svm = function() { + makeRLearnerClassif( cl = "classif.svm", package = "e1071", par.set = makeParamSet( makeDiscreteLearnerParam(id = "type", default = "C-classification", values = c("C-classification", "nu-classification")), - makeNumericLearnerParam(id = "cost", default = 1, lower = 0, requires = quote(type == "C-classification")), + makeNumericLearnerParam(id = "cost", default = 1, lower = 0, requires = quote(type == "C-classification")), makeNumericLearnerParam(id = "nu", default = 0.5, requires = quote(type == "nu-classification")), makeNumericVectorLearnerParam("class.weights", len = NA_integer_, lower = 0), makeDiscreteLearnerParam(id = "kernel", default = "radial", values = c("linear", "polynomial", "radial", "sigmoid")), @@ -28,13 +29,15 @@ makeRLearner.classif.svm = function() { } #' @export -trainLearner.classif.svm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.svm = function(.learner, .task, .subset, .weights = NULL, ...) 
{ + f = getTaskFormula(.task) e1071::svm(f, data = getTaskData(.task, .subset), probability = .learner$predict.type == "prob", ...) } #' @export predictLearner.classif.svm = function(.learner, .model, .newdata, ...) { + if (.learner$predict.type == "response") { predict(.model$learner.model, newdata = .newdata, ...) } else { diff --git a/R/RLearner_classif_xgboost.R b/R/RLearner_classif_xgboost.R index 4bc4a46035..2d573a9c65 100644 --- a/R/RLearner_classif_xgboost.R +++ b/R/RLearner_classif_xgboost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.classif.xgboost = function() { + makeRLearnerClassif( cl = "classif.xgboost", package = "xgboost", @@ -41,11 +42,11 @@ makeRLearner.classif.xgboost = function() { makeNumericLearnerParam(id = "rate_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")), makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")), # TODO: uncomment the following after the next CRAN update, and set max_depth's lower = 0L - #makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")), - #makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")), - #makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")), - #makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")), - #makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")), + # makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")), + # makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")), + # makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")), + # makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")), + # makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")), makeUntypedLearnerParam(id = "callbacks", default = list(), tunable = FALSE) ), par.vals = list(nrounds = 1L, verbose = 0L), @@ -58,49 +59,56 @@ makeRLearner.classif.xgboost = function() { } #' @export -trainLearner.classif.xgboost = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.classif.xgboost = function(.learner, .task, .subset, .weights = NULL, ...) { td = getTaskDesc(.task) parlist = list(...) 
nc = length(td$class.levels) - if (is.null(parlist$objective)) + if (is.null(parlist$objective)) { parlist$objective = ifelse(nc == 2L, "binary:logistic", "multi:softprob") + } - if (.learner$predict.type == "prob" && parlist$objective == "multi:softmax") + if (.learner$predict.type == "prob" && parlist$objective == "multi:softmax") { stop("objective = 'multi:softmax' does not work with predict.type = 'prob'") + } - #if we use softprob or softmax as objective we have to add the number of classes 'num_class' - if (parlist$objective %in% c("multi:softprob", "multi:softmax")) + # if we use softprob or softmax as objective we have to add the number of classes 'num_class' + if (parlist$objective %in% c("multi:softprob", "multi:softmax")) { parlist$num_class = nc + } task.data = getTaskData(.task, .subset, target.extra = TRUE) label = match(as.character(task.data$target), td$class.levels) - 1 parlist$data = xgboost::xgb.DMatrix(data = data.matrix(task.data$data), label = label) - if (!is.null(.weights)) + if (!is.null(.weights)) { xgboost::setinfo(parlist$data, "weight", .weights) + } - if (is.null(parlist$watchlist)) + if (is.null(parlist$watchlist)) { parlist$watchlist = list(train = parlist$data) + } do.call(xgboost::xgb.train, parlist) } #' @export predictLearner.classif.xgboost = function(.learner, .model, .newdata, ...) { + td = .model$task.desc m = .model$learner.model cls = td$class.levels nc = length(cls) obj = .learner$par.vals$objective - if (is.null(obj)) + if (is.null(obj)) { .learner$par.vals$objective = ifelse(nc == 2L, "binary:logistic", "multi:softprob") + } p = predict(m, newdata = data.matrix(.newdata), ...) - if (nc == 2L) { #binaryclass + if (nc == 2L) { # binaryclass if (.learner$par.vals$objective == "multi:softprob") { y = matrix(p, nrow = length(p) / nc, ncol = nc, byrow = TRUE) colnames(y) = cls @@ -118,9 +126,9 @@ predictLearner.classif.xgboost = function(.learner, .model, .newdata, ...) { p = factor(p, levels = colnames(y)) return(p) } - } else { #multiclass - if (.learner$par.vals$objective == "multi:softmax") { - p = as.factor(p) #special handling for multi:softmax which directly predicts class levels + } else { # multiclass + if (.learner$par.vals$objective == "multi:softmax") { + p = as.factor(p) # special handling for multi:softmax which directly predicts class levels levels(p) = cls return(p) } else { @@ -139,6 +147,7 @@ predictLearner.classif.xgboost = function(.learner, .model, .newdata, ...) { #' @export getFeatureImportanceLearner.classif.xgboost = function(.learner, .model, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) imp = xgboost::xgb.importance(feature_names = .model$features, model = mod, ...) diff --git a/R/RLearner_cluster_Cobweb.R b/R/RLearner_cluster_Cobweb.R index 1ea7bc181f..771c6ae159 100644 --- a/R/RLearner_cluster_Cobweb.R +++ b/R/RLearner_cluster_Cobweb.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.Cobweb = function() { + makeRLearnerCluster( cl = "cluster.Cobweb", package = "RWeka", @@ -16,14 +17,15 @@ makeRLearner.cluster.Cobweb = function() { } #' @export -trainLearner.cluster.Cobweb = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.cluster.Cobweb = function(.learner, .task, .subset, .weights = NULL, ...) { + ctrl = RWeka::Weka_control(...) RWeka::Cobweb(getTaskData(.task, .subset), control = ctrl) } #' @export predictLearner.cluster.Cobweb = function(.learner, .model, .newdata, ...) { + # RWeka returns cluster indices (i.e. 
starting from 0, which some tools don't like as.integer(predict(.model$learner.model, .newdata, ...)) + 1L } - diff --git a/R/RLearner_cluster_EM.R b/R/RLearner_cluster_EM.R index 527218612a..7db5b17be0 100644 --- a/R/RLearner_cluster_EM.R +++ b/R/RLearner_cluster_EM.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.EM = function() { + makeRLearnerCluster( cl = "cluster.EM", package = "RWeka", @@ -25,14 +26,15 @@ makeRLearner.cluster.EM = function() { } #' @export -trainLearner.cluster.EM = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.cluster.EM = function(.learner, .task, .subset, .weights = NULL, ...) { + ctrl = RWeka::Weka_control(...) RWeka::make_Weka_clusterer("weka/clusterers/EM")(getTaskData(.task, .subset), control = ctrl) } #' @export predictLearner.cluster.EM = function(.learner, .model, .newdata, ...) { + # EM returns cluster indices (i.e. starting from 0, which some tools don't like as.integer(predict(.model$learner.model, .newdata, ...)) + 1L } - diff --git a/R/RLearner_cluster_FarthestFirst.R b/R/RLearner_cluster_FarthestFirst.R index 3e19e11633..6bd002b100 100644 --- a/R/RLearner_cluster_FarthestFirst.R +++ b/R/RLearner_cluster_FarthestFirst.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.FarthestFirst = function() { + makeRLearnerCluster( cl = "cluster.FarthestFirst", package = "RWeka", @@ -16,14 +17,15 @@ makeRLearner.cluster.FarthestFirst = function() { } #' @export -trainLearner.cluster.FarthestFirst = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.cluster.FarthestFirst = function(.learner, .task, .subset, .weights = NULL, ...) { + ctrl = RWeka::Weka_control(...) RWeka::FarthestFirst(getTaskData(.task, .subset), control = ctrl) } #' @export predictLearner.cluster.FarthestFirst = function(.learner, .model, .newdata, ...) { + # RWeka returns cluster indices (i.e. starting from 0, which some tools don't like as.integer(predict(.model$learner.model, .newdata, ...)) + 1L } - diff --git a/R/RLearner_cluster_SimpleKMeans.R b/R/RLearner_cluster_SimpleKMeans.R index 28bda58b23..5baa4c0e2d 100644 --- a/R/RLearner_cluster_SimpleKMeans.R +++ b/R/RLearner_cluster_SimpleKMeans.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.SimpleKMeans = function() { + makeRLearnerCluster( cl = "cluster.SimpleKMeans", package = "RWeka", @@ -30,14 +31,15 @@ makeRLearner.cluster.SimpleKMeans = function() { } #' @export -trainLearner.cluster.SimpleKMeans = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.cluster.SimpleKMeans = function(.learner, .task, .subset, .weights = NULL, ...) { + ctrl = RWeka::Weka_control(...) RWeka::SimpleKMeans(getTaskData(.task, .subset), control = ctrl) } #' @export predictLearner.cluster.SimpleKMeans = function(.learner, .model, .newdata, ...) { + # SimpleKMeans returns cluster indices (i.e. starting from 0, which some tools don't like as.integer(predict(.model$learner.model, .newdata, ...)) + 1L } - diff --git a/R/RLearner_cluster_XMeans.R b/R/RLearner_cluster_XMeans.R index ba344794f5..99ff79e363 100644 --- a/R/RLearner_cluster_XMeans.R +++ b/R/RLearner_cluster_XMeans.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.XMeans = function() { + makeRLearnerCluster( cl = "cluster.XMeans", package = "RWeka", @@ -30,14 +31,15 @@ makeRLearner.cluster.XMeans = function() { } #' @export -trainLearner.cluster.XMeans = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.cluster.XMeans = function(.learner, .task, .subset, .weights = NULL, ...) { + ctrl = RWeka::Weka_control(...) 
RWeka::XMeans(getTaskData(.task, .subset), control = ctrl) } #' @export predictLearner.cluster.XMeans = function(.learner, .model, .newdata, ...) { + # XMeans returns cluster indices (i.e. starting from 0, which some tools don't like as.integer(predict(.model$learner.model, .newdata, ...)) + 1L } - diff --git a/R/RLearner_cluster_cmeans.R b/R/RLearner_cluster_cmeans.R index c6f3dbdedf..230d39d438 100644 --- a/R/RLearner_cluster_cmeans.R +++ b/R/RLearner_cluster_cmeans.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.cmeans = function() { + makeRLearnerCluster( cl = "cluster.cmeans", package = c("e1071", "clue"), @@ -25,15 +26,16 @@ makeRLearner.cluster.cmeans = function() { #' @export trainLearner.cluster.cmeans = function(.learner, .task, .subset, .weights = NULL, reltol, ...) { + ctrl = learnerArgsToControl(list, reltol) e1071::cmeans(getTaskData(.task, .subset), control = ctrl, ...) } #' @export predictLearner.cluster.cmeans = function(.learner, .model, .newdata, ...) { + switch(.learner$predict.type, response = as.integer(clue::cl_predict(.model$learner.model, newdata = .newdata, type = "class_ids", ...)), prob = as.matrix(clue::cl_predict(.model$learner.model, newdata = .newdata, type = "memberships", ...)) ) } - diff --git a/R/RLearner_cluster_dbscan.R b/R/RLearner_cluster_dbscan.R index 58c622c005..1f07f610ec 100644 --- a/R/RLearner_cluster_dbscan.R +++ b/R/RLearner_cluster_dbscan.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.dbscan = function() { + makeRLearnerCluster( cl = "cluster.dbscan", package = "fpc", @@ -22,6 +23,7 @@ makeRLearner.cluster.dbscan = function() { #' @export trainLearner.cluster.dbscan = function(.learner, .task, .subset, .weights = NULL, ...) { + data = getTaskData(.task, .subset) model = fpc::dbscan(data, ...) # dbscan needs this in the prediction phase @@ -31,8 +33,8 @@ trainLearner.cluster.dbscan = function(.learner, .task, .subset, .weights = NULL #' @export predictLearner.cluster.dbscan = function(.learner, .model, .newdata, ...) { + indices = as.integer(predict(.model$learner.model, .model$learner.model$data, newdata = .newdata, ...)) indices[indices == 0L] = NA_integer_ return(indices) } - diff --git a/R/RLearner_cluster_kkmeans.R b/R/RLearner_cluster_kkmeans.R index bdb9da75e7..9341f2a139 100644 --- a/R/RLearner_cluster_kkmeans.R +++ b/R/RLearner_cluster_kkmeans.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.kkmeans = function() { + makeRLearnerCluster( cl = "cluster.kkmeans", package = "kernlab", @@ -33,15 +34,18 @@ makeRLearner.cluster.kkmeans = function() { #' @export trainLearner.cluster.kkmeans = function(.learner, .task, .subset, .weights = NULL, degree, offset, scale, sigma, order, length, lambda, normalized, ...) { + kpar = learnerArgsToControl(list, degree, offset, scale, sigma, order, length, lambda, normalized) - if (base::length(kpar) > 0L) + if (base::length(kpar) > 0L) { kernlab::kkmeans(as.matrix(getTaskData(.task, .subset)), kpar = kpar, ...) - else + } else { kernlab::kkmeans(as.matrix(getTaskData(.task, .subset)), ...) + } } #' @export predictLearner.cluster.kkmeans = function(.learner, .model, .newdata, .weights = NULL, ...) 
{ + c = kernlab::centers(.model$learner.model) K = kernlab::kernelf(.model$learner.model) diff --git a/R/RLearner_cluster_kmeans.R b/R/RLearner_cluster_kmeans.R index 881586a489..d1840e58af 100644 --- a/R/RLearner_cluster_kmeans.R +++ b/R/RLearner_cluster_kmeans.R @@ -1,5 +1,6 @@ #' @export makeRLearner.cluster.kmeans = function() { + makeRLearnerCluster( cl = "cluster.kmeans", package = c("stats", "clue"), @@ -22,14 +23,15 @@ makeRLearner.cluster.kmeans = function() { #' @export trainLearner.cluster.kmeans = function(.learner, .task, .subset, .weights = NULL, ...) { + stats::kmeans(getTaskData(.task, .subset), ...) } #' @export predictLearner.cluster.kmeans = function(.learner, .model, .newdata, ...) { + switch(.learner$predict.type, response = as.integer(clue::cl_predict(.model$learner.model, newdata = .newdata, type = "class_ids", ...)), prob = as.matrix(clue::cl_predict(.model$learner.model, newdata = .newdata, type = "memberships", ...)) ) } - diff --git a/R/RLearner_multilabel_cforest.R b/R/RLearner_multilabel_cforest.R index db6eef8624..03ceaa9b03 100644 --- a/R/RLearner_multilabel_cforest.R +++ b/R/RLearner_multilabel_cforest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.multilabel.cforest = function() { + makeRLearnerMultilabel( cl = "multilabel.cforest", package = "party", @@ -36,6 +37,7 @@ trainLearner.multilabel.cforest = function(.learner, .task, .subset, .weights = ntree, mtry, replace, fraction, trace, teststat, testtype, mincriterion, minsplit, minbucket, stump, nresample, maxsurrogate, maxdepth, savesplitstats, ...) { + d = getTaskData(.task, .subset) f = getTaskFormula(.task) defaults = getDefaults(getParamSet(.learner)) @@ -52,6 +54,7 @@ trainLearner.multilabel.cforest = function(.learner, .task, .subset, .weights = #' @export predictLearner.multilabel.cforest = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata, type = "prob", ...) p = do.call(rbind, p) if (.learner$predict.type == "response") { diff --git a/R/RLearner_multilabel_rFerns.R b/R/RLearner_multilabel_rFerns.R index 5ef8cabdd6..68829ef75f 100644 --- a/R/RLearner_multilabel_rFerns.R +++ b/R/RLearner_multilabel_rFerns.R @@ -1,5 +1,6 @@ #' @export makeRLearner.multilabel.rFerns = function() { + makeRLearnerMultilabel( cl = "multilabel.rFerns", package = "rFerns", @@ -16,12 +17,13 @@ makeRLearner.multilabel.rFerns = function() { #' @export trainLearner.multilabel.rFerns = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) rFerns::rFerns(x = d$data, y = as.matrix(d$target), ...) } #' @export predictLearner.multilabel.rFerns = function(.learner, .model, .newdata, ...) { + as.matrix(predict(.model$learner.model, .newdata, ...)) } - diff --git a/R/RLearner_multilabel_randomForestSRC.R b/R/RLearner_multilabel_randomForestSRC.R index 7308781da8..286ccca30a 100644 --- a/R/RLearner_multilabel_randomForestSRC.R +++ b/R/RLearner_multilabel_randomForestSRC.R @@ -1,5 +1,6 @@ #' @export makeRLearner.multilabel.randomForestSRC = function() { + makeRLearnerMultilabel( cl = "multilabel.randomForestSRC", package = "randomForestSRC", @@ -59,6 +60,7 @@ makeRLearner.multilabel.randomForestSRC = function() { #' @export trainLearner.multilabel.randomForestSRC = function(.learner, .task, .subset, .weights = NULL, ...) 
{ + targets = getTaskTargetNames(.task) f = as.formula(stri_paste("cbind(", stri_paste(targets, collapse = ",", sep = " "), ") ~ .", sep = "")) d = getTaskData(.task, .subset, recode.target = "multilabel.factor") @@ -67,6 +69,7 @@ trainLearner.multilabel.randomForestSRC = function(.learner, .task, .subset, .we #' @export predictLearner.multilabel.randomForestSRC = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata, importance = "none", ...) if (.learner$predict.type == "prob") { return(sapply(p$classOutput, function(x) x$predicted[, 1])) diff --git a/R/RLearner_regr_FDboost.R b/R/RLearner_regr_FDboost.R index 1a638cf82f..1d8f32f51f 100644 --- a/R/RLearner_regr_FDboost.R +++ b/R/RLearner_regr_FDboost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.FDboost = function() { + makeRLearnerRegr( cl = "regr.FDboost", package = c("FDboost", "mboost"), @@ -7,17 +8,17 @@ makeRLearner.regr.FDboost = function() { makeDiscreteLearnerParam(id = "family", default = "Gaussian", values = c("Gaussian", "Laplace", "Huber", "Poisson", "GammaReg", "NBinomial", "Hurdle", "custom.family")), makeIntegerLearnerParam(id = "mstop", default = 100L, lower = 1L), - makeNumericLearnerParam(id = "nu", default = 0.1, lower = 0, upper = 1), # the learning rate - makeUntypedLearnerParam(id = "custom.family.definition", requires = quote(family == "custom.family")), # list of parameters for the custom family - makeNumericVectorLearnerParam(id = "nuirange", default = c(0, 100), requires = quote(family %in% c("GammaReg", "NBinomial", "Hurdle"))), # distribution parameters for families + makeNumericLearnerParam(id = "nu", default = 0.1, lower = 0, upper = 1), # the learning rate + makeUntypedLearnerParam(id = "custom.family.definition", requires = quote(family == "custom.family")), # list of parameters for the custom family + makeNumericVectorLearnerParam(id = "nuirange", default = c(0, 100), requires = quote(family %in% c("GammaReg", "NBinomial", "Hurdle"))), # distribution parameters for families makeNumericLearnerParam(id = "d", default = NULL, requires = quote(family == "Huber"), special.vals = list(NULL)), # delta parameter for Huber distribution # makeDiscreteLearnerParam(id = "risk", values = c("inbag", "oobag", "none")), we don't need this in FDboost - makeNumericLearnerParam(id = "df", default = 4, lower = 0.5), # effective degrees of freedom, depend on the regularization parameter of the penality matrix and number of splines, must be the same for all base learners(covariates), the maximum value is the rank of the design matrix + makeNumericLearnerParam(id = "df", default = 4, lower = 0.5), # effective degrees of freedom, depends on the regularization parameter of the penalty matrix and the number of splines, must be the same for all base learners (covariates), the maximum value is the rank of the design matrix # makeDiscreteLearnerParam(id = "baselearner", values = c("bbs", "bols")), # we don't use "btree" in FDboost - makeIntegerLearnerParam(id = "knots", default = 10L, lower = 1L), # determine the number of knots of splines, does not matter once there is sufficient number of knots, 30,40, 50 for example - makeIntegerLearnerParam(id = "degree", default = 3L, lower = 1L), # degree of the b-spline - makeIntegerLearnerParam(id = "differences", default = 1L, lower = 1L), # degree of the penalty - makeLogicalLearnerParam(id = "bsignal.check.ident", default = FALSE, tunable = FALSE) # identifiability check by testing matrix degeneracy + makeIntegerLearnerParam(id = "knots", default = 
10L, lower = 1L), # determines the number of spline knots; it does not matter much once there is a sufficient number of knots (e.g. 30, 40, 50) + makeIntegerLearnerParam(id = "degree", default = 3L, lower = 1L), # degree of the b-spline + makeIntegerLearnerParam(id = "differences", default = 1L, lower = 1L), # degree of the penalty + makeLogicalLearnerParam(id = "bsignal.check.ident", default = FALSE, tunable = FALSE) # identifiability check by testing matrix degeneracy ), properties = c("numerics", "functionals"), name = "Functional linear array regression boosting", @@ -43,7 +44,9 @@ trainLearner.regr.FDboost = function(.learner, .task, .subset, .weights = NULL, ) ctrl = learnerArgsToControl(mboost::boost_control, mstop, nu) - suppressMessages({d = getTaskData(.task, functionals.as = "dfcols")}) + suppressMessages({ + d = getTaskData(.task, functionals.as = "dfcols") + }) m = getTaskData(.task, functionals.as = "matrix") tn = getTaskTargetNames(.task) @@ -61,7 +64,7 @@ trainLearner.regr.FDboost = function(.learner, .task, .subset, .weights = NULL, # setup mat.list: for each func covar we add its data matrix and its grid. and once the target col # also setup charvec of formula terms for func covars mat.list = namedList(fdns) - #formula.terms = setNames(character(length = fdns)) + # formula.terms = setNames(character(length = fdns)) formula.terms = namedList(fdns) # for each functional covariate for (fdn in fdns) { @@ -92,11 +95,12 @@ trainLearner.regr.FDboost = function(.learner, .task, .subset, .weights = NULL, # Create the formula and train the model form = as.formula(sprintf("%s ~ %s", tn, collapse(unlist(formula.terms), "+"))) - FDboost::FDboost(formula = form, timeformula = ~bols(1), data = mat.list, control = ctrl, family = family) + FDboost::FDboost(formula = form, timeformula = ~ bols(1), data = mat.list, control = ctrl, family = family) } #' @export predictLearner.regr.FDboost = function(.learner, .model, .newdata, ...) { + nl = as.list(.newdata) prd = predict(object = .model$learner.model, newdata = nl, which = NULL) } diff --git a/R/RLearner_regr_GPfit.R b/R/RLearner_regr_GPfit.R index 03dcda33fb..69a9374b18 100644 --- a/R/RLearner_regr_GPfit.R +++ b/R/RLearner_regr_GPfit.R @@ -1,5 +1,6 @@ #' @export -makeRLearner.regr.GPfit = function(){ +makeRLearner.regr.GPfit = function() { + makeRLearnerRegr( cl = "regr.GPfit", package = "GPfit", @@ -14,7 +15,7 @@ makeRLearner.regr.GPfit = function(){ makeIntegerLearnerParam(id = "matern_nu_k", default = 0L, lower = 0L, requires = quote(type == "matern")), makeNumericLearnerParam(id = "power", default = 1.95, lower = 1.0, upper = 2.0, requires = quote(type == "exponential")) ), - par.vals = list(scale = TRUE, type = "exponential", matern_nu_k = 0L, power = 1.95), + par.vals = list(scale = TRUE, type = "exponential", matern_nu_k = 0L, power = 1.95), properties = c("numerics", "se"), name = "Gaussian Process", short.name = "GPfit", @@ -29,6 +30,7 @@ makeRLearner.regr.GPfit = function(){ } #' @export trainLearner.regr.GPfit = function(.learner, .task, .subset, .weights = NULL, scale, type, matern_nu_k, power, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) low = apply(d$data, 2, min) high = apply(d$data, 2, max) @@ -45,17 +47,18 @@ trainLearner.regr.GPfit = function(.learner, .task, .subset, .weights = NULL, sc } #' @export predictLearner.regr.GPfit = function(.learner, .model, .newdata, ...) 
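# GPfit::GP_fit() expects inputs in the unit hypercube, so training above stored
# the per-column low/high ranges. Prediction below must rescale .newdata with
# those *training* ranges, not with the ranges of .newdata itself; per column the
# transform is (sketch, x/lo/hi illustrative):
# x.scaled = (x - lo) / (hi - lo)
# Constant columns (hi == lo) are excluded via tr.info$not.const.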
{ + tr.info = getTrainingInfo(.model) if (tr.info$scaled) { - for (col.name in tr.info$not.const) { - .newdata[, col.name] = (.newdata[, col.name] - tr.info$low[col.name]) / (tr.info$high[col.name] - tr.info$low[col.name]) + for (col.name in tr.info$not.const) { + .newdata[, col.name] = (.newdata[, col.name] - tr.info$low[col.name]) / (tr.info$high[col.name] - tr.info$low[col.name]) } } rst = predict(.model$learner.model, xnew = .newdata[, tr.info$not.const]) se = (.learner$predict.type != "response") - if (!se) + if (!se) { return(rst$Y_hat) - else + } else { cbind(rst$Y_hat, sqrt(rst$MSE)) + } } - diff --git a/R/RLearner_regr_IBk.R b/R/RLearner_regr_IBk.R index c24bcfe09c..37e74cd95a 100644 --- a/R/RLearner_regr_IBk.R +++ b/R/RLearner_regr_IBk.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.IBk = function() { + makeRLearnerRegr( cl = "regr.IBk", package = "RWeka", @@ -21,12 +22,14 @@ makeRLearner.regr.IBk = function() { } #' @export -trainLearner.regr.IBk = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.IBk = function(.learner, .task, .subset, .weights = NULL, ...) { + ctrl = RWeka::Weka_control(...) RWeka::IBk(getTaskFormula(.task), data = getTaskData(.task, .subset), control = ctrl, na.action = na.pass) } #' @export predictLearner.regr.IBk = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, type = "class", ...) } diff --git a/R/RLearner_regr_LiblineaRL2L1SVR.R b/R/RLearner_regr_LiblineaRL2L1SVR.R index 13bd7f16b7..979c6a2536 100644 --- a/R/RLearner_regr_LiblineaRL2L1SVR.R +++ b/R/RLearner_regr_LiblineaRL2L1SVR.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.LiblineaRL2L1SVR = function() { + makeRLearnerRegr( cl = "regr.LiblineaRL2L1SVR", package = "LiblineaR", @@ -11,7 +12,7 @@ makeRLearner.regr.LiblineaRL2L1SVR = function() { makeIntegerLearnerParam(id = "cross", default = 0L, lower = 0L, tunable = FALSE), makeLogicalLearnerParam(id = "verbose", default = FALSE, tunable = FALSE) ), - #provide default to get rid of warning message during training + # provide default to get rid of warning message during training par.vals = list(svr_eps = 0.1), properties = "numerics", name = "L2-Regularized L1-Loss Support Vector Regression", @@ -23,11 +24,13 @@ makeRLearner.regr.LiblineaRL2L1SVR = function() { #' @export trainLearner.regr.LiblineaRL2L1SVR = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) LiblineaR::LiblineaR(data = d$data, target = d$target, type = 13L, ...) } #' @export predictLearner.regr.LiblineaRL2L1SVR = function(.learner, .model, .newdata, ...) 
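# Both LiblineaR SVR learners wrap the same LiblineaR::LiblineaR() call and are
# distinguished only by the integer `type` argument: regr.LiblineaRL2L1SVR above
# passes type = 13L (L1-loss SVR) explicitly, while regr.LiblineaRL2L2SVR below
# fixes type = 11L (L2-loss SVR) via par.vals. Illustrative call, with d as in
# the training functions:
# LiblineaR::LiblineaR(data = d$data, target = d$target, type = 11L, svr_eps = 0.1)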
{ - predict(.model$learner.model, newx = .newdata, ...)$predictions + + predict(.model$learner.model, newx = .newdata, ...)$predictions } diff --git a/R/RLearner_regr_LiblineaRL2L2SVR.R b/R/RLearner_regr_LiblineaRL2L2SVR.R index 671d16e861..8687c1af02 100644 --- a/R/RLearner_regr_LiblineaRL2L2SVR.R +++ b/R/RLearner_regr_LiblineaRL2L2SVR.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.LiblineaRL2L2SVR = function() { + makeRLearnerRegr( cl = "regr.LiblineaRL2L2SVR", package = "LiblineaR", @@ -15,7 +16,7 @@ makeRLearner.regr.LiblineaRL2L2SVR = function() { makeIntegerLearnerParam(id = "cross", default = 0L, lower = 0L, tunable = FALSE), makeLogicalLearnerParam(id = "verbose", default = FALSE, tunable = FALSE) ), - #provide default to get rid of warning message during training + # provide default to get rid of warning message during training par.vals = list(svr_eps = 0.1, type = 11L), properties = "numerics", name = "L2-Regularized L2-Loss Support Vector Regression", @@ -27,11 +28,13 @@ makeRLearner.regr.LiblineaRL2L2SVR = function() { #' @export trainLearner.regr.LiblineaRL2L2SVR = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) LiblineaR::LiblineaR(data = d$data, target = d$target, ...) } #' @export predictLearner.regr.LiblineaRL2L2SVR = function(.learner, .model, .newdata, ...) { - predict(.model$learner.model, newx = .newdata, ...)$predictions + + predict(.model$learner.model, newx = .newdata, ...)$predictions } diff --git a/R/RLearner_regr_RRF.R b/R/RLearner_regr_RRF.R index 257ad63a34..ab3c74f042 100644 --- a/R/RLearner_regr_RRF.R +++ b/R/RLearner_regr_RRF.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.RRF = function() { + makeRLearnerRegr( cl = "regr.RRF", package = "RRF", @@ -12,9 +13,9 @@ makeRLearner.regr.RRF = function() { makeLogicalLearnerParam(id = "replace", default = TRUE), makeIntegerLearnerParam(id = "flagReg", default = 1L, lower = 0), makeNumericLearnerParam(id = "coefReg", default = 0.8, - requires = quote(flagReg == 1L)), + requires = quote(flagReg == 1L)), makeIntegerVectorLearnerParam(id = "feaIni", lower = 0, upper = Inf, - requires = quote(flagReg == 1L)), + requires = quote(flagReg == 1L)), makeLogicalLearnerParam(id = "corr.bias", default = FALSE), makeIntegerLearnerParam(id = "maxnodes", lower = 1L), makeLogicalLearnerParam(id = "importance", default = FALSE), @@ -37,17 +38,20 @@ makeRLearner.regr.RRF = function() { #' @export trainLearner.regr.RRF = function(.learner, .task, .subset, .weights, ...) { + RRF::RRF(formula = getTaskFormula(.task), data = getTaskData(.task, .subset), - keep.forest = TRUE, ...) + keep.forest = TRUE, ...) } #' @export predictLearner.regr.RRF = function(.learner, .model, .newdata, ...) { + p = predict(object = .model$learner.model, newdata = .newdata, ...) return(p) } #' @export getFeatureImportanceLearner.regr.RRF = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.RRF(.learner, .model, ...) } diff --git a/R/RLearner_regr_bartMachine.R b/R/RLearner_regr_bartMachine.R index 14110b30d5..48c799a2b3 100644 --- a/R/RLearner_regr_bartMachine.R +++ b/R/RLearner_regr_bartMachine.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.bartMachine = function() { + makeRLearnerRegr( cl = "regr.bartMachine", package = "bartMachine", @@ -40,12 +41,13 @@ makeRLearner.regr.bartMachine = function() { #' @export trainLearner.regr.bartMachine = function(.learner, .task, .subset, .weights = NULL, ...) 
{ + d = getTaskData(.task, .subset, target.extra = TRUE) bartMachine::bartMachine(X = d$data, y = d$target, ...) } #' @export predictLearner.regr.bartMachine = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, new_data = .newdata, ...) } - diff --git a/R/RLearner_regr_bcart.R b/R/RLearner_regr_bcart.R index 538c066705..493873bc2d 100644 --- a/R/RLearner_regr_bcart.R +++ b/R/RLearner_regr_bcart.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.bcart = function() { + makeRLearnerRegr( cl = "regr.bcart", package = "tgp", @@ -29,6 +30,7 @@ makeRLearner.regr.bcart = function() { #' @export trainLearner.regr.bcart = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) # factor variables must be in the last columns as dummy variables: col.types = vcapply(d$data, function(x) class(x)) @@ -47,6 +49,7 @@ trainLearner.regr.bcart = function(.learner, .task, .subset, .weights = NULL, .. #' @export predictLearner.regr.bcart = function(.learner, .model, .newdata, ...) { + # factor variables must be in the last columns as dummy variables: col.types = vcapply(.newdata, function(x) class(x)) factor.ind = (col.types == "factor") diff --git a/R/RLearner_regr_bgp.R b/R/RLearner_regr_bgp.R index 23d96c1fbe..149fa53573 100644 --- a/R/RLearner_regr_bgp.R +++ b/R/RLearner_regr_bgp.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.bgp = function() { + makeRLearnerRegr( cl = "regr.bgp", package = "tgp", @@ -33,12 +34,14 @@ makeRLearner.regr.bgp = function() { #' @export trainLearner.regr.bgp = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) tgp::bgp(X = d$data, Z = d$target, pred.n = FALSE, ...) } #' @export predictLearner.regr.bgp = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, XX = .newdata, pred.n = FALSE, ...) if (.learner$predict.type == "response") { return(p$ZZ.km) diff --git a/R/RLearner_regr_bgpllm.R b/R/RLearner_regr_bgpllm.R index c422c5ba4a..7d94358e47 100644 --- a/R/RLearner_regr_bgpllm.R +++ b/R/RLearner_regr_bgpllm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.bgpllm = function() { + makeRLearnerRegr( cl = "regr.bgpllm", package = "tgp", @@ -35,12 +36,14 @@ makeRLearner.regr.bgpllm = function() { #' @export trainLearner.regr.bgpllm = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) tgp::bgpllm(X = d$data, Z = d$target, pred.n = FALSE, ...) } #' @export predictLearner.regr.bgpllm = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, XX = .newdata, pred.n = FALSE, ...) if (.learner$predict.type == "response") { return(p$ZZ.km) diff --git a/R/RLearner_regr_blm.R b/R/RLearner_regr_blm.R index 3bce4f7b61..ba4c9c3e15 100644 --- a/R/RLearner_regr_blm.R +++ b/R/RLearner_regr_blm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.blm = function() { + makeRLearnerRegr( cl = "regr.blm", package = "tgp", @@ -29,12 +30,14 @@ makeRLearner.regr.blm = function() { #' @export trainLearner.regr.blm = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) tgp::blm(X = d$data, Z = d$target, pred.n = FALSE, ...) } #' @export predictLearner.regr.blm = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, XX = .newdata, pred.n = FALSE, ...) 
if (.learner$predict.type == "response") { return(p$ZZ.km) diff --git a/R/RLearner_regr_brnn.R b/R/RLearner_regr_brnn.R index a8db29e151..ea9f6477f6 100644 --- a/R/RLearner_regr_brnn.R +++ b/R/RLearner_regr_brnn.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.brnn = function() { + makeRLearnerRegr( cl = "regr.brnn", package = "brnn", @@ -29,11 +30,13 @@ makeRLearner.regr.brnn = function() { #' @export trainLearner.regr.brnn = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) brnn::brnn(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.regr.brnn = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...) } diff --git a/R/RLearner_regr_bst.R b/R/RLearner_regr_bst.R index e5d86d4b26..1f5f091d19 100644 --- a/R/RLearner_regr_bst.R +++ b/R/RLearner_regr_bst.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.bst = function() { + makeRLearnerRegr( cl = "regr.bst", package = c("bst", "rpart"), @@ -40,10 +41,11 @@ makeRLearner.regr.bst = function() { trainLearner.regr.bst = function(.learner, .task, .subset, .weights = NULL, mstop, nu, twinboost, f.init, xselect.init, center, trace, numsample, df, minsplit, minbucket, cp, maxsurrogate, usesurrogate, surrogatestyle, maxdepth, xval, Learner, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) ctrl = learnerArgsToControl(bst::bst_control, mstop, nu, twinboost, f.init, xselect.init, center, trace, numsample, df) - control.tree = learnerArgsToControl(list, minsplit, minbucket, cp, maxsurrogate, + control.tree = learnerArgsToControl(list, minsplit, minbucket, cp, maxsurrogate, usesurrogate, surrogatestyle, maxdepth, xval) bst::bst(x = d$data, y = d$target, family = "gaussian", ctrl = ctrl, control.tree = control.tree, learner = Learner, ...) @@ -51,5 +53,6 @@ trainLearner.regr.bst = function(.learner, .task, .subset, .weights = NULL, msto #' @export predictLearner.regr.bst = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, .newdata, ...) } diff --git a/R/RLearner_regr_btgp.R b/R/RLearner_regr_btgp.R index 9760fa726d..a63a3f9edc 100644 --- a/R/RLearner_regr_btgp.R +++ b/R/RLearner_regr_btgp.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.btgp = function() { + makeRLearnerRegr( cl = "regr.btgp", package = "tgp", @@ -37,6 +38,7 @@ makeRLearner.regr.btgp = function() { #' @export trainLearner.regr.btgp = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) # factor variables must be in the last columns as dummy variables: col.types = vcapply(d$data, function(x) class(x)) @@ -55,6 +57,7 @@ trainLearner.regr.btgp = function(.learner, .task, .subset, .weights = NULL, ... #' @export predictLearner.regr.btgp = function(.learner, .model, .newdata, ...) { + # factor variables must be in the last columns as dummy variables: col.types = vcapply(.newdata, function(x) class(x)) factor.ind = (col.types == "factor") diff --git a/R/RLearner_regr_btgpllm.R b/R/RLearner_regr_btgpllm.R index 9b9698b426..9b78623d83 100644 --- a/R/RLearner_regr_btgpllm.R +++ b/R/RLearner_regr_btgpllm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.btgpllm = function() { + makeRLearnerRegr( cl = "regr.btgpllm", package = "tgp", @@ -39,6 +40,7 @@ makeRLearner.regr.btgpllm = function() { #' @export trainLearner.regr.btgpllm = function(.learner, .task, .subset, .weights = NULL, ...) 
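# The tgp learners (bcart, bgp, bgpllm, blm, btgp, btgpllm, btlm) all share the
# constraint repeated in the surrounding comments: factor covariates must be
# recoded as 0/1 dummy columns placed after all numeric columns. A rough sketch
# of such a reordering (factor.ind as computed in the training functions; the
# helper names here are illustrative, not mlr's own):
# num = d$data[, !factor.ind, drop = FALSE]
# dum = model.matrix(~ . - 1, data = d$data[, factor.ind, drop = FALSE])
# X = cbind(num, dum)  # numerics first, dummies last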
{ + d = getTaskData(.task, .subset, target.extra = TRUE) # factor variables must be in the last columns as dummy variables: col.types = vcapply(d$data, function(x) class(x)) @@ -57,6 +59,7 @@ trainLearner.regr.btgpllm = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.regr.btgpllm = function(.learner, .model, .newdata, ...) { + # factor variables must be in the last columns as dummy variables: col.types = vcapply(.newdata, function(x) class(x)) factor.ind = (col.types == "factor") diff --git a/R/RLearner_regr_btlm.R b/R/RLearner_regr_btlm.R index c157d13b0b..b0db1968a4 100644 --- a/R/RLearner_regr_btlm.R +++ b/R/RLearner_regr_btlm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.btlm = function() { + makeRLearnerRegr( cl = "regr.btlm", package = "tgp", @@ -31,6 +32,7 @@ makeRLearner.regr.btlm = function() { #' @export trainLearner.regr.btlm = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) # factor variables must be in the last columns as dummy variables: col.types = vcapply(d$data, function(x) class(x)) @@ -49,6 +51,7 @@ trainLearner.regr.btlm = function(.learner, .task, .subset, .weights = NULL, ... #' @export predictLearner.regr.btlm = function(.learner, .model, .newdata, ...) { + # factor variables must be in the last columns as dummy variables: col.types = vcapply(.newdata, function(x) class(x)) factor.ind = (col.types == "factor") diff --git a/R/RLearner_regr_cforest.R b/R/RLearner_regr_cforest.R index 72c1b59257..369c14c581 100644 --- a/R/RLearner_regr_cforest.R +++ b/R/RLearner_regr_cforest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.cforest = function() { + makeRLearnerRegr( cl = "regr.cforest", package = "party", @@ -36,6 +37,7 @@ makeRLearner.regr.cforest = function() { trainLearner.regr.cforest = function(.learner, .task, .subset, .weights = NULL, ntree, mtry, replace, fraction, trace, teststat, testtype, mincriterion, minsplit, minbucket, stump, nresample, maxsurrogate, maxdepth, savesplitstats, ...) { + f = getTaskFormula(.task) d = getTaskData(.task, .subset) defaults = getDefaults(getParamSet(.learner)) @@ -53,10 +55,12 @@ trainLearner.regr.cforest = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.regr.cforest = function(.learner, .model, .newdata, ...) { + as.vector(predict(.model$learner.model, newdata = .newdata, ...)) } #' @export getFeatureImportanceLearner.regr.cforest = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.cforest(.learner, .model, auc = FALSE, ...) } diff --git a/R/RLearner_regr_crs.R b/R/RLearner_regr_crs.R index 09712b75ff..5b044ea027 100644 --- a/R/RLearner_regr_crs.R +++ b/R/RLearner_regr_crs.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.crs = function() { + makeRLearnerRegr( cl = "regr.crs", package = "!crs", @@ -48,17 +49,19 @@ makeRLearner.regr.crs = function() { } #' @export -trainLearner.regr.crs = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.crs = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) if (is.null(.weights)) { crs::crs(formula = f, data = getTaskData(.task, .subset), ...) - } else { + } else { crs::crs(formula = f, data = getTaskData(.task, .subset), weights = .weights, ...) } } #' @export predictLearner.regr.crs = function(.learner, .model, .newdata, ...) { + if (.learner$predict.type == "se") { pred = predict(.model$learner.model, newdata = .newdata, ...) 
lwr = attr(pred, "lwr") diff --git a/R/RLearner_regr_ctree.R b/R/RLearner_regr_ctree.R index 02c681eb93..d1ac208634 100644 --- a/R/RLearner_regr_ctree.R +++ b/R/RLearner_regr_ctree.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.ctree = function() { + makeRLearnerRegr( cl = "regr.ctree", package = "party", @@ -37,5 +38,6 @@ trainLearner.regr.ctree = function(.learner, .task, .subset, .weights = NULL, te #' @export predictLearner.regr.ctree = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...)[, 1L] } diff --git a/R/RLearner_regr_cubist.R b/R/RLearner_regr_cubist.R index a8db4f4a6d..d9912aa543 100644 --- a/R/RLearner_regr_cubist.R +++ b/R/RLearner_regr_cubist.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.cubist = function() { + makeRLearnerRegr( cl = "regr.cubist", package = "Cubist", @@ -23,6 +24,7 @@ makeRLearner.regr.cubist = function() { #' @export trainLearner.regr.cubist = function(.learner, .task, .subset, .weights = NULL, unbiased, rules, extrapolation, sample, seed, label, ...) { + ctrl = learnerArgsToControl(Cubist::cubistControl, unbiased, rules, extrapolation, sample, seed, label) d = getTaskData(.task, .subset, target.extra = TRUE) Cubist::cubist(x = d$data, y = d$target, control = ctrl, ...) @@ -30,5 +32,6 @@ trainLearner.regr.cubist = function(.learner, .task, .subset, .weights = NULL, u #' @export predictLearner.regr.cubist = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...) } diff --git a/R/RLearner_regr_cvglmnet.R b/R/RLearner_regr_cvglmnet.R index 24d3b3b31d..60e8932d77 100644 --- a/R/RLearner_regr_cvglmnet.R +++ b/R/RLearner_regr_cvglmnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.cvglmnet = function() { + makeRLearnerRegr( cl = "regr.cvglmnet", package = "glmnet", @@ -21,7 +22,7 @@ makeRLearner.regr.cvglmnet = function() { makeNumericVectorLearnerParam(id = "lower.limits", upper = 0), makeNumericVectorLearnerParam(id = "upper.limits", lower = 0), makeIntegerLearnerParam(id = "maxit", default = 100000L, lower = 1L), - #FIXME Data dependent default. If n.features < 500 'covariance', 'naive' otherwise + # FIXME Data dependent default. If n.features < 500 'covariance', 'naive' otherwise makeDiscreteLearnerParam(id = "type.gaussian", values = c("covariance", "naive")), makeNumericLearnerParam(id = "fdev", default = 1.0e-5, lower = 0, upper = 1), makeNumericLearnerParam(id = "devmax", default = 0.999, lower = 0, upper = 1), @@ -47,12 +48,14 @@ makeRLearner.regr.cvglmnet = function() { #' @export trainLearner.regr.cvglmnet = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) info = getFixDataInfo(d$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) args = c(list(x = as.matrix(fixDataForLearner(d$data, info)), y = d$target), list(...)) rm(d) - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } glmnet::glmnet.control(factory = TRUE) saved.ctrl = glmnet::glmnet.control() @@ -68,6 +71,7 @@ trainLearner.regr.cvglmnet = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.regr.cvglmnet = function(.learner, .model, .newdata, ...) 
{ + info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) p = drop(predict(.model$learner.model, newx = .newdata, type = "response", ...)) diff --git a/R/RLearner_regr_earth.R b/R/RLearner_regr_earth.R index 9dba21091d..8beffd24e7 100644 --- a/R/RLearner_regr_earth.R +++ b/R/RLearner_regr_earth.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.earth = function() { + makeRLearnerRegr( cl = "regr.earth", package = "earth", @@ -28,12 +29,14 @@ makeRLearner.regr.earth = function() { } #' @export -trainLearner.regr.earth = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.earth = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) earth::earth(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.regr.earth = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata)[, 1L] } diff --git a/R/RLearner_regr_evtree.R b/R/RLearner_regr_evtree.R index 5dd3215fa5..e6a6644fc7 100644 --- a/R/RLearner_regr_evtree.R +++ b/R/RLearner_regr_evtree.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.evtree = function() { + makeRLearnerRegr( cl = "regr.evtree", package = "evtree", @@ -30,6 +31,7 @@ makeRLearner.regr.evtree = function() { trainLearner.regr.evtree = function(.learner, .task, .subset, .weights = NULL, pmutatemajor, pmutateminor, pcrossover, psplit, pprune, seed, ...) { + f = getTaskFormula(.task) d = getTaskData(.task, .subset) defaults = getDefaults(getParamSet(.learner)) @@ -46,6 +48,7 @@ trainLearner.regr.evtree = function(.learner, .task, .subset, #' @export predictLearner.regr.evtree = function(.learner, .model, .newdata, ...) { + colnames(.newdata) = attr(.model$learner.model$terms, "term.labels") p = predict(.model$learner.model, newdata = .newdata, ...) } diff --git a/R/RLearner_regr_extraTrees.R b/R/RLearner_regr_extraTrees.R index 5c2de666e3..3b8fa972a1 100644 --- a/R/RLearner_regr_extraTrees.R +++ b/R/RLearner_regr_extraTrees.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.extraTrees = function() { + makeRLearnerRegr( cl = "regr.extraTrees", package = "extraTrees", @@ -27,14 +28,17 @@ makeRLearner.regr.extraTrees = function() { #' @export trainLearner.regr.extraTrees = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) args = c(list(x = as.matrix(d$data), y = d$target), list(...)) - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } do.call(extraTrees::extraTrees, args) } #' @export predictLearner.regr.extraTrees = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, as.matrix(.newdata), ...) } diff --git a/R/RLearner_regr_featureless.R b/R/RLearner_regr_featureless.R index 60591705c1..a9519a09db 100644 --- a/R/RLearner_regr_featureless.R +++ b/R/RLearner_regr_featureless.R @@ -19,6 +19,7 @@ NULL #' @export makeRLearner.regr.featureless = function() { + makeRLearnerRegr( cl = "regr.featureless", package = "mlr", @@ -33,6 +34,7 @@ makeRLearner.regr.featureless = function() { #' @export trainLearner.regr.featureless = function(.learner, .task, .subset, .weights = NULL, method = "mean", ...) { + y = getTaskTargets(.task) if (!is.null(.subset)) { y = y[.subset] @@ -48,6 +50,7 @@ trainLearner.regr.featureless = function(.learner, .task, .subset, .weights = NU #' @export predictLearner.regr.featureless = function(.learner, .model, .newdata, ...) 
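# regr.featureless ignores all features: training above collapsed the target to
# a single location statistic (the mean by default). Prediction just repeats
# that constant once per row of .newdata, conceptually (assuming the statistic
# is stored in the model as mod$response):
# rep(mod$response, n)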
{ + # extract some shortcuts n = nrow(.newdata) mod = getLearnerModel(.model) diff --git a/R/RLearner_regr_fnn.R b/R/RLearner_regr_fnn.R index 1450172ad4..828747ae87 100644 --- a/R/RLearner_regr_fnn.R +++ b/R/RLearner_regr_fnn.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.fnn = function() { + makeRLearnerRegr( cl = "regr.fnn", package = "FNN", @@ -17,13 +18,15 @@ makeRLearner.regr.fnn = function() { } #' @export -trainLearner.regr.fnn = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.fnn = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) list(train = d, parset = list(...)) } #' @export predictLearner.regr.fnn = function(.learner, .model, .newdata, ...) { + m = .model$learner.model pars = c(list(train = m$train$data, test = .newdata, y = m$train$target), m$parset, list(...)) do.call(FNN::knn.reg, pars)$pred diff --git a/R/RLearner_regr_frbs.R b/R/RLearner_regr_frbs.R index f45a00b293..5f1a0bfaec 100644 --- a/R/RLearner_regr_frbs.R +++ b/R/RLearner_regr_frbs.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.frbs = function() { + makeRLearnerRegr( cl = "regr.frbs", package = "frbs", @@ -64,6 +65,7 @@ makeRLearner.regr.frbs = function() { #' @export trainLearner.regr.frbs = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) args = list(...) method.arg = names(args) == "method" @@ -78,5 +80,6 @@ trainLearner.regr.frbs = function(.learner, .task, .subset, .weights = NULL, ... #' @export predictLearner.regr.frbs = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...)[, 1L] } diff --git a/R/RLearner_regr_gamboost.R b/R/RLearner_regr_gamboost.R index 6365b07d9d..ea09ce2213 100644 --- a/R/RLearner_regr_gamboost.R +++ b/R/RLearner_regr_gamboost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.gamboost = function() { + makeRLearnerRegr( cl = "regr.gamboost", package = "mboost", @@ -32,6 +33,7 @@ makeRLearner.regr.gamboost = function() { #' @export trainLearner.regr.gamboost = function(.learner, .task, .subset, .weights = NULL, family = "Gaussian", nuirange = c(0, 100), d = NULL, custom.family.definition, mstop, nu, risk, trace, stopintern, ...) { + requirePackages("mboost", why = "argument 'baselearner' requires package", suppress.warnings = TRUE) ctrl = learnerArgsToControl(mboost::boost_control, mstop, nu, risk, trace, stopintern) data = getTaskData(.task, .subset) @@ -56,6 +58,7 @@ trainLearner.regr.gamboost = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.regr.gamboost = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata, ...) return(as.vector(p)) } diff --git a/R/RLearner_regr_gausspr.R b/R/RLearner_regr_gausspr.R index adb693c29c..72edc21789 100644 --- a/R/RLearner_regr_gausspr.R +++ b/R/RLearner_regr_gausspr.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.gausspr = function() { + makeRLearnerRegr( cl = "regr.gausspr", package = "kernlab", @@ -35,18 +36,21 @@ makeRLearner.regr.gausspr = function() { #' @export trainLearner.regr.gausspr = function(.learner, .task, .subset, .weights = NULL, - degree, offset, scale, sigma, order, length, lambda, normalized, ...) { + degree, offset, scale, sigma, order, length, lambda, normalized, ...) 
{ + kpar = learnerArgsToControl(list, degree, offset, scale, sigma, order, length, lambda, normalized) f = getTaskFormula(.task) vm = .learner$predict.type == "se" - if (base::length(kpar) > 0L) + if (base::length(kpar) > 0L) { kernlab::gausspr(f, data = getTaskData(.task, .subset), kpar = kpar, variance.model = vm, type = "regression", ...) - else + } else { kernlab::gausspr(f, data = getTaskData(.task, .subset), variance.model = vm, type = "regression", ...) + } } #' @export predictLearner.regr.gausspr = function(.learner, .model, .newdata, ...) { + if (.learner$predict.type != "se") { as.vector(kernlab::predict(.model$learner.model, newdata = .newdata, ...)) } else { diff --git a/R/RLearner_regr_gbm.R b/R/RLearner_regr_gbm.R index 328d92cfc9..dcf78551aa 100644 --- a/R/RLearner_regr_gbm.R +++ b/R/RLearner_regr_gbm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.gbm = function() { + makeRLearnerRegr( cl = "regr.gbm", package = "gbm", @@ -26,12 +27,13 @@ makeRLearner.regr.gbm = function() { } #' @export -trainLearner.regr.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) if (is.null(.weights)) { f = getTaskFormula(.task) gbm::gbm(f, data = getTaskData(.task, .subset), ...) - } else { + } else { f = getTaskFormula(.task) gbm::gbm(f, data = getTaskData(.task, .subset), weights = .weights, ...) } @@ -39,11 +41,13 @@ trainLearner.regr.gbm = function(.learner, .task, .subset, .weights = NULL, ... #' @export predictLearner.regr.gbm = function(.learner, .model, .newdata, ...) { + m = .model$learner.model gbm::predict.gbm(m, newdata = .newdata, n.trees = length(m$trees), ...) } #' @export getFeatureImportanceLearner.regr.gbm = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.gbm(.learner, .model, ...) - } +} diff --git a/R/RLearner_regr_glm.R b/R/RLearner_regr_glm.R index a407f438c9..691d211258 100644 --- a/R/RLearner_regr_glm.R +++ b/R/RLearner_regr_glm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.glm = function() { + makeRLearnerRegr( cl = "regr.glm", package = "stats", @@ -55,18 +56,21 @@ trainLearner.regr.glm = function(.learner, .task, .subset, .weights = NULL, epsi Gamma = stats::Gamma(link = make.link(Gamma.link)), inverse.gaussian = stats::inverse.gaussian(link = make.link(inverse.gaussian.link)) ) - if (is.null(.weights)) + if (is.null(.weights)) { m = stats::glm(f, data = d, control = ctrl, family = family, ...) - else + } else { m = stats::glm(f, data = d, control = ctrl, weights = .weights, family = family, ...) + } return(m) } #' @export predictLearner.regr.glm = function(.learner, .model, .newdata, ...) { + se.fit = .learner$predict.type == "se" p = predict(.model$learner.model, newdata = .newdata, type = "response", se.fit = se.fit, ...) 
- if (se.fit) + if (se.fit) { p = cbind(p$fit, p$se.fit) + } return(p) } diff --git a/R/RLearner_regr_glmboost.R b/R/RLearner_regr_glmboost.R index 78e34c17d1..24ec19c38b 100644 --- a/R/RLearner_regr_glmboost.R +++ b/R/RLearner_regr_glmboost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.glmboost = function() { + makeRLearnerRegr( cl = "regr.glmboost", package = "mboost", @@ -17,7 +18,7 @@ makeRLearner.regr.glmboost = function() { # 'risk' and 'stopintern' will be kept for completeness sake makeLogicalLearnerParam(id = "center", default = TRUE), makeLogicalLearnerParam(id = "trace", default = FALSE, tunable = FALSE) - ), + ), par.vals = list(), properties = c("numerics", "factors", "weights"), name = "Boosting for GLMs", @@ -29,6 +30,7 @@ makeRLearner.regr.glmboost = function() { #' @export trainLearner.regr.glmboost = function(.learner, .task, .subset, .weights = NULL, family = "Gaussian", nuirange = c(0, 100), d = NULL, custom.family.definition, mstop, nu, risk, trace, stopintern, ...) { + ctrl = learnerArgsToControl(mboost::boost_control, mstop, nu, risk, trace, stopintern) data = getTaskData(.task, .subset) f = getTaskFormula(.task) @@ -41,7 +43,7 @@ trainLearner.regr.glmboost = function(.learner, .task, .subset, .weights = NULL, NBinomial = mboost::NBinomial(nuirange = nuirange), Hurdle = mboost::Hurdle(nuirange = nuirange), custom.family = custom.family.definition - ) + ) if (is.null(.weights)) { model = mboost::glmboost(f, data = data, control = ctrl, family = family, ...) } else { @@ -52,6 +54,7 @@ trainLearner.regr.glmboost = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.regr.glmboost = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata, ...) return(as.vector(p)) } diff --git a/R/RLearner_regr_glmnet.R b/R/RLearner_regr_glmnet.R index 16795981fc..5f1b7f0b06 100644 --- a/R/RLearner_regr_glmnet.R +++ b/R/RLearner_regr_glmnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.glmnet = function() { + makeRLearnerRegr( cl = "regr.glmnet", package = "glmnet", @@ -50,12 +51,14 @@ makeRLearner.regr.glmnet = function() { #' @export trainLearner.regr.glmnet = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) info = getFixDataInfo(d$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) args = c(list(x = as.matrix(fixDataForLearner(d$data, info)), y = d$target), list(...)) rm(d) - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } glmnet::glmnet.control(factory = TRUE) saved.ctrl = glmnet::glmnet.control() @@ -71,6 +74,7 @@ trainLearner.regr.glmnet = function(.learner, .task, .subset, .weights = NULL, . #' @export predictLearner.regr.glmnet = function(.learner, .model, .newdata, ...) 
{ + info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) drop(predict(.model$learner.model, newx = .newdata, ...)) diff --git a/R/RLearner_regr_h2odeeplearning.R b/R/RLearner_regr_h2odeeplearning.R index b5accc556e..cbb10198a2 100644 --- a/R/RLearner_regr_h2odeeplearning.R +++ b/R/RLearner_regr_h2odeeplearning.R @@ -151,8 +151,9 @@ # Details: https://leanpub.com/deeplearning/read -#'@export +#' @export makeRLearner.regr.h2o.deeplearning = function() { + makeRLearnerRegr( cl = "regr.h2o.deeplearning", package = "h2o", @@ -161,7 +162,7 @@ makeRLearner.regr.h2o.deeplearning = function() { makeLogicalLearnerParam("use_all_factor_level", default = TRUE), makeDiscreteLearnerParam("activation", values = c("Rectifier", "Tanh", "TanhWithDropout", "RectifierWithDropout", "Maxout", "MaxoutWithDropout"), - default = "Rectifier"), + default = "Rectifier"), # FIXME: hidden can also be a list of integer vectors for grid search makeIntegerVectorLearnerParam("hidden", default = c(200L, 200L), len = NA_integer_, lower = 1L), @@ -203,7 +204,7 @@ makeRLearner.regr.h2o.deeplearning = function() { makeIntegerLearnerParam("stopping_rounds", default = 5L, lower = 0L), makeDiscreteLearnerParam("stopping_metric", values = c("AUTO", "deviance", "logloss", "MSE", "AUC", "r2", "misclassification"), default = "AUTO", - requires = quote(stopping_rounds > 0L)), + requires = quote(stopping_rounds > 0L)), makeNumericLearnerParam("stopping_tolerance", default = 0, lower = 0), makeNumericLearnerParam("max_runtime_secs", default = 0, lower = 0), makeLogicalLearnerParam("quiet_mode", tunable = FALSE), @@ -223,7 +224,7 @@ makeRLearner.regr.h2o.deeplearning = function() { makeLogicalLearnerParam("sparse", default = FALSE, tunable = FALSE), makeLogicalLearnerParam("col_major", default = FALSE, tunable = FALSE), makeLogicalLearnerParam("average_activation", tunable = FALSE), - #makeLogicalLearnerParam("sparsity_beta", tunable = FALSE), + # makeLogicalLearnerParam("sparsity_beta", tunable = FALSE), makeLogicalLearnerParam("reproducible", default = FALSE, tunable = FALSE), makeLogicalLearnerParam("export_weights_and_biases", default = FALSE, tunable = FALSE) ), @@ -237,6 +238,7 @@ makeRLearner.regr.h2o.deeplearning = function() { #' @export trainLearner.regr.h2o.deeplearning = function(.learner, .task, .subset, .weights = NULL, ...) { + # check if h2o connection already exists, otherwise start one conn.up = tryCatch(h2o::h2o.getConnection(), error = function(err) return(FALSE)) if (!inherits(conn.up, "H2OConnection")) { @@ -256,10 +258,10 @@ trainLearner.regr.h2o.deeplearning = function(.learner, .task, .subset, .weights #' @export predictLearner.regr.h2o.deeplearning = function(.learner, .model, .newdata, ...) { + m = .model$learner.model h2of = h2o::as.h2o(.newdata) p = h2o::h2o.predict(m, newdata = h2of, ...) 
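# .newdata makes a round trip through the h2o cluster here: h2o::as.h2o() above
# uploaded the data.frame as an H2OFrame, h2o::h2o.predict() returns the
# predictions as another H2OFrame, and as.data.frame() below pulls the "predict"
# column back into an R data.frame.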
p.df = as.data.frame(p) return(p.df$predict) } - diff --git a/R/RLearner_regr_h2ogbm.R b/R/RLearner_regr_h2ogbm.R index 0dc3679aeb..6f45dea194 100644 --- a/R/RLearner_regr_h2ogbm.R +++ b/R/RLearner_regr_h2ogbm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.h2o.gbm = function() { + makeRLearnerRegr( cl = "regr.h2o.gbm", package = "h2o", @@ -7,7 +8,7 @@ makeRLearner.regr.h2o.gbm = function() { # See http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/gbm.html makeIntegerLearnerParam("ntrees", lower = 1L, default = 50L), makeIntegerLearnerParam("max_depth", lower = 1L, default = 5L), - makeIntegerLearnerParam("min_rows", lower = 1L, default = 10L), + makeIntegerLearnerParam("min_rows", lower = 1L, default = 10L), makeIntegerLearnerParam("nbins", lower = 1L, default = 20L), makeIntegerLearnerParam("nbins_cats", lower = 1L, default = 1024), makeIntegerLearnerParam("nbins_top_level", lower = 1L, default = 1024), @@ -15,36 +16,36 @@ makeRLearner.regr.h2o.gbm = function() { makeNumericLearnerParam("learn_rate", lower = 0, upper = 1, default = 0.1), makeNumericLearnerParam("learn_rate_annealing", lower = 0, upper = 1, default = 1), makeDiscreteLearnerParam("distribution", - values = c("poisson", "laplace", "tweedie", "gaussian", "huber", "gamma", "quantile"), - default = "gaussian"), + values = c("poisson", "laplace", "tweedie", "gaussian", "huber", "gamma", "quantile"), + default = "gaussian"), makeNumericLearnerParam("sample_rate", lower = 0, upper = 1, default = 1), - #makeNumericLearnerParam("sample_rate_per_class", lower = 0, upper = 1, default = NULL, special.vals = list(NULL)), + # makeNumericLearnerParam("sample_rate_per_class", lower = 0, upper = 1, default = NULL, special.vals = list(NULL)), makeNumericLearnerParam("col_sample_rate", lower = 0, upper = 1, default = 1), makeNumericLearnerParam("col_sample_rate_change_per_level", lower = 0, upper = 1, default = 1), makeNumericLearnerParam("col_sample_rate_per_tree", lower = 0, upper = 1, default = 1), makeNumericLearnerParam("max_abs_leafnode_pred", lower = 0, default = Inf, allow.inf = TRUE), makeNumericLearnerParam("pred_noise_bandwidth", lower = 0, default = 0), makeDiscreteLearnerParam("categorical_encoding", - values = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", - "Eigen", "LabelEncoder", "SortByResponse"), - default = "AUTO"), + values = c("AUTO", "Enum", "OneHotInternal", "OneHotExplicit", "Binary", + "Eigen", "LabelEncoder", "SortByResponse"), + default = "AUTO"), makeNumericLearnerParam("min_split_improvement", lower = 0, default = 1e-05), makeDiscreteLearnerParam("histogram_type", - values = c("AUTO", "UniformAdaptive", "Random", "QuantilesGlobal", "RoundRobin"), - default = "AUTO"), + values = c("AUTO", "UniformAdaptive", "Random", "QuantilesGlobal", "RoundRobin"), + default = "AUTO"), makeLogicalLearnerParam("score_each_iteration", default = FALSE, tunable = FALSE), makeIntegerLearnerParam("score_tree_interval", lower = 0L, default = 0L, tunable = FALSE), makeIntegerLearnerParam("stopping_rounds", lower = 0L, default = 0L, tunable = FALSE), makeDiscreteLearnerParam("stopping_metric", - values = c("AUTO", "deviance", "logloss", "MSE", "RMSE", "MAE", "RMSLE"), - default = "AUTO", tunable = FALSE), + values = c("AUTO", "deviance", "logloss", "MSE", "RMSE", "MAE", "RMSLE"), + default = "AUTO", tunable = FALSE), makeNumericLearnerParam("stopping_tolerance", lower = 0, upper = Inf, default = 0.001, tunable = FALSE), makeNumericLearnerParam("quantile_alpha", lower = 0, upper = 100, default = 0.5, - requires = 
expression(distribution == "quantile")), + requires = expression(distribution == "quantile")), makeNumericLearnerParam("tweedie_power", lower = 1, upper = 2, default = 1.5, special.vals = list(0), - requires = expression(distribution == "tweedie")), + requires = expression(distribution == "tweedie")), makeNumericLearnerParam("huber_alpha", lower = 0, upper = 1, default = 0.9, - requires = expression(distribution == "huber")) + requires = expression(distribution == "huber")) ), properties = c("numerics", "factors", "missings"), name = "h2o.gbm", @@ -55,7 +56,8 @@ makeRLearner.regr.h2o.gbm = function() { } #' @export -trainLearner.regr.h2o.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.h2o.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { + # check if h2o connection already exists, otherwise start one conn.up = tryCatch(h2o::h2o.getConnection(), error = function(err) return(FALSE)) if (!inherits(conn.up, "H2OConnection")) { @@ -78,6 +80,7 @@ trainLearner.regr.h2o.gbm = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.regr.h2o.gbm = function(.learner, .model, .newdata, ...) { + m = .model$learner.model h2of = h2o::as.h2o(.newdata) p = h2o::h2o.predict(m, newdata = h2of, ...) diff --git a/R/RLearner_regr_h2oglm.R b/R/RLearner_regr_h2oglm.R index 6b07d95493..4bfe645e02 100644 --- a/R/RLearner_regr_h2oglm.R +++ b/R/RLearner_regr_h2oglm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.h2o.glm = function() { + makeRLearnerRegr( cl = "regr.h2o.glm", package = "h2o", @@ -28,7 +29,8 @@ makeRLearner.regr.h2o.glm = function() { } #' @export -trainLearner.regr.h2o.glm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.h2o.glm = function(.learner, .task, .subset, .weights = NULL, ...) { + # check if h2o connection already exists, otherwise start one conn.up = tryCatch(h2o::h2o.getConnection(), error = function(err) return(FALSE)) if (!inherits(conn.up, "H2OConnection")) { @@ -48,6 +50,7 @@ trainLearner.regr.h2o.glm = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.regr.h2o.glm = function(.learner, .model, .newdata, ...) { + m = .model$learner.model h2of = h2o::as.h2o(.newdata) p = h2o::h2o.predict(m, newdata = h2of, ...) diff --git a/R/RLearner_regr_h2orandomForest.R b/R/RLearner_regr_h2orandomForest.R index e317b2e9c1..6b655f1e08 100644 --- a/R/RLearner_regr_h2orandomForest.R +++ b/R/RLearner_regr_h2orandomForest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.h2o.randomForest = function() { + makeRLearnerRegr( cl = "regr.h2o.randomForest", package = "h2o", @@ -9,7 +10,7 @@ makeRLearner.regr.h2o.randomForest = function() { makeLogicalLearnerParam("build_tree_one_node", default = FALSE, tunable = FALSE), makeIntegerLearnerParam("ntrees", lower = 1L, default = 50L), makeIntegerLearnerParam("max_depth", lower = 1L, default = 20L), - makeIntegerLearnerParam("min_rows", lower = 1L, default = 1L), + makeIntegerLearnerParam("min_rows", lower = 1L, default = 1L), makeIntegerLearnerParam("nbins", lower = 1L, default = 20L), makeIntegerLearnerParam("nbins_cats", lower = 1L, default = 1024L), makeIntegerLearnerParam("seed", tunable = FALSE) @@ -22,7 +23,8 @@ makeRLearner.regr.h2o.randomForest = function() { } #' @export -trainLearner.regr.h2o.randomForest = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.h2o.randomForest = function(.learner, .task, .subset, .weights = NULL, ...) 
{ + # check if h2o connection already exists, otherwise start one conn.up = tryCatch(h2o::h2o.getConnection(), error = function(err) return(FALSE)) if (!inherits(conn.up, "H2OConnection")) { @@ -37,6 +39,7 @@ trainLearner.regr.h2o.randomForest = function(.learner, .task, .subset, .weights #' @export predictLearner.regr.h2o.randomForest = function(.learner, .model, .newdata, ...) { + m = .model$learner.model h2of = h2o::as.h2o(.newdata) p = h2o::h2o.predict(m, newdata = h2of, ...) diff --git a/R/RLearner_regr_kknn.R b/R/RLearner_regr_kknn.R index 1566312db8..fc22405ed9 100644 --- a/R/RLearner_regr_kknn.R +++ b/R/RLearner_regr_kknn.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.kknn = function() { + makeRLearnerRegr( cl = "regr.kknn", # FIXME: kknn set its own contr.dummy function, if we requireNamespace, @@ -21,12 +22,14 @@ makeRLearner.regr.kknn = function() { } #' @export -trainLearner.regr.kknn = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.kknn = function(.learner, .task, .subset, .weights = NULL, ...) { + list(td = getTaskDesc(.task), data = getTaskData(.task, .subset), parset = list(...)) } #' @export predictLearner.regr.kknn = function(.learner, .model, .newdata, ...) { + m = .model$learner.model f = getTaskFormula(.model$task.desc) pars = c(list(formula = f, train = m$data, test = .newdata), m$parset, list(...)) diff --git a/R/RLearner_regr_km.R b/R/RLearner_regr_km.R index 68a1bc547a..849179cdd1 100644 --- a/R/RLearner_regr_km.R +++ b/R/RLearner_regr_km.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.km = function() { + makeRLearnerRegr( cl = "regr.km", package = "DiceKriging", @@ -39,11 +40,13 @@ makeRLearner.regr.km = function() { } #' @export -trainLearner.regr.km = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.km = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) args = list(...) - if (!is.null(args$optim.method) && args$optim.method == "gen") + if (!is.null(args$optim.method) && args$optim.method == "gen") { requirePackages(packs = "rgenoud", why = "fitting 'regr.km' with 'rgenoud' optimization") + } if (!is.null(args$nugget.stability)) { if (args$nugget.stability == 0) { args$nugget = 0 @@ -57,6 +60,7 @@ trainLearner.regr.km = function(.learner, .task, .subset, .weights = NULL, ...) #' @export predictLearner.regr.km = function(.learner, .model, .newdata, jitter, ...) { + # km with nugget estim perfectly interpolate the datas ONLY at exactly the training points # see JSS paper for explanation # so we add minimal, numerical jitter to the x points @@ -66,8 +70,9 @@ predictLearner.regr.km = function(.learner, .model, .newdata, jitter, ...) { } se = (.learner$predict.type != "response") p = DiceKriging::predict.km(.model$learner.model, newdata = .newdata, type = "SK", se.compute = se) - if (!se) + if (!se) { return(p$mean) - else + } else { cbind(p$mean, p$sd) + } } diff --git a/R/RLearner_regr_ksvm.R b/R/RLearner_regr_ksvm.R index 722c18cc04..32022f4edc 100644 --- a/R/RLearner_regr_ksvm.R +++ b/R/RLearner_regr_ksvm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.ksvm = function() { + makeRLearnerRegr( cl = "regr.ksvm", package = "kernlab", @@ -40,16 +41,19 @@ makeRLearner.regr.ksvm = function() { #' @export trainLearner.regr.ksvm = function(.learner, .task, .subset, .weights = NULL, degree, offset, scale, sigma, order, length, lambda, ...) 
{ + kpar = learnerArgsToControl(list, degree, offset, scale, sigma, order, length, lambda) f = getTaskFormula(.task) # difference in missing(kpar) and kpar = list()! - if (base::length(kpar)) + if (base::length(kpar)) { kernlab::ksvm(f, data = getTaskData(.task, .subset), kpar = kpar, ...) - else + } else { kernlab::ksvm(f, data = getTaskData(.task, .subset), ...) + } } #' @export predictLearner.regr.ksvm = function(.learner, .model, .newdata, ...) { + kernlab::predict(.model$learner.model, newdata = .newdata, ...)[, 1L] } diff --git a/R/RLearner_regr_laGP.R b/R/RLearner_regr_laGP.R index 993a722d11..33ac7d9c55 100644 --- a/R/RLearner_regr_laGP.R +++ b/R/RLearner_regr_laGP.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.laGP = function() { + makeRLearnerRegr( cl = "regr.laGP", package = "laGP", @@ -26,12 +27,14 @@ makeRLearner.regr.laGP = function() { #' @export trainLearner.regr.laGP = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) return(list(data = d$data, target = d$target, parset = list(...))) } #' @export predictLearner.regr.laGP = function(.learner, .model, .newdata, ...) { + m = .model$learner.model pars = c(list(X = m$data, Z = m$target, XX = .newdata), Xi.ret = FALSE, m$parset, list(...)) diff --git a/R/RLearner_regr_liquidSVM.R b/R/RLearner_regr_liquidSVM.R index ebaf533a72..9a5905dc6c 100644 --- a/R/RLearner_regr_liquidSVM.R +++ b/R/RLearner_regr_liquidSVM.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.liquidSVM = function() { + makeRLearnerRegr( cl = "regr.liquidSVM", package = "liquidSVM", @@ -7,7 +8,7 @@ makeRLearner.regr.liquidSVM = function() { makeIntegerLearnerParam(id = "d", lower = 0L, upper = 7L, tunable = FALSE), makeLogicalLearnerParam(id = "scale", default = TRUE), makeIntegerLearnerParam(id = "threads", lower = -1L, default = 0), - makeDiscreteLearnerParam(id = "kernel", default = "gauss_rbf", values = c("gauss_rbf","poisson")), + makeDiscreteLearnerParam(id = "kernel", default = "gauss_rbf", values = c("gauss_rbf", "poisson")), makeIntegerLearnerParam(id = "partition_choice", lower = 0L, upper = 6L, default = 0), makeIntegerLearnerParam(id = "grid_choice", lower = -2L, upper = 2L), makeIntegerLearnerParam(id = "adaptivity_control", lower = 0L, upper = 2L, default = 0), @@ -25,7 +26,7 @@ makeRLearner.regr.liquidSVM = function() { makeNumericVectorLearnerParam(id = "c_values", lower = 0), makeLogicalLearnerParam(id = "useCells", default = FALSE) ), - properties = c( "numerics", "factors"), + properties = c("numerics", "factors"), name = "Support Vector Machines (liquidSVM)", short.name = "liquidSVM", callees = "liquidSVM" @@ -33,12 +34,14 @@ makeRLearner.regr.liquidSVM = function() { } #' @export -trainLearner.regr.liquidSVM = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.liquidSVM = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) - liquidSVM::svm(f, getTaskData(.task, .subset), ...) - } + liquidSVM::svm(f, getTaskData(.task, .subset), ...) +} #' @export predictLearner.regr.liquidSVM = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...) 
} diff --git a/R/RLearner_regr_lm.R b/R/RLearner_regr_lm.R index 52224b4d4a..b5109b99cd 100644 --- a/R/RLearner_regr_lm.R +++ b/R/RLearner_regr_lm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.lm = function() { + makeRLearnerRegr( cl = "regr.lm", package = "stats", @@ -15,12 +16,13 @@ makeRLearner.regr.lm = function() { } #' @export -trainLearner.regr.lm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.lm = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset) if (is.null(.weights)) { f = getTaskFormula(.task) stats::lm(f, data = d, ...) - } else { + } else { f = getTaskFormula(.task) stats::lm(f, data = d, weights = .weights, ...) } @@ -28,6 +30,7 @@ trainLearner.regr.lm = function(.learner, .task, .subset, .weights = NULL, ...) #' @export predictLearner.regr.lm = function(.learner, .model, .newdata, ...) { + if (.learner$predict.type == "response") { predict(.model$learner.model, newdata = .newdata, se.fit = FALSE, ...) } else { diff --git a/R/RLearner_regr_mars.R b/R/RLearner_regr_mars.R index ad243775f5..c72c0e4266 100644 --- a/R/RLearner_regr_mars.R +++ b/R/RLearner_regr_mars.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.mars = function() { + makeRLearnerRegr( cl = "regr.mars", package = "mda", @@ -20,12 +21,14 @@ makeRLearner.regr.mars = function() { } #' @export -trainLearner.regr.mars = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.mars = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) mda::mars(x = as.matrix(d$data), y = d$target, ...) } #' @export predictLearner.regr.mars = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata)[, 1L] } diff --git a/R/RLearner_regr_mob.R b/R/RLearner_regr_mob.R index c038bbcfc4..e3c9e545e6 100644 --- a/R/RLearner_regr_mob.R +++ b/R/RLearner_regr_mob.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.mob = function() { + makeRLearnerRegr( cl = "regr.mob", package = c("party", "modeltools"), @@ -32,26 +33,30 @@ trainLearner.regr.mob = function(.learner, .task, .subset, .weights = NULL, alph feats = getTaskFeatureNames(.task) # FIXME: document stuff # FIXME: think about these defaults, also ask julia - if (missing(part.feats)) + if (missing(part.feats)) { part.feats = feats - if (missing(term.feats)) + } + if (missing(term.feats)) { term.feats = feats + } target = getTaskTargetNames(.task) f = as.formula(stri_paste(target, "~", collapse(term.feats, sep = " + "), "|", collapse(part.feats, sep = " + "), sep = " ")) if (is.null(.weights)) { model = party::mob(f, data = getTaskData(.task, .subset), control = cntrl, ...) - } else { + } else { model = party::mob(f, data = getTaskData(.task, .subset), control = cntrl, weights = .weights, ...) } # sometimes mob fails to fit a model but does not signal an exception. - if (anyMissing(coef(model))) + if (anyMissing(coef(model))) { stop("Failed to fit party::mob. Some coefficients are estimated as NA") + } model } #' @export predictLearner.regr.mob = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...) 
} diff --git a/R/RLearner_regr_nnet.R b/R/RLearner_regr_nnet.R index 65da2e0a20..1a795e9da6 100644 --- a/R/RLearner_regr_nnet.R +++ b/R/RLearner_regr_nnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.nnet = function() { + makeRLearnerRegr( cl = "regr.nnet", package = "nnet", @@ -27,11 +28,12 @@ makeRLearner.regr.nnet = function() { } #' @export -trainLearner.regr.nnet = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.nnet = function(.learner, .task, .subset, .weights = NULL, ...) { + if (is.null(.weights)) { f = getTaskFormula(.task) nnet::nnet(f, data = getTaskData(.task, .subset), linout = TRUE, ...) - } else { + } else { f = getTaskFormula(.task) nnet::nnet(f, data = getTaskData(.task, .subset), linout = TRUE, weights = .weights, ...) } @@ -39,5 +41,6 @@ trainLearner.regr.nnet = function(.learner, .task, .subset, .weights = NULL, .. #' @export predictLearner.regr.nnet = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...)[, 1L] } diff --git a/R/RLearner_regr_nodeHarvest.R b/R/RLearner_regr_nodeHarvest.R index 1e09135a6c..bccc902293 100644 --- a/R/RLearner_regr_nodeHarvest.R +++ b/R/RLearner_regr_nodeHarvest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.nodeHarvest = function() { + makeRLearnerRegr( cl = "regr.nodeHarvest", package = "nodeHarvest", @@ -23,11 +24,13 @@ makeRLearner.regr.nodeHarvest = function() { #' @export trainLearner.regr.nodeHarvest = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) nodeHarvest::nodeHarvest(X = d$data, Y = d$target, ...) } #' @export predictLearner.regr.nodeHarvest = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, .newdata, ...) } diff --git a/R/RLearner_regr_pcr.R b/R/RLearner_regr_pcr.R index 53c057b8a1..fbcee969be 100644 --- a/R/RLearner_regr_pcr.R +++ b/R/RLearner_regr_pcr.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.pcr = function() { + makeRLearnerRegr( cl = "regr.pcr", package = "pls", @@ -21,13 +22,15 @@ makeRLearner.regr.pcr = function() { } #' @export -trainLearner.regr.pcr = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.pcr = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) pls::pcr(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.regr.pcr = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata) p[, 1L, dim(p)[3L]] } diff --git a/R/RLearner_regr_penalized.R b/R/RLearner_regr_penalized.R index f0d643c82f..bf1af815ba 100644 --- a/R/RLearner_regr_penalized.R +++ b/R/RLearner_regr_penalized.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.penalized = function() { + makeRLearnerRegr( cl = "regr.penalized", package = "!penalized", @@ -32,16 +33,18 @@ makeRLearner.regr.penalized = function() { } #' @export -trainLearner.regr.penalized = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.penalized = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) penalized::penalized(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.regr.penalized = function(.learner, .model, .newdata, ...) 
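# ---- editorial aside ------------------------------------------------------
# Sketch of the array indexing in predictLearner.regr.pcr above, assuming
# only the 'pls' package. predict() on a pcr fit returns a 3-d array
# (observations x responses x components); the learner keeps the single
# response at the largest component count, i.e. p[, 1L, dim(p)[3L]].
library(pls)
m = pcr(mpg ~ ., data = mtcars, ncomp = 4)
p = predict(m, newdata = mtcars)
dim(p)                      # 32 x 1 x 4
head(p[, 1L, dim(p)[3L]])   # predictions using all 4 components
# ---------------------------------------------------------------------------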
{ + m = .model$learner.model # FIXME: should be removed, reported in issue 840 m@formula$unpenalized[[2L]] = as.symbol(.model$task.desc$target) .newdata[, .model$task.desc$target] = 0 - penalized::predict(m, data = .newdata, ...)[, "mu"] + penalized::predict(m, data = .newdata, ...)[, "mu"] } diff --git a/R/RLearner_regr_plsr.R b/R/RLearner_regr_plsr.R index 80b18a8615..00d75d2dd4 100644 --- a/R/RLearner_regr_plsr.R +++ b/R/RLearner_regr_plsr.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.plsr = function() { + makeRLearnerRegr(cl = "regr.plsr", package = "pls", par.set = makeParamSet( @@ -20,6 +21,7 @@ makeRLearner.regr.plsr = function() { #' @export trainLearner.regr.plsr = function(.learner, .task, .subset, .weights, ...) { + f = getTaskFormula(.task) d = getTaskData(.task, .subset) pls::plsr(f, data = d, ...) @@ -27,5 +29,6 @@ trainLearner.regr.plsr = function(.learner, .task, .subset, .weights, ...) { #' @export predictLearner.regr.plsr = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, comps = seq_len(.model$learner.model$ncomp), ...)[, 1L] } diff --git a/R/RLearner_regr_randomForest.R b/R/RLearner_regr_randomForest.R index 8eef1011ac..6c329dbd0c 100644 --- a/R/RLearner_regr_randomForest.R +++ b/R/RLearner_regr_randomForest.R @@ -51,6 +51,7 @@ NULL #' @export makeRLearner.regr.randomForest = function() { + makeRLearnerRegr( cl = "regr.randomForest", package = "randomForest", @@ -58,7 +59,7 @@ makeRLearner.regr.randomForest = function() { makeIntegerLearnerParam(id = "ntree", default = 500L, lower = 1L), makeIntegerLearnerParam(id = "se.ntree", default = 100L, lower = 1L, when = "both", requires = quote(se.method == "bootstrap")), makeDiscreteLearnerParam(id = "se.method", default = "sd", - values = c("bootstrap", "jackknife", "sd"), + values = c("bootstrap", "jackknife", "sd"), requires = quote(se.method %in% "jackknife" && keep.inbag == TRUE), when = "both"), makeIntegerLearnerParam(id = "se.boot", default = 50L, lower = 1L, when = "both"), @@ -87,6 +88,7 @@ makeRLearner.regr.randomForest = function() { #' @export trainLearner.regr.randomForest = function(.learner, .task, .subset, .weights = NULL, se.method = "sd", keep.inbag = NULL, se.boot = 50L, se.ntree = 100L, ...) { + data = getTaskData(.task, .subset, target.extra = TRUE) m = randomForest::randomForest(x = data[["data"]], y = data[["target"]], keep.inbag = if (is.null(keep.inbag)) TRUE else keep.inbag, ...) @@ -102,10 +104,12 @@ trainLearner.regr.randomForest = function(.learner, .task, .subset, .weights = N #' @export predictLearner.regr.randomForest = function(.learner, .model, .newdata, se.method = "sd", ...) { - if (se.method == "bootstrap") + + if (se.method == "bootstrap") { pred = predict(.model$learner.model$single.model, newdata = .newdata, ...) - else + } else { pred = predict(.model$learner.model, newdata = .newdata, ...) + } if (.learner$predict.type == "se") { se.fun = switch(se.method, bootstrap = bootstrapStandardError, @@ -121,6 +125,7 @@ predictLearner.regr.randomForest = function(.learner, .model, .newdata, se.metho #' @export getOOBPredsLearner.regr.randomForest = function(.learner, .model) { + getLearnerModel(.model, more.unwrap = TRUE)$predicted } @@ -129,8 +134,9 @@ getOOBPredsLearner.regr.randomForest = function(.learner, .model) { # Set se.ntree << ntree for the noisy bootstrap (mc bias corrected) bootstrapStandardError = function(.learner, .model, .newdata, se.ntree = 100L, se.boot = 50L, ...) 
{ - single.model = getLearnerModel(.model)$single.model #get raw RF model - bagged.models = getLearnerModel(getLearnerModel(.model)$bagged.models) #get list of unbagged mlr models + + single.model = getLearnerModel(.model)$single.model # get raw RF model + bagged.models = getLearnerModel(getLearnerModel(.model)$bagged.models) # get list of unbagged mlr models pred.bagged = lapply(bagged.models, function(x) predict(getLearnerModel(x), newdata = .newdata, predict.all = TRUE)) pred.boot.all = extractSubList(pred.bagged, "individual", simplify = FALSE) ntree = single.model$ntree @@ -156,6 +162,7 @@ bootstrapStandardError = function(.learner, .model, .newdata, # Computes the mc bias-corrected jackknife after bootstrap jackknifeStandardError = function(.learner, .model, .newdata, ...) { + model = .model$learner.model model$inbag = model$inbag[rowSums(model$inbag == 0) > 0, , drop = FALSE] n = nrow(model$inbag) @@ -174,11 +181,13 @@ jackknifeStandardError = function(.learner, .model, .newdata, ...) { # computes the standard deviation across trees sdStandardError = function(.learner, .model, .newdata, ...) { + pred = predict(.model$learner.model, newdata = .newdata, predict.all = TRUE, ...) apply(pred$individual, 1, sd) } #' @export getFeatureImportanceLearner.regr.randomForest = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.randomForest(.learner, .model, ...) } diff --git a/R/RLearner_regr_randomForestSRC.R b/R/RLearner_regr_randomForestSRC.R index 405284560f..f7f4abbc23 100644 --- a/R/RLearner_regr_randomForestSRC.R +++ b/R/RLearner_regr_randomForestSRC.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.randomForestSRC = function() { + makeRLearnerRegr( cl = "regr.randomForestSRC", package = "randomForestSRC", @@ -51,12 +52,14 @@ makeRLearner.regr.randomForestSRC = function() { #' @export trainLearner.regr.randomForestSRC = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) randomForestSRC::rfsrc(f, data = getTaskData(.task, .subset), case.wt = .weights, ...) } #' @export predictLearner.regr.randomForestSRC = function(.learner, .model, .newdata, ...) { + p = predict(.model$learner.model, newdata = .newdata, membership = FALSE, ...) # versison 2.0 of randomForestSRC returns an array here :( as.numeric(p$predicted) @@ -64,11 +67,13 @@ predictLearner.regr.randomForestSRC = function(.learner, .model, .newdata, ...) #' @export getOOBPredsLearner.regr.randomForestSRC = function(.learner, .model) { + as.numeric(getLearnerModel(.model, more.unwrap = TRUE)$predicted.oob) } #' @rdname getFeatureImportanceLearner getFeatureImportanceLearner.regr.randomForestSRC = function(.learner, .model, ...) { + mod = getLearnerModel(.model, more.unwrap = TRUE) randomForestSRC::vimp(mod)$importance } diff --git a/R/RLearner_regr_ranger.R b/R/RLearner_regr_ranger.R index 50d4cdf9b6..ada4134f7e 100644 --- a/R/RLearner_regr_ranger.R +++ b/R/RLearner_regr_ranger.R @@ -1,6 +1,7 @@ #' @export makeRLearner.regr.ranger = function() { + makeRLearnerRegr( cl = "regr.ranger", package = "ranger", @@ -37,6 +38,7 @@ makeRLearner.regr.ranger = function() { #' @export trainLearner.regr.ranger = function(.learner, .task, .subset, .weights = NULL, ...) { + tn = getTaskTargetNames(.task) ranger::ranger(formula = NULL, dependent.variable = tn, data = getTaskData(.task, .subset), case.weights = .weights, ...) @@ -44,6 +46,7 @@ trainLearner.regr.ranger = function(.learner, .task, .subset, .weights = NULL, . 
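# ---- editorial aside ------------------------------------------------------
# Standalone sketch of sdStandardError() above, assuming only the
# 'randomForest' package. With predict.all = TRUE, predict() returns the
# aggregated forest prediction plus the per-tree predictions, and the
# per-observation standard deviation across trees serves as the "sd" SE.
library(randomForest)
m = randomForest(dist ~ speed, data = cars, ntree = 100)
p = predict(m, newdata = cars, predict.all = TRUE)
response = p$aggregate             # the usual ensemble mean
se = apply(p$individual, 1, sd)    # spread across the 100 trees
head(cbind(response, se))
# ---------------------------------------------------------------------------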
#' @export predictLearner.regr.ranger = function(.learner, .model, .newdata, ...) { + type = if (.learner$predict.type == "se") "se" else "response" p = predict(object = .model$learner.model, data = .newdata, type = type, ...) if (.learner$predict.type == "se") { @@ -55,10 +58,12 @@ predictLearner.regr.ranger = function(.learner, .model, .newdata, ...) { #' @export getOOBPredsLearner.regr.ranger = function(.learner, .model) { + getLearnerModel(.model, more.unwrap = TRUE)$predictions } #' @export getFeatureImportanceLearner.regr.ranger = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.ranger(.learner, .model, ...) } diff --git a/R/RLearner_regr_rknn.R b/R/RLearner_regr_rknn.R index 6c946ce33f..6ad091d065 100644 --- a/R/RLearner_regr_rknn.R +++ b/R/RLearner_regr_rknn.R @@ -1,5 +1,6 @@ #' @export -makeRLearner.regr.rknn = function(){ +makeRLearner.regr.rknn = function() { + makeRLearnerRegr( cl = "regr.rknn", package = "rknn", @@ -20,13 +21,15 @@ makeRLearner.regr.rknn = function(){ } #' @export -trainLearner.regr.rknn = function(.learner, .task, .subset, .weights = NULL, ...){ +trainLearner.regr.rknn = function(.learner, .task, .subset, .weights = NULL, ...) { + z = getTaskData(.task, .subset, target.extra = TRUE) c(list(data = z$data, y = z$target), list(...)) } #' @export predictLearner.regr.rknn = function(.learner, .model, .newdata, ...) { + args = .model$learner.model args$newdata = .newdata do.call(rknn::rknnReg, args)$pred diff --git a/R/RLearner_regr_rpart.R b/R/RLearner_regr_rpart.R index 894dfb632a..99c5830706 100644 --- a/R/RLearner_regr_rpart.R +++ b/R/RLearner_regr_rpart.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.rpart = function() { + makeRLearnerRegr( cl = "regr.rpart", package = "rpart", @@ -25,12 +26,13 @@ makeRLearner.regr.rpart = function() { } #' @export -trainLearner.regr.rpart = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.rpart = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset) if (is.null(.weights)) { f = getTaskFormula(.task) rpart::rpart(f, data = d, ...) - } else { + } else { f = getTaskFormula(.task) rpart::rpart(f, data = d, weights = .weights, ...) } @@ -38,11 +40,12 @@ trainLearner.regr.rpart = function(.learner, .task, .subset, .weights = NULL, . #' @export predictLearner.regr.rpart = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...) } #' @export getFeatureImportanceLearner.regr.rpart = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.rpart(.learner, .model, ...) } - diff --git a/R/RLearner_regr_rsm.R b/R/RLearner_regr_rsm.R index 3a692f3714..7ac76718bb 100644 --- a/R/RLearner_regr_rsm.R +++ b/R/RLearner_regr_rsm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.rsm = function() { + makeRLearnerRegr( cl = "regr.rsm", package = "rsm", @@ -17,7 +18,8 @@ makeRLearner.regr.rsm = function() { } #' @export -trainLearner.regr.rsm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.rsm = function(.learner, .task, .subset, .weights = NULL, ...) { + mf = list(...)$modelfun vs = stri_paste(getTaskFeatureNames(.task), collapse = ",", sep = " ") g = function(x) stri_paste(x, "(", vs, ")", sep = "") @@ -35,5 +37,6 @@ trainLearner.regr.rsm = function(.learner, .task, .subset, .weights = NULL, ... #' @export predictLearner.regr.rsm = function(.learner, .model, .newdata, ...) 
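# ---- editorial aside ------------------------------------------------------
# The regr.rknn learner above "trains" by merely storing its arguments,
# because rknn needs training and test data in a single call; a hedged sketch
# of that deferred-fit pattern. knn_store()/knn_predict() are hypothetical
# names, and the rknnReg() argument names are taken from the hunk above, not
# independently verified.
knn_store = function(data, y, ...) {
  list(data = data, y = y, args = list(...))  # nothing is fitted yet
}
knn_predict = function(model, newdata) {
  args = c(list(data = model$data, y = model$y, newdata = newdata), model$args)
  do.call(rknn::rknnReg, args)$pred           # the real work happens here
}
# ---------------------------------------------------------------------------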
{ + as.numeric(predict(.model$learner.model, newdata = .newdata, ...)) } diff --git a/R/RLearner_regr_rvm.R b/R/RLearner_regr_rvm.R index e332671668..222d9bb558 100644 --- a/R/RLearner_regr_rvm.R +++ b/R/RLearner_regr_rvm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.rvm = function() { + makeRLearnerRegr( cl = "regr.rvm", package = "kernlab", @@ -38,15 +39,18 @@ makeRLearner.regr.rvm = function() { #' @export trainLearner.regr.rvm = function(.learner, .task, .subset, .weights = NULL, degree, offset, scale, sigma, order, length, lambda, normalized, ...) { + kpar = learnerArgsToControl(list, degree, offset, scale, sigma, order, length, lambda, normalized) f = getTaskFormula(.task) - if (base::length(kpar)) + if (base::length(kpar)) { kernlab::rvm(f, data = getTaskData(.task, .subset), kpar = kpar, ...) - else + } else { kernlab::rvm(f, data = getTaskData(.task, .subset), ...) + } } #' @export predictLearner.regr.rvm = function(.learner, .model, .newdata, ...) { + kernlab::predict(.model$learner.model, newdata = .newdata, ...)[, 1L] } diff --git a/R/RLearner_regr_slim.R b/R/RLearner_regr_slim.R index 957da9b16d..d1abb0ee35 100644 --- a/R/RLearner_regr_slim.R +++ b/R/RLearner_regr_slim.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.slim = function() { + makeRLearnerRegr( cl = "regr.slim", package = "flare", @@ -29,11 +30,13 @@ makeRLearner.regr.slim = function() { #' @export trainLearner.regr.slim = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset, target.extra = TRUE) flare::slim(X = as.matrix(d$data), Y = d$target, ...) } #' @export predictLearner.regr.slim = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = as.matrix(.newdata), ...)[[1]][, 1L] } diff --git a/R/RLearner_regr_svm.R b/R/RLearner_regr_svm.R index 218a71e931..8604230007 100644 --- a/R/RLearner_regr_svm.R +++ b/R/RLearner_regr_svm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.regr.svm = function() { + makeRLearnerRegr( cl = "regr.svm", package = "e1071", @@ -9,7 +10,7 @@ makeRLearner.regr.svm = function() { makeIntegerLearnerParam(id = "degree", default = 3L, lower = 1L, requires = quote(kernel == "polynomial")), makeNumericLearnerParam(id = "gamma", lower = 0, requires = quote(kernel != "linear")), makeNumericLearnerParam(id = "coef0", default = 0, requires = quote(kernel == "polynomial" || kernel == "sigmoid")), - makeNumericLearnerParam(id = "cost", default = 1, lower = 0, requires = quote(type == "C-regrication")), + makeNumericLearnerParam(id = "cost", default = 1, lower = 0, requires = quote(type == "C-regrication")), makeNumericLearnerParam(id = "nu", default = 0.5, requires = quote(type == "nu-regression")), makeNumericLearnerParam(id = "cachesize", default = 40L), makeNumericLearnerParam(id = "tolerance", default = 0.001, lower = 0), @@ -27,12 +28,14 @@ makeRLearner.regr.svm = function() { } #' @export -trainLearner.regr.svm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.svm = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) e1071::svm(f, data = getTaskData(.task, .subset), ...) } #' @export predictLearner.regr.svm = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, ...) 
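# ---- editorial aside ------------------------------------------------------
# In the regr.svm parameter set above, the requirement quoted for "cost",
# type == "C-regrication", matches no e1071::svm type -- the regression types
# are "eps-regression" and "nu-regression". This looks like a pre-existing
# search-and-replace slip in upstream mlr; the hunk here only touches
# whitespace and leaves it as is, so flagging rather than fixing.
# ---------------------------------------------------------------------------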
} diff --git a/R/RLearner_regr_xgboost.R b/R/RLearner_regr_xgboost.R index 3d00501209..a8c86cc8b5 100644 --- a/R/RLearner_regr_xgboost.R +++ b/R/RLearner_regr_xgboost.R @@ -1,11 +1,12 @@ #' @export makeRLearner.regr.xgboost = function() { + makeRLearnerRegr( cl = "regr.xgboost", package = "xgboost", par.set = makeParamSet( # we pass all of what goes in 'params' directly to ... of xgboost - #makeUntypedLearnerParam(id = "params", default = list()), + # makeUntypedLearnerParam(id = "params", default = list()), makeDiscreteLearnerParam(id = "booster", default = "gbtree", values = c("gbtree", "gblinear", "dart")), makeUntypedLearnerParam(id = "watchlist", default = NULL, tunable = FALSE), makeNumericLearnerParam(id = "eta", default = 0.3, lower = 0, upper = 1), @@ -41,11 +42,11 @@ makeRLearner.regr.xgboost = function() { makeNumericLearnerParam(id = "rate_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")), makeNumericLearnerParam(id = "skip_drop", default = 0, lower = 0, upper = 1, requires = quote(booster == "dart")), # TODO: uncomment the following after the next CRAN update, and set max_depth's lower = 0L - #makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")), - #makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")), - #makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")), - #makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")), - #makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")), + # makeLogicalLearnerParam(id = "one_drop", default = FALSE, requires = quote(booster == "dart")), + # makeDiscreteLearnerParam(id = "tree_method", default = "exact", values = c("exact", "hist"), requires = quote(booster != "gblinear")), + # makeDiscreteLearnerParam(id = "grow_policy", default = "depthwise", values = c("depthwise", "lossguide"), requires = quote(tree_method == "hist")), + # makeIntegerLearnerParam(id = "max_leaves", default = 0L, lower = 0L, requires = quote(grow_policy == "lossguide")), + # makeIntegerLearnerParam(id = "max_bin", default = 256L, lower = 2L, requires = quote(tree_method == "hist")), makeUntypedLearnerParam(id = "callbacks", default = list(), tunable = FALSE) ), par.vals = list(nrounds = 1L, verbose = 0L), @@ -58,31 +59,37 @@ makeRLearner.regr.xgboost = function() { } #' @export -trainLearner.regr.xgboost = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.regr.xgboost = function(.learner, .task, .subset, .weights = NULL, ...) { + parlist = list(...) - if (is.null(parlist$objective)) + if (is.null(parlist$objective)) { parlist$objective = "reg:linear" + } task.data = getTaskData(.task, .subset, target.extra = TRUE) parlist$data = xgboost::xgb.DMatrix(data = data.matrix(task.data$data), label = task.data$target) - if (!is.null(.weights)) + if (!is.null(.weights)) { xgboost::setinfo(parlist$data, "weight", .weights) + } - if (is.null(parlist$watchlist)) + if (is.null(parlist$watchlist)) { parlist$watchlist = list(train = parlist$data) + } do.call(xgboost::xgb.train, parlist) } #' @export predictLearner.regr.xgboost = function(.learner, .model, .newdata, ...) { + m = .model$learner.model predict(m, newdata = data.matrix(.newdata), ...) 
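# ---- editorial aside ------------------------------------------------------
# Standalone sketch of the xgb.DMatrix plumbing in trainLearner.regr.xgboost
# above, assuming only the 'xgboost' package. Everything is funneled through
# one DMatrix so case weights and the default watchlist can be attached in
# one place. "reg:linear" mirrors the hunk's default; recent xgboost releases
# spell it "reg:squarederror".
library(xgboost)
x = data.matrix(mtcars[, -1]); y = mtcars$mpg
dtrain = xgb.DMatrix(data = x, label = y)
setinfo(dtrain, "weight", rep(1, nrow(x)))     # optional case weights
booster = xgb.train(params = list(objective = "reg:linear"), data = dtrain,
  nrounds = 10L, watchlist = list(train = dtrain), verbose = 0L)
head(predict(booster, newdata = x))
# ---------------------------------------------------------------------------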
} #' @export getFeatureImportanceLearner.regr.xgboost = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.xgboost(.learner, .model, ...) } diff --git a/R/RLearner_surv_CoxBoost.R b/R/RLearner_surv_CoxBoost.R index b0fa892bc4..5b62b6eebb 100644 --- a/R/RLearner_surv_CoxBoost.R +++ b/R/RLearner_surv_CoxBoost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.CoxBoost = function() { + makeRLearnerSurv( cl = "surv.CoxBoost", package = "!CoxBoost", @@ -24,12 +25,14 @@ makeRLearner.surv.CoxBoost = function() { #' @export trainLearner.surv.CoxBoost = function(.learner, .task, .subset, .weights = NULL, penalty = NULL, unpen.index = NULL, ...) { + data = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(data$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) data$data = as.matrix(fixDataForLearner(data$data, info)) - if (is.null(penalty)) + if (is.null(penalty)) { penalty = 9 * sum(data$target[, 2L]) + } attachTrainingInfo(CoxBoost::CoxBoost( time = data$target[, 1L], @@ -43,6 +46,7 @@ trainLearner.surv.CoxBoost = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.surv.CoxBoost = function(.learner, .model, .newdata, ...) { + info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) as.numeric(predict(.model$learner.model, newdata = .newdata, type = "lp")) diff --git a/R/RLearner_surv_cforest.R b/R/RLearner_surv_cforest.R index 887283b31c..7b256de054 100644 --- a/R/RLearner_surv_cforest.R +++ b/R/RLearner_surv_cforest.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.cforest = function() { + makeRLearnerSurv( cl = "surv.cforest", package = c("party", "survival"), @@ -36,6 +37,7 @@ trainLearner.surv.cforest = function(.learner, .task, .subset, .weights = NULL, ntree, mtry, replace, fraction, trace, teststat, testtype, mincriterion, minsplit, minbucket, stump, nresample, maxsurrogate, maxdepth, savesplitstats, ...) { + f = getTaskFormula(.task) d = getTaskData(.task, .subset) defaults = getDefaults(getParamSet(.learner)) @@ -53,11 +55,13 @@ trainLearner.surv.cforest = function(.learner, .task, .subset, #' @export predictLearner.surv.cforest = function(.learner, .model, .newdata, ...) { + # cforest returns median survival times; multiply by -1 so that high values correspond to high risk -1 * predict(.model$learner.model, newdata = .newdata, type = "response", ...) } #' @export getFeatureImportanceLearner.surv.cforest = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.cforest(.learner, .model, ...) } diff --git a/R/RLearner_surv_coxph.R b/R/RLearner_surv_coxph.R index 51bf9d3f76..1dc9cce641 100644 --- a/R/RLearner_surv_coxph.R +++ b/R/RLearner_surv_coxph.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.coxph = function() { + makeRLearnerSurv( cl = "surv.coxph", package = "survival", @@ -23,17 +24,19 @@ makeRLearner.surv.coxph = function() { } #' @export -trainLearner.surv.coxph = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.surv.coxph = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) data = getTaskData(.task, subset = .subset) if (is.null(.weights)) { survival::coxph(formula = f, data = data, ...) - } else { + } else { survival::coxph(formula = f, data = data, weights = .weights, ...) } } #' @export predictLearner.surv.coxph = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, type = "lp", ...) 
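# ---- editorial aside ------------------------------------------------------
# The NULL-penalty default in the CoxBoost learner above is the rule of thumb
# "9 times the number of events in the training data" (the status column sums
# to the event count). A two-line illustration with a (time, status) target:
target = cbind(time = c(5, 8, 12, 3), status = c(1, 0, 1, 1))
9 * sum(target[, 2L])   # 3 events -> default penalty of 27
# ---------------------------------------------------------------------------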
} diff --git a/R/RLearner_surv_cv.CoxBoost.R b/R/RLearner_surv_cv.CoxBoost.R index 66de066b45..a25d39ad8e 100644 --- a/R/RLearner_surv_cv.CoxBoost.R +++ b/R/RLearner_surv_cv.CoxBoost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.cv.CoxBoost = function() { + makeRLearnerSurv( cl = "surv.cv.CoxBoost", package = "!CoxBoost", @@ -27,11 +28,13 @@ makeRLearner.surv.cv.CoxBoost = function() { #' @export trainLearner.surv.cv.CoxBoost = function(.learner, .task, .subset, .weights = NULL, penalty = NULL, unpen.index = NULL, ...) { + data = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(data$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) - if (is.null(penalty)) + if (is.null(penalty)) { penalty = 9 * sum(data$target[, 2L]) + } pars = c(list( time = data$target[, 1L], @@ -44,8 +47,9 @@ trainLearner.surv.cv.CoxBoost = function(.learner, .task, .subset, .weights = NU res = do.call(CoxBoost::cv.CoxBoost, pars) res$optimal.step - if (res$optimal.step == 0L) + if (res$optimal.step == 0L) { warning("Could not determine the optimal step number in cv.CoxBoost") + } pars = insert(pars, list(stepno = res$optimal.step)) pars$maxstepno = NULL @@ -54,6 +58,7 @@ trainLearner.surv.cv.CoxBoost = function(.learner, .task, .subset, .weights = NU #' @export predictLearner.surv.cv.CoxBoost = function(.learner, .model, .newdata, ...) { + info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) as.numeric(predict(.model$learner.model, newdata = .newdata, type = "lp")) diff --git a/R/RLearner_surv_cvglmnet.R b/R/RLearner_surv_cvglmnet.R index 7b6354f04c..059850313c 100644 --- a/R/RLearner_surv_cvglmnet.R +++ b/R/RLearner_surv_cvglmnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.cvglmnet = function() { + makeRLearnerSurv( cl = "surv.cvglmnet", package = "glmnet", @@ -41,13 +42,15 @@ makeRLearner.surv.cvglmnet = function() { } #' @export -trainLearner.surv.cvglmnet = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.surv.cvglmnet = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(d$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) args = c(list(x = as.matrix(fixDataForLearner(d$data, info)), y = d$target, family = "cox", parallel = FALSE), list(...)) rm(d) - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } saved.ctrl = glmnet::glmnet.control() is.ctrl.arg = names(args) %in% names(saved.ctrl) @@ -62,6 +65,7 @@ trainLearner.surv.cvglmnet = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.surv.cvglmnet = function(.learner, .model, .newdata, ...) { + info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) as.numeric(predict(.model$learner.model, newx = .newdata, type = "link", ...)) diff --git a/R/RLearner_surv_gamboost.R b/R/RLearner_surv_gamboost.R index 5172a55020..bdbc73176f 100644 --- a/R/RLearner_surv_gamboost.R +++ b/R/RLearner_surv_gamboost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.gamboost = function() { + makeRLearnerSurv( cl = "surv.gamboost", package = c("!survival", "mboost"), @@ -30,6 +31,7 @@ makeRLearner.surv.gamboost = function() { #' @export trainLearner.surv.gamboost = function(.learner, .task, .subset, .weights = NULL, nuirange = c(0, 100), family, custom.family.definition, mstop, nu, risk, stopintern, trace, ...) 
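# ---- editorial aside ------------------------------------------------------
# Sketch of the glmnet.control() snapshot/restore idiom used by the
# (cv)glmnet learners above, assuming only the 'glmnet' package (and that the
# list returned by glmnet.control() can be fed straight back to it, as the
# restore step does). The factory settings are process-global, so
# control-type arguments must only apply for the duration of the fit.
library(glmnet)
saved.ctrl = glmnet.control()            # snapshot current factory settings
glmnet.control(fdev = 0)                 # e.g. keep the full lambda path
fit = glmnet(x = as.matrix(mtcars[, -1]), y = mtcars$mpg)
do.call(glmnet.control, saved.ctrl)      # restore the snapshot
# ---------------------------------------------------------------------------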
{ + requirePackages("mboost", why = "argument 'baselearner' requires package", suppress.warnings = TRUE) ctrl = learnerArgsToControl(mboost::boost_control, mstop, nu, risk, trace, stopintern) family = switch(family, @@ -41,16 +43,17 @@ trainLearner.surv.gamboost = function(.learner, .task, .subset, .weights = NULL, custom.family = custom.family.definition ) - f = getTaskFormula(.task) - data = getTaskData(.task, subset = .subset, recode.target = "surv") - if (is.null(.weights)) { - model = mboost::gamboost(f, data = data, control = ctrl, family = family, ...) - } else { - model = mboost::gamboost(f, data = getTaskData(.task, subset = .subset, recode.target = "surv"), control = ctrl, weights = .weights, family = family, ...) - } + f = getTaskFormula(.task) + data = getTaskData(.task, subset = .subset, recode.target = "surv") + if (is.null(.weights)) { + model = mboost::gamboost(f, data = data, control = ctrl, family = family, ...) + } else { + model = mboost::gamboost(f, data = getTaskData(.task, subset = .subset, recode.target = "surv"), control = ctrl, weights = .weights, family = family, ...) + } } #' @export predictLearner.surv.gamboost = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, type = "link") } diff --git a/R/RLearner_surv_gbm.R b/R/RLearner_surv_gbm.R index 41d9fb4c24..24f8a8fd0c 100644 --- a/R/RLearner_surv_gbm.R +++ b/R/RLearner_surv_gbm.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.gbm = function() { + makeRLearnerSurv( cl = "surv.gbm", package = "gbm", @@ -25,25 +26,28 @@ makeRLearner.surv.gbm = function() { } #' @export -trainLearner.surv.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.surv.gbm = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, .subset) f = getTaskFormula(.task) if (is.null(.weights)) { gbm::gbm(f, data = d, distribution = "coxph", ...) - } else { + } else { gbm::gbm(f, data = d, weights = .weights, distribution = "coxph", ...) } } #' @export predictLearner.surv.gbm = function(.learner, .model, .newdata, ...) { + m = .model$learner.model gbm::predict.gbm(m, newdata = .newdata, type = "response", n.trees = m$n.trees, single.tree = FALSE, ...) } #' @export getFeatureImportanceLearner.surv.gbm = function(.learner, .model, ...) { + mod = getLearnerModel(.model) gbm::relative.influence(mod, mod$n.trees, ...) } diff --git a/R/RLearner_surv_glmboost.R b/R/RLearner_surv_glmboost.R index e73ae537da..3a828ed2a5 100644 --- a/R/RLearner_surv_glmboost.R +++ b/R/RLearner_surv_glmboost.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.glmboost = function() { + makeRLearnerSurv( cl = "surv.glmboost", package = c("!survival", "mboost"), @@ -30,6 +31,7 @@ makeRLearner.surv.glmboost = function() { #' @export trainLearner.surv.glmboost = function(.learner, .task, .subset, .weights = NULL, nuirange = c(0, 100), family, custom.family.definition, mstop, nu, risk, stopintern, trace, use.formula, ...) { + ctrl = learnerArgsToControl(mboost::boost_control, mstop, nu, risk, trace, stopintern) family = switch(family, CoxPH = mboost::CoxPH(), @@ -38,12 +40,12 @@ trainLearner.surv.glmboost = function(.learner, .task, .subset, .weights = NULL, Lognormal = mboost::Lognormal(nuirange = nuirange), Gehan = mboost::Gehan(), custom.family = custom.family.definition - ) + ) if (use.formula) { f = getTaskFormula(.task) model = if (is.null(.weights)) { mboost::glmboost(f, data = getTaskData(.task, subset = .subset, recode.target = "surv"), control = ctrl, family = family, ...) 
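# ---- editorial aside ------------------------------------------------------
# The survival boosting learners above all resolve their 'family' string
# through the same switch; a consolidated sketch. pick_family() is a
# hypothetical helper, and only the mboost constructors visible in these
# hunks are listed -- any branches hidden by the hunk context are omitted.
pick_family = function(family, nuirange = c(0, 100), custom = NULL) {
  switch(family,
    CoxPH = mboost::CoxPH(),
    Lognormal = mboost::Lognormal(nuirange = nuirange),
    Gehan = mboost::Gehan(),
    custom.family = custom)
}
# e.g. pick_family("CoxPH") yields the family object handed to glmboost().
# ---------------------------------------------------------------------------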
- } else { + } else { mboost::glmboost(f, data = getTaskData(.task, subset = .subset, recode.target = "surv"), control = ctrl, weights = .weights, family = family, ...) } } else { @@ -62,6 +64,7 @@ trainLearner.surv.glmboost = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.surv.glmboost = function(.learner, .model, .newdata, use.formula, ...) { + if (!use.formula) { info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) diff --git a/R/RLearner_surv_glmnet.R b/R/RLearner_surv_glmnet.R index 34b0b8700b..f745081258 100644 --- a/R/RLearner_surv_glmnet.R +++ b/R/RLearner_surv_glmnet.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.glmnet = function() { + makeRLearnerSurv( cl = "surv.glmnet", package = "glmnet", @@ -46,13 +47,15 @@ makeRLearner.surv.glmnet = function() { } #' @export -trainLearner.surv.glmnet = function(.learner, .task, .subset, .weights = NULL, ...) { +trainLearner.surv.glmnet = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, subset = .subset, target.extra = TRUE, recode.target = "surv") info = getFixDataInfo(d$data, factors.to.dummies = TRUE, ordered.to.int = TRUE) args = c(list(x = as.matrix(fixDataForLearner(d$data, info)), y = d$target, family = "cox"), list(...)) rm(d) - if (!is.null(.weights)) + if (!is.null(.weights)) { args$weights = .weights + } glmnet::glmnet.control(factory = TRUE) saved.ctrl = glmnet::glmnet.control() @@ -68,6 +71,7 @@ trainLearner.surv.glmnet = function(.learner, .task, .subset, .weights = NULL, #' @export predictLearner.surv.glmnet = function(.learner, .model, .newdata, ...) { + info = getTrainingInfo(.model) .newdata = as.matrix(fixDataForLearner(.newdata, info)) as.numeric(predict(.model$learner.model, newx = .newdata, type = "link", ...)) diff --git a/R/RLearner_surv_randomForestSRC.R b/R/RLearner_surv_randomForestSRC.R index 0e6eb885c2..389ea46dd7 100644 --- a/R/RLearner_surv_randomForestSRC.R +++ b/R/RLearner_surv_randomForestSRC.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.randomForestSRC = function() { + makeRLearnerSurv( cl = "surv.randomForestSRC", package = c("survival", "randomForestSRC"), @@ -52,21 +53,25 @@ makeRLearner.surv.randomForestSRC = function() { #' @export trainLearner.surv.randomForestSRC = function(.learner, .task, .subset, .weights = NULL, ...) { + f = getTaskFormula(.task) randomForestSRC::rfsrc(f, data = getTaskData(.task, subset = .subset), case.wt = .weights, ...) } #' @export predictLearner.surv.randomForestSRC = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, membership = FALSE, ...)$predicted } #' @export getOOBPredsLearner.surv.randomForestSRC = function(.learner, .model) { + .model$learner.model$predicted.oob } #' @export getFeatureImportanceLearner.surv.randomForestSRC = function(.learner, .model, ...) { + getFeatureImportanceLearner.regr.randomForestSRC(.learner, .model, ...) } diff --git a/R/RLearner_surv_ranger.R b/R/RLearner_surv_ranger.R index 12130a3811..314dbfdb6b 100644 --- a/R/RLearner_surv_ranger.R +++ b/R/RLearner_surv_ranger.R @@ -1,6 +1,7 @@ #' @export makeRLearner.surv.ranger = function() { + makeRLearnerSurv( cl = "surv.ranger", package = "ranger", @@ -38,18 +39,21 @@ makeRLearner.surv.ranger = function() { #' @export trainLearner.surv.ranger = function(.learner, .task, .subset, .weights = NULL, ...) 
{ + tn = getTaskTargetNames(.task) ranger::ranger(formula = NULL, dependent.variable.name = tn[1L], - status.variable.name = tn[2L], data = getTaskData(.task, .subset), case.weights = .weights, ...) + status.variable.name = tn[2L], data = getTaskData(.task, .subset), case.weights = .weights, ...) } #' @export predictLearner.surv.ranger = function(.learner, .model, .newdata, ...) { + p = predict(object = .model$learner.model, data = .newdata) rowMeans(p$chf) } #' @export getFeatureImportanceLearner.surv.ranger = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.ranger(.learner, .model, ...) } diff --git a/R/RLearner_surv_rpart.R b/R/RLearner_surv_rpart.R index cf56f5c4a1..29443b6740 100644 --- a/R/RLearner_surv_rpart.R +++ b/R/RLearner_surv_rpart.R @@ -1,5 +1,6 @@ #' @export makeRLearner.surv.rpart = function() { + makeRLearnerSurv( cl = "surv.rpart", package = "rpart", @@ -27,21 +28,24 @@ makeRLearner.surv.rpart = function() { #' @export trainLearner.surv.rpart = function(.learner, .task, .subset, .weights = NULL, ...) { + d = getTaskData(.task, subset = .subset) f = getTaskFormula(.task) if (is.null(.weights)) { rpart::rpart(f, data = d, method = "exp", ...) - } else { + } else { rpart::rpart(f, data = d, weights = .weights, method = "exp", ...) } } #' @export predictLearner.surv.rpart = function(.learner, .model, .newdata, ...) { + predict(.model$learner.model, newdata = .newdata, type = "vector", ...) } #' @export getFeatureImportanceLearner.surv.rpart = function(.learner, .model, ...) { + getFeatureImportanceLearner.classif.rpart(.learner, .model, ...) } diff --git a/R/RegrTask.R b/R/RegrTask.R index 3f3781b6d4..2d93d3397f 100644 --- a/R/RegrTask.R +++ b/R/RegrTask.R @@ -1,6 +1,7 @@ #' @export #' @rdname Task makeRegrTask = function(id = deparse(substitute(data)), data, target, weights = NULL, blocking = NULL, coordinates = NULL, fixup.data = "warn", check.data = TRUE) { + assertString(id) assertDataFrame(data) assertString(target) @@ -8,8 +9,9 @@ makeRegrTask = function(id = deparse(substitute(data)), data, target, weights = assertFlag(check.data) if (fixup.data != "no") { - if (is.integer(data[[target]])) + if (is.integer(data[[target]])) { data[[target]] = as.double(data[[target]]) + } } task = makeSupervisedTask("regr", data, target, weights, blocking, coordinates, fixup.data = fixup.data, check.data = check.data) @@ -25,5 +27,6 @@ makeRegrTask = function(id = deparse(substitute(data)), data, target, weights = #' @export #' @rdname makeTaskDesc makeRegrTaskDesc = function(id, data, target, weights, blocking, coordinates) { + addClasses(makeTaskDescInternal("regr", id, data, target, weights, blocking, coordinates), c("RegrTaskDesc", "SupervisedTaskDesc")) } diff --git a/R/RemoveConstantFeaturesWrapper.R b/R/RemoveConstantFeaturesWrapper.R index bfe305b26a..0aa5010ecd 100644 --- a/R/RemoveConstantFeaturesWrapper.R +++ b/R/RemoveConstantFeaturesWrapper.R @@ -9,17 +9,20 @@ #' @family wrapper #' @template ret_learner makeRemoveConstantFeaturesWrapper = function(learner, perc = 0, dont.rm = character(0L), na.ignore = FALSE, tol = .Machine$double.eps^.5) { + learner = checkLearner(learner) args = list(perc = perc, dont.rm = dont.rm, na.ignore = na.ignore, tol = tol) rm(list = names(args)) trainfun = function(data, target, args) { + args$dont.rm = union(args$dont.rm, target) tmp = do.call(removeConstantFeatures, c(list(obj = data), args)) list(data = tmp, control = list(dropped.cols = setdiff(names(data), names(tmp)))) } predictfun = function(data, target, args, 
control) { + dropNamed(data, control$dropped.cols) } diff --git a/R/ResampleDesc.R b/R/ResampleDesc.R index 07bd08929b..7502cb4abf 100644 --- a/R/ResampleDesc.R +++ b/R/ResampleDesc.R @@ -96,24 +96,27 @@ #' # Bootstraping #' makeResampleDesc("Bootstrap", iters = 10) #' makeResampleDesc("Bootstrap", iters = 10, predict = "both") -#' +#' #' # Subsampling -#' makeResampleDesc("Subsample", iters = 10, split = 3/4) +#' makeResampleDesc("Subsample", iters = 10, split = 3 / 4) #' makeResampleDesc("Subsample", iters = 10) -#' +#' #' # Holdout a.k.a. test sample estimation #' makeResampleDesc("Holdout") makeResampleDesc = function(method, predict = "test", ..., stratify = FALSE, stratify.cols = NULL, fixed = FALSE, blocking.cv = FALSE) { - assertChoice(method, choices = c("Holdout", "CV", "LOO", "RepCV", - "Subsample", "Bootstrap", "SpCV", "SpRepCV", - "GrowingWindowCV", "FixedWindowCV")) + + assertChoice(method, choices = c("Holdout", "CV", "LOO", "RepCV", + "Subsample", "Bootstrap", "SpCV", "SpRepCV", + "GrowingWindowCV", "FixedWindowCV")) assertChoice(predict, choices = c("train", "test", "both")) assertFlag(stratify) - if (stratify && method == "LOO") + if (stratify && method == "LOO") { stop("Stratification cannot be done for LOO!") - if (stratify && ! is.null(stratify.cols)) + } + if (stratify && !is.null(stratify.cols)) { stop("Arguments 'stratify' and 'stratify.cols' are mutually exclusive!") + } d = do.call(stri_paste("makeResampleDesc", method), list(...)) d$predict = predict d$stratify = stratify @@ -125,12 +128,14 @@ makeResampleDesc = function(method, predict = "test", ..., stratify = FALSE, makeResampleDescInternal = function(id, iters, predict = "test", ...) { + setClasses(insert(list(...), list(id = id, iters = iters, predict = predict)), "ResampleDesc") } #' @export print.ResampleDesc = function(x, ...) { + catf("Resample description: %s with %i iterations.", x$id, x$iters) catf("Predict: %s", x$predict) catf("Stratification: %s", x$stratify) @@ -144,37 +149,44 @@ print.ResampleDesc = function(x, ...) 
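# ---- editorial aside ------------------------------------------------------
# Sketch of the train/predict contract the wrapper above relies on: the train
# function returns the transformed data plus a control object, and the
# predict function must reproduce the transformation from that control alone.
# Base-R stand-ins (setdiff/subsetting instead of BBmisc::dropNamed),
# hypothetical names.
trainfun = function(data, constant.cols) {
  list(data = data[, setdiff(names(data), constant.cols), drop = FALSE],
    control = list(dropped.cols = constant.cols))
}
predictfun = function(data, control) {
  data[, setdiff(names(data), control$dropped.cols), drop = FALSE]
}
# ---------------------------------------------------------------------------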
{ ############################################################################################## makeResampleDescHoldout = function(iters, split = 2 / 3) { + assertNumber(split, lower = 0, upper = 1) makeResampleDescInternal("holdout", iters = 1L, split = split) } makeResampleDescCV = function(iters = 10L, fixed = FALSE, blocking.cv = FALSE) { + iters = asInt(iters, lower = 2L) makeResampleDescInternal("cross-validation", iters = iters, fixed = fixed, blocking.cv = blocking.cv) } makeResampleDescSpCV = function(iters = 10L) { + iters = asInt(iters, lower = 2L) makeResampleDescInternal("spatial cross-validation", iters = iters) } makeResampleDescLOO = function() { + makeResampleDescInternal("LOO", iters = NA_integer_) } makeResampleDescSubsample = function(iters = 30L, split = 2 / 3) { + iters = asCount(iters, positive = TRUE) assertNumber(split, lower = 0, upper = 1) makeResampleDescInternal("subsampling", iters = iters, split = split) } makeResampleDescBootstrap = function(iters = 30L) { + iters = asCount(iters, positive = TRUE) makeResampleDescInternal("OOB bootstrapping", iters = iters) } makeResampleDescRepCV = function(reps = 10L, folds = 10L, fixed = FALSE, blocking.cv = FALSE) { + reps = asInt(reps, lower = 2L) folds = asInt(folds, lower = 2L) makeResampleDescInternal("repeated cross-validation", iters = folds * reps, folds = folds, reps = reps, @@ -182,6 +194,7 @@ makeResampleDescRepCV = function(reps = 10L, folds = 10L, fixed = FALSE, blockin } makeResampleDescSpRepCV = function(reps = 10L, folds = 10L) { + reps = asInt(reps, lower = 2L) folds = asInt(folds, lower = 2L) makeResampleDescInternal("repeated spatial cross-validation", iters = folds * reps, folds = folds, reps = reps) @@ -189,25 +202,28 @@ makeResampleDescSpRepCV = function(reps = 10L, folds = 10L) { makeResampleDescFixedWindowCV = function(horizon = 1L, initial.window = .5, skip = horizon - 1) { + assertNumeric(horizon, lower = 0) assertNumeric(initial.window, lower = 0) assertNumeric(skip, lower = 0) - makeResampleDescInternal("Fixed", iters = NA_integer_, horizon = horizon, - initial.window = initial.window, skip = skip, stratify = FALSE) + makeResampleDescInternal("Fixed", iters = NA_integer_, horizon = horizon, + initial.window = initial.window, skip = skip, stratify = FALSE) } makeResampleDescGrowingWindowCV = function(horizon = 1L, initial.window = .5, skip = horizon - 1) { + assertNumeric(horizon, lower = 0) assertNumeric(initial.window, lower = 0) assertNumeric(skip, lower = 0) makeResampleDescInternal("Growing", iters = NA_integer_, horizon = horizon, - initial.window = initial.window, skip = skip, stratify = FALSE) + initial.window = initial.window, skip = skip, stratify = FALSE) } ############################################################################################## #' @export print.HoldoutDesc = function(x, ...) { + catf("Resample description: %s with %.2f split rate.", x$id, x$split) catf("Predict: %s", x$predict) @@ -216,6 +232,7 @@ print.HoldoutDesc = function(x, ...) { #' @export print.SubsampleDesc = function(x, ...) { + catf("Resample description: %s with %i iterations and %.2f split rate.", x$id, x$iters, x$split) catf("Predict: %s", x$predict) @@ -224,6 +241,7 @@ print.SubsampleDesc = function(x, ...) { #' @export print.RepCVDesc = function(x, ...) { + catf("Resample description: %s with %i iterations: %i folds and %i reps.", x$id, x$iters, x$iters / x$reps, x$reps) catf("Predict: %s", x$predict) @@ -232,16 +250,18 @@ print.RepCVDesc = function(x, ...) 
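# ---- editorial aside ------------------------------------------------------
# The constructors above only store bookkeeping such as 'iters' and 'split';
# the index drawing itself (ResampleInstances.R further down) reduces to
# base::sample(). A standalone sketch for the three simplest cases:
size = 100L
holdout.train = sample(size, size * 2 / 3)                              # one split
subsample.train = lapply(1:30, function(i) sample(size, size * 2 / 3))  # w/o replacement
bootstrap.train = lapply(1:30, function(i) sample(size, size, replace = TRUE))
# RepCV bookkeeping: iters = folds * reps, which is why the printer above can
# recover the fold count as x$iters / x$reps.
# ---------------------------------------------------------------------------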
{ #' @export print.GrowingWindowCVDesc = function(x, ...) { + catf("Window description:\n %s: %.2f in initial window, horizon of %.2f, and skipping %.2f windows.", - x$id, x$initial.window, x$horizon, x$skip) + x$id, x$initial.window, x$horizon, x$skip) catf("Predict: %s", x$predict) catf("Stratification: %s", x$stratify) } #' @export print.FixedWindowCVDesc = function(x, ...) { + catf("Window description:\n %s: %.2f in initial window, horizon of %.2f, and skipping %.2f windows.", - x$id, x$initial.window, x$horizon, x$skip) + x$id, x$initial.window, x$horizon, x$skip) catf("Predict: %s", x$predict) catf("Stratification: %s", x$stratify) } @@ -300,4 +320,3 @@ cv5 = makeResampleDesc("CV", iters = 5L) #' @format NULL #' @keywords NULL cv10 = makeResampleDesc("CV", iters = 10L) - diff --git a/R/ResampleInstance.R b/R/ResampleInstance.R index 096d91958c..815e26c28c 100644 --- a/R/ResampleInstance.R +++ b/R/ResampleInstance.R @@ -35,15 +35,17 @@ #' @examples #' rdesc = makeResampleDesc("Bootstrap", iters = 10) #' rin = makeResampleInstance(rdesc, task = iris.task) -#' +#' #' rdesc = makeResampleDesc("CV", iters = 50) #' rin = makeResampleInstance(rdesc, size = nrow(iris)) -#' +#' #' rin = makeResampleInstance("CV", iters = 10, task = iris.task) makeResampleInstance = function(desc, task, size, ...) { + assert(checkClass(desc, "ResampleDesc"), checkString(desc)) - if (is.character(desc)) + if (is.character(desc)) { desc = makeResampleDesc(desc, ...) + } if (!xor(missing(task), missing(size))) { stop("One of 'size' or 'task' must be supplied") } @@ -55,11 +57,13 @@ makeResampleInstance = function(desc, task, size, ...) { task = NULL blocking = factor() } - if (!missing(size)) + if (!missing(size)) { size = asCount(size) + } - if (length(blocking) && desc$stratify) + if (length(blocking) && desc$stratify) { stop("Blocking can currently not be mixed with stratification in resampling!") + } # 'fixed' only exists by default for 'CV' -> is.null(desc$fixed) # only use this way of blocking if 'fixed = FALSE' -> is.null(desc$fixed) @@ -72,8 +76,9 @@ makeResampleInstance = function(desc, task, size, ...) { } if (length(blocking) > 0 && !fixed && blocking.cv) { - if (is.null(task)) + if (is.null(task)) { stop("Blocking always needs the task!") + } levs = levels(blocking) size2 = length(levs) # create instance for blocks @@ -84,8 +89,9 @@ makeResampleInstance = function(desc, task, size, ...) { inst$test.inds = lapply(inst$train.inds, function(x) setdiff(ti, x)) inst$size = size } else if (desc$stratify || !is.null(desc$stratify.cols)) { - if (is.null(task)) + if (is.null(task)) { stop("Stratification always needs the task!") + } if (desc$stratify) { td = getTaskDesc(task) stratify.cols = switch(td$type, @@ -98,11 +104,13 @@ makeResampleInstance = function(desc, task, size, ...) { cn = c(getTaskFeatureNames(task), getTaskTargetNames(task)) i = which(stratify.cols %nin% cn) - if (length(i) > 0L) + if (length(i) > 0L) { stopf("Columns specified for stratification, but not present in task: %s", collapse(stratify.cols[i])) + } index = getTaskData(task, features = stratify.cols, target.extra = FALSE)[stratify.cols] - if (any(vlapply(index, is.double))) + if (any(vlapply(index, is.double))) { stop("Stratification on numeric double-precision variables not possible") + } grp = tapply(seq_row(index), index, simplify = FALSE) grp = unname(split(seq_row(index), grp)) @@ -129,6 +137,7 @@ makeResampleInstance = function(desc, task, size, ...) 
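# ---- editorial aside ------------------------------------------------------
# Base-R illustration of the stratified grouping above (simplified -- mlr
# goes through tapply()/split() on the stratification columns). Rows are
# grouped by the joint levels of those columns so every stratum can be
# resampled separately and the per-stratum indices merged back afterwards.
index = data.frame(y = factor(c("a", "a", "b", "b", "b")))
grp = split(seq_len(nrow(index)), index, drop = TRUE)
grp   # $a: 1 2    $b: 3 4 5   -- per-stratum row indices
# ---------------------------------------------------------------------------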
{ } makeResampleInstanceInternal = function(desc, size, train.inds, test.inds, group = factor()) { + if (missing(test.inds) && !missing(train.inds)) { # shuffle data set and remove inds test.inds = sample(size) @@ -150,6 +159,7 @@ makeResampleInstanceInternal = function(desc, size, train.inds, test.inds, group #' @export print.ResampleInstance = function(x, ...) { + catf("Resample instance for %i cases.", x$size) print(x$desc) } diff --git a/R/ResampleInstances.R b/R/ResampleInstances.R index 0d7e1faa5c..08ce236082 100644 --- a/R/ResampleInstances.R +++ b/R/ResampleInstances.R @@ -1,8 +1,10 @@ instantiateResampleInstance = function(desc, size, task) { + UseMethod("instantiateResampleInstance") } instantiateResampleInstance.HoldoutDesc = function(desc, size, task = NULL) { + inds = sample(size, size * desc$split) makeResampleInstanceInternal(desc, size, train.inds = list(inds)) } @@ -20,7 +22,7 @@ instantiateResampleInstance.CVDesc = function(desc, size, task = NULL) { # CV with only predefined indices ("fixed") - if(is.null(task$blocking)) { + if (is.null(task$blocking)) { stopf("To use blocking in resampling, you need to pass a factor variable when creating the task!") } @@ -52,7 +54,7 @@ instantiateResampleInstance.CVDesc = function(desc, size, task = NULL) { if (0 %in% length.test.inds) { index = match(0, length.test.inds) test.inds[[index]] = NULL - size = length(task$env$data[,1]) + size = length(task$env$data[, 1]) desc$iters = length(test.inds) } makeResampleInstanceInternal(desc, size, test.inds = test.inds) @@ -81,21 +83,25 @@ instantiateResampleInstance.SpCVDesc = function(desc, size, task = NULL) { } instantiateResampleInstance.LOODesc = function(desc, size, task = NULL) { + desc$iters = size makeResampleInstanceInternal(desc, size, test.inds = as.list(seq_len(size))) } instantiateResampleInstance.SubsampleDesc = function(desc, size, task = NULL) { + inds = lapply(seq_len(desc$iters), function(x) sample(size, size * desc$split)) makeResampleInstanceInternal(desc, size, train.inds = inds) } instantiateResampleInstance.BootstrapDesc = function(desc, size, task = NULL) { + inds = lapply(seq_len(desc$iters), function(x) sample(size, size, replace = TRUE)) makeResampleInstanceInternal(desc, size, train.inds = inds) } instantiateResampleInstance.RepCVDesc = function(desc, size, task = NULL) { + folds = desc$iters / desc$reps d = makeResampleDesc("CV", iters = folds, blocking.cv = desc$blocking.cv, fixed = desc$fixed) i = replicate(desc$reps, makeResampleInstance(d, size = size), simplify = FALSE) @@ -106,6 +112,7 @@ instantiateResampleInstance.RepCVDesc = function(desc, size, task = NULL) { } instantiateResampleInstance.SpRepCVDesc = function(desc, size, task = NULL) { + folds = desc$iters / desc$reps d = makeResampleDesc("SpCV", iters = folds) i = replicate(desc$reps, makeResampleInstance(d, task = task), simplify = FALSE) @@ -116,17 +123,20 @@ instantiateResampleInstance.SpRepCVDesc = function(desc, size, task = NULL) { } instantiateResampleInstance.FixedWindowCVDesc = function(desc, size, task = NULL, coords) { + makeResamplingWindow(desc, size, task, coords, "FixedWindowCV") } instantiateResampleInstance.GrowingWindowCVDesc = function(desc, size, task = NULL, coords) { + makeResamplingWindow(desc, size, task, coords, "GrowingWindowCV") } instantiateResampleInstance.CVHelperDesc = function(desc, size, task = NULL) { - if (desc$iters > size) + if (desc$iters > size) { stopf("Cannot use more folds (%i) than size (%i)!", desc$iters, size) + } test.inds = chunk(seq_len(size), shuffle = 
TRUE, n.chunks = desc$iters) makeResampleInstanceInternal(desc, size, test.inds = test.inds) } diff --git a/R/ResamplePrediction.R b/R/ResamplePrediction.R index 4748871b5a..fd50b47e91 100644 --- a/R/ResamplePrediction.R +++ b/R/ResamplePrediction.R @@ -1,4 +1,4 @@ -#FIXME: where does time exactly come from? only test preds? +# FIXME: where does time exactly come from? only test preds? #' Prediction from resampling. #' @@ -16,6 +16,7 @@ NULL makeResamplePrediction = function(instance, preds.test, preds.train, task.desc) { + tenull = sapply(preds.test, is.null) trnull = sapply(preds.train, is.null) if (any(tenull)) pr.te = preds.test[!tenull] else pr.te = preds.test @@ -47,6 +48,7 @@ makeResamplePrediction = function(instance, preds.test, preds.train, task.desc) #' @export print.ResamplePrediction = function(x, ...) { + cat("Resampled Prediction for:\n") print(x$instance$desc) catf("predict.type: %s", x$predict.type) diff --git a/R/ResampleResult.R b/R/ResampleResult.R index 5fb92db3ce..7e52ce4e16 100644 --- a/R/ResampleResult.R +++ b/R/ResampleResult.R @@ -63,6 +63,7 @@ NULL #' @export print.ResampleResult = function(x, ...) { + cat("Resample Result\n") catf("Task: %s", x$task.id) catf("Learner: %s", x$learner.id) diff --git a/R/ResampleResult_operators.R b/R/ResampleResult_operators.R index b3056196f5..803d735f3e 100644 --- a/R/ResampleResult_operators.R +++ b/R/ResampleResult_operators.R @@ -9,10 +9,12 @@ #' @export #' @family resample getRRPredictions = function(res) { - if (is.null(res$pred)) + + if (is.null(res$pred)) { stopf("The 'pred' slot is empty because the ResampleResult was generated with keep.pred = FALSE.") - else + } else { res$pred + } } #' @title Get task description from resample results (DEPRECATED). @@ -26,6 +28,7 @@ getRRPredictions = function(res) { #' @export #' @family resample getRRTaskDescription = function(res) { + .Deprecated("getRRTaskDesc") getRRTaskDesc(res) } @@ -41,6 +44,7 @@ getRRTaskDescription = function(res) { #' @export #' @family resample getRRTaskDesc = function(res) { + res$task.desc } @@ -63,6 +67,7 @@ getRRTaskDesc = function(res) { #' @export #' @family resample getRRPredictionList = function(res, ...) { + assertClass(res, "ResampleResult") # We need to force keep.pred = TRUE (will be checked in getRRPredictions) pred = getRRPredictions(res) @@ -75,17 +80,19 @@ getRRPredictionList = function(res, ...) { # get prediction objects for train and test set prediction = lapply(set, function(s) { + # split by resample iterations p.split = pred$data[pred$data$set == s, , drop = FALSE] p.split = split(p.split, as.factor(p.split$iter)) # create prediction object for each resample iteration p.split = lapply(p.split, function(p) { + # get predictions based on predict.type if (predict.type == "prob") { y = p[, stri_startswith_fixed(colnames(p), "prob."), drop = FALSE] # we need to remove the "prob." part in the colnames, otherwise # makePrediction thinks that the factor starts with "prob." - colnames(y) = stri_replace_first_fixed(colnames(y), "prob.", replacement = "") + colnames(y) = stri_replace_first_fixed(colnames(y), "prob.", replacement = "") } else if (predict.type == "se") { y = as.matrix(p[c("response", "se")]) } else { @@ -117,6 +124,7 @@ getRRPredictionList = function(res, ...) 
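# ---- editorial aside ------------------------------------------------------
# Tiny base-R equivalent of the stringi clean-up in getRRPredictionList
# above: resampled probability columns are stored as "prob.<level>", and the
# prefix must be stripped before makePrediction() rebuilds the per-iteration
# objects (otherwise every class level would start with "prob.").
cn = c("prob.setosa", "prob.versicolor", "prob.virginica")
sub("prob.", "", cn, fixed = TRUE)   # "setosa" "versicolor" "virginica"
# ---------------------------------------------------------------------------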
{ #' @export #' @family resample addRRMeasure = function(res, measures) { + assertClass(res, "ResampleResult") if (inherits(measures, "Measure")) measures = list(measures) @@ -132,20 +140,27 @@ addRRMeasure = function(res, measures) { # recompute missing performance for train and/or test set set = names(pred)[!vlapply(pred, is.null)] perf = setNames(lapply(set, function(s) { + as.data.frame(do.call("rbind", lapply(pred[[s]], function(p) { + ret = performance(p, measures) matrix(ret, ncol = length(measures), dimnames = list(NULL, names(ret))) }))) }), set) # add missing measures to resample result - if (is.null(perf$train)) - res$measures.train[, missing.measures] = NA else - res$measures.train = cbind(res$measures.train, perf$train[, missing.measures, drop = FALSE]) - if (is.null(perf$test)) - res$measures.test[, missing.measures] = NA else - res$measures.test = cbind(res$measures.test, perf$test[, missing.measures, drop = FALSE]) + if (is.null(perf$train)) { + res$measures.train[, missing.measures] = NA + } else { + res$measures.train = cbind(res$measures.train, perf$train[, missing.measures, drop = FALSE]) + } + if (is.null(perf$test)) { + res$measures.test[, missing.measures] = NA + } else { + res$measures.test = cbind(res$measures.test, perf$test[, missing.measures, drop = FALSE]) + } aggr = vnapply(measures[measures.id %in% missing.measures], function(m) { + m$aggr$fun(task = NULL, perf.test = res$measures.test[, m$id], perf.train = res$measures.train[, m$id], @@ -177,5 +192,6 @@ addRRMeasure = function(res, measures) { #' @family debug #' @export getRRDump = function(res) { + return(res$err.dumps) } diff --git a/R/StackedLearner.R b/R/StackedLearner.R index 170a775f8b..27181d6fa8 100644 --- a/R/StackedLearner.R +++ b/R/StackedLearner.R @@ -67,26 +67,26 @@ #' \item{s}{the standard deviation of each numerical feature} #' } #' @examples -#' # Classification -#' data(iris) -#' tsk = makeClassifTask(data = iris, target = "Species") -#' base = c("classif.rpart", "classif.lda", "classif.svm") -#' lrns = lapply(base, makeLearner) -#' lrns = lapply(lrns, setPredictType, "prob") -#' m = makeStackedLearner(base.learners = lrns, -#' predict.type = "prob", method = "hill.climb") -#' tmp = train(m, tsk) -#' res = predict(tmp, tsk) -#' -#' # Regression -#' data(BostonHousing, package = "mlbench") -#' tsk = makeRegrTask(data = BostonHousing, target = "medv") -#' base = c("regr.rpart", "regr.svm") -#' lrns = lapply(base, makeLearner) -#' m = makeStackedLearner(base.learners = lrns, -#' predict.type = "response", method = "compress") -#' tmp = train(m, tsk) -#' res = predict(tmp, tsk) +#' # Classification +#' data(iris) +#' tsk = makeClassifTask(data = iris, target = "Species") +#' base = c("classif.rpart", "classif.lda", "classif.svm") +#' lrns = lapply(base, makeLearner) +#' lrns = lapply(lrns, setPredictType, "prob") +#' m = makeStackedLearner(base.learners = lrns, +#' predict.type = "prob", method = "hill.climb") +#' tmp = train(m, tsk) +#' res = predict(tmp, tsk) +#' +#' # Regression +#' data(BostonHousing, package = "mlbench") +#' tsk = makeRegrTask(data = BostonHousing, target = "medv") +#' base = c("regr.rpart", "regr.svm") +#' lrns = lapply(base, makeLearner) +#' m = makeStackedLearner(base.learners = lrns, +#' predict.type = "response", method = "compress") +#' tmp = train(m, tsk) +#' res = predict(tmp, tsk) #' @noMd #' @export makeStackedLearner = function(base.learners, super.learner = NULL, predict.type = NULL, @@ -114,23 +114,29 @@ makeStackedLearner = function(base.learners, 
super.learner = NULL, predict.type pts = unique(extractSubList(base.learners, "predict.type")) if ("se" %in% pts || (!is.null(predict.type) && predict.type == "se") || - (!is.null(super.learner) && super.learner$predict.type == "se")) + (!is.null(super.learner) && super.learner$predict.type == "se")) { stop("Predicting standard errors currently not supported.") - if (length(pts) > 1L) + } + if (length(pts) > 1L) { stop("Base learner must all have the same predict type!") - if ((method == "average" || method == "hill.climb") & (!is.null(super.learner) || is.null(predict.type))) + } + if ((method == "average" || method == "hill.climb") & (!is.null(super.learner) || is.null(predict.type))) { stop("No super learner needed for this method or the 'predict.type' is not specified.") - if (method != "average" & method != "hill.climb" & is.null(super.learner)) + } + if (method != "average" & method != "hill.climb" & is.null(super.learner)) { stop("You have to specify a super learner for this method.") - #if (method != "average" & !is.null(predict.type)) + } + # if (method != "average" & !is.null(predict.type)) # stop("Predict type has to be specified within the super learner.") - if ((method == "average" || method == "hill.climb") & use.feat) + if ((method == "average" || method == "hill.climb") & use.feat) { stop("The original features can not be used for this method") - if (!inherits(resampling, "CVDesc")) + } + if (!inherits(resampling, "CVDesc")) { stop("Currently only CV is allowed for resampling!") + } # lrn$predict.type is "response" by default change it using setPredictType - lrn = makeBaseEnsemble( + lrn = makeBaseEnsemble( id = "stack", base.learners = base.learners, cl = "StackedLearner" @@ -168,6 +174,7 @@ makeStackedLearner = function(base.learners, super.learner = NULL, predict.type #' #' @export getStackedBaseLearnerPredictions = function(model, newdata = NULL) { + # get base learner and predict type bms = model$learner.model$base.models method = model$learner.model$method @@ -183,13 +190,14 @@ getStackedBaseLearnerPredictions = function(model, newdata = NULL) { probs[[i]] = getResponse(pred, full.matrix = ifelse(method %in% c("average", "hill.climb"), TRUE, FALSE)) } - names(probs) = sapply(bms, function(X) X$learner$id) #names(.learner$base.learners) + names(probs) = sapply(bms, function(X) X$learner$id) # names(.learner$base.learners) } return(probs) } #' @export trainLearner.StackedLearner = function(.learner, .task, .subset, ...) { + # reduce to subset we want to train ensemble on .task = subsetTask(.task, subset = .subset) switch(.learner$method, @@ -206,6 +214,7 @@ trainLearner.StackedLearner = function(.learner, .task, .subset, ...) { # won't use the crossvalidated predictions (for method = "stack.cv"). #' @export predictLearner.StackedLearner = function(.learner, .model, .newdata, ...) { + use.feat = .model$learner$use.feat # get predict.type from learner and super model (if available) @@ -254,8 +263,8 @@ predictLearner.StackedLearner = function(.learner, .model, .newdata, ...) { # if super learner predictions should be probabilities, iter over rows to get proportions # FIXME: this is very slow + CUMBERSOME. 
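# ---------------------------------------------------------------------
# Editor's example (illustrative sketch, not part of the patch): the input
# checks above in practice. "average"/"hill.climb" need a predict.type and
# no super learner; "stack.cv" needs a super learner and CV resampling.
# Assumes mlr is attached; sonar.task ships with mlr.
base = lapply(c("classif.rpart", "classif.lda"), makeLearner)
base = lapply(base, setPredictType, "prob")
stk = makeStackedLearner(base.learners = base,
  super.learner = "classif.logreg", method = "stack.cv")
mod = train(stk, sonar.task)
# ---------------------------------------------------------------------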
we also do it in more places # we need a bbmisc fun for counting proportions in rows or cols - #probs = apply(probs, 1L, function(x) (table(factor(x, td$class.levels))/length(x))) - #return(setColNames(t(probs), td$class.levels)) + # probs = apply(probs, 1L, function(x) (table(factor(x, td$class.levels))/length(x))) + # return(setColNames(t(probs), td$class.levels)) probs = rowiseRatio(probs, td$class.levels, model.weight) return(probs) } else { @@ -265,7 +274,7 @@ predictLearner.StackedLearner = function(.learner, .model, .newdata, ...) { } if (type == "regr") { # if base learner predictions are responses for regression - prob = Reduce("+", probs) / length(probs) #rowMeans(probs) + prob = Reduce("+", probs) / length(probs) # rowMeans(probs) return(prob) } } @@ -300,6 +309,7 @@ predictLearner.StackedLearner = function(.learner, .model, .newdata, ...) { # Sets the predict.type for the super learner of a stacked learner #' @export setPredictType.StackedLearner = function(learner, predict.type) { + lrn = setPredictType.Learner(learner, predict.type) lrn$predict.type = predict.type if ("super.learner" %in% names(lrn)) lrn$super.learner$predict.type = predict.type @@ -310,6 +320,7 @@ setPredictType.StackedLearner = function(learner, predict.type) { # super simple averaging of base-learner predictions without weights. we should beat this averageBaseLearners = function(learner, task) { + bls = learner$base.learners base.models = probs = vector("list", length(bls)) for (i in seq_along(bls)) { @@ -322,11 +333,12 @@ averageBaseLearners = function(learner, task) { } names(probs) = names(bls) list(method = "average", base.models = base.models, super.model = NULL, - pred.train = probs) + pred.train = probs) } # stacking where we predict the training set in-sample, then super-learn on that stackNoCV = function(learner, task) { + td = getTaskDesc(task) type = ifelse(td$type == "regr", "regr", ifelse(length(td$class.levels) == 2L, "classif", "multiclassif")) @@ -347,7 +359,7 @@ stackNoCV = function(learner, task) { if (type == "regr" || type == "classif") { probs = as.data.frame(probs) } else { - probs = as.data.frame(lapply(probs, function(X) X)) #X[, -ncol(X)])) + probs = as.data.frame(lapply(probs, function(X) X)) # X[, -ncol(X)])) } # now fit the super learner for predicted_probs --> target @@ -364,11 +376,12 @@ stackNoCV = function(learner, task) { } super.model = train(learner$super.learner, super.task) list(method = "stack.no.cv", base.models = base.models, - super.model = super.model, pred.train = pred.train) + super.model = super.model, pred.train = pred.train) } # stacking where we crossval the training set with the base learners, then super-learn on that stackCV = function(learner, task) { + td = getTaskDesc(task) type = ifelse(td$type == "regr", "regr", ifelse(length(td$class.levels) == 2L, "classif", "multiclassif")) @@ -389,7 +402,7 @@ stackCV = function(learner, task) { if (type == "regr" || type == "classif") { probs = as.data.frame(probs) } else { - probs = as.data.frame(lapply(probs, function(X) X)) #X[, -ncol(X)])) + probs = as.data.frame(lapply(probs, function(X) X)) # X[, -ncol(X)])) } # add true target column IN CORRECT ORDER @@ -404,7 +417,7 @@ stackCV = function(learner, task) { probs = probs[order(test.inds), , drop = FALSE] if (use.feat) { # add data with normal features IN CORRECT ORDER - feat = getTaskData(task)#[test.inds, ] + feat = getTaskData(task) # [test.inds, ] feat = feat[, !colnames(feat) %in% tn, drop = FALSE] pred.data = cbind(probs, feat) super.task = 
makeSuperLearnerTask(learner, data = pred.data, target = tn) @@ -413,7 +426,7 @@ stackCV = function(learner, task) { } super.model = train(learner$super.learner, super.task) list(method = "stack.cv", base.models = base.models, - super.model = super.model, pred.train = pred.train) + super.model = super.model, pred.train = pred.train) } hillclimbBaseLearners = function(learner, task, replace = TRUE, init = 0, bagprob = 1, bagtime = 1, @@ -426,12 +439,13 @@ hillclimbBaseLearners = function(learner, task, replace = TRUE, init = 0, bagpro td = getTaskDesc(task) type = ifelse(td$type == "regr", "regr", - ifelse(length(td$class.levels) == 2L, "classif", "multiclassif")) + ifelse(length(td$class.levels) == 2L, "classif", "multiclassif")) if (is.null(metric)) { if (type == "regr") { metric = function(pred, true) mean((pred - true)^2) } else { metric = function(pred, true) { + pred = colnames(pred)[max.col(pred)] tb = table(pred, true) return(1 - sum(diag(tb)) / sum(tb)) @@ -443,8 +457,9 @@ hillclimbBaseLearners = function(learner, task, replace = TRUE, init = 0, bagpro bls = learner$base.learners if (type != "regr") { for (i in seq_along(bls)) { - if (bls[[i]]$predict.type == "response") + if (bls[[i]]$predict.type == "response") { stop("Hill climbing algorithm only takes probability predict type for classification.") + } } } # cross-validate all base learners and get a prob vector for the whole dataset for each learner @@ -532,10 +547,11 @@ hillclimbBaseLearners = function(learner, task, replace = TRUE, init = 0, bagpro weights = weights / sum(weights) list(method = "hill.climb", base.models = base.models, super.model = NULL, - pred.train = probs, weights = weights) + pred.train = probs, weights = weights) } compressBaseLearners = function(learner, task, parset = list()) { + lrn = learner lrn$method = "hill.climb" ensemble.model = train(lrn, task) @@ -554,7 +570,7 @@ compressBaseLearners = function(learner, task, parset = list()) { if (type == "regr") { new.task = makeRegrTask(data = pseudo.data, target = "target") if (is.null(learner$super.learner)) { - m = makeLearner("regr.nnet", predict.type = ) # nolint + m = makeLearner("regr.nnet", predict.type = ) # nolint } else { m = learner$super.learner } @@ -570,13 +586,14 @@ compressBaseLearners = function(learner, task, parset = list()) { super.model = train(m, new.task) list(method = "compress", base.learners = lrn$base.learners, super.model = super.model, - pred.train = pseudo.data) + pred.train = pseudo.data) } ### other helpers ### # Returns response for correct usage in stackNoCV and stackCV and for predictions getResponse = function(pred, full.matrix = TRUE) { + # if classification with probabilities if (pred$predict.type == "prob") { if (full.matrix) { @@ -597,6 +614,7 @@ getResponse = function(pred, full.matrix = TRUE) { # Create a super learner task makeSuperLearnerTask = function(learner, data, target) { + if (learner$super.learner$type == "classif") { makeClassifTask(data = data, target = target) } else { @@ -606,6 +624,7 @@ makeSuperLearnerTask = function(learner, data, target) { # Count the ratio rowiseRatio = function(probs, levels, model.weight = NULL) { + m = length(levels) p = ncol(probs) if (is.null(model.weight)) { @@ -623,14 +642,16 @@ rowiseRatio = function(probs, levels, model.weight = NULL) { } getPseudoData = function(.data, k = 3, prob = 0.1, s = NULL, ...) 
{ + res = NULL n = nrow(.data) ori.names = names(.data) feat.class = sapply(.data, class) ind2 = which(feat.class == "factor") ind1 = setdiff(seq_len(ncol(.data)), ind2) - if (length(ind2) > 0) + if (length(ind2) > 0) { ori.labels = lapply(.data[[ind2]], levels) + } .data = lapply(.data, as.numeric) .data = as.data.frame(.data) # Normalization @@ -651,6 +672,7 @@ getPseudoData = function(.data, k = 3, prob = 0.1, s = NULL, ...) { # Func to calc dist hamming = function(mat) { + n = nrow(mat) m = ncol(mat) res = matrix(0, n, n) @@ -666,6 +688,7 @@ getPseudoData = function(.data, k = 3, prob = 0.1, s = NULL, ...) { } one.nn = function(mat, ind1, ind2) { + n = nrow(mat) dist.mat.1 = matrix(0, n, n) dist.mat.2 = matrix(0, n, n) @@ -737,4 +760,3 @@ getPseudoData = function(.data, k = 3, prob = 0.1, s = NULL, ...) { # - DONE: add option to use normal features in super learner # - DONE: super learner can also return predicted probabilites # - DONE: allow regression as well - diff --git a/R/SupervisedTask.R b/R/SupervisedTask.R index ba3b2b7b02..41c1f27d2a 100644 --- a/R/SupervisedTask.R +++ b/R/SupervisedTask.R @@ -1,12 +1,14 @@ makeSupervisedTask = function(type, data, target, weights, blocking, fixup.data, check.data, coordinates) { + task = makeTask(type = type, data = data, weights = weights, blocking = blocking, fixup.data = fixup.data, check.data = check.data, coordinates = coordinates) if (check.data) { # costsens does not have a target col... # assertCharacter(target, any.missing = FALSE, min.len = 1L) w = which.first(target %nin% colnames(data)) - if (length(w) > 0L) + if (length(w) > 0L) { stopf("Column names of data doesn't contain target var: %s", target[w]) + } checkTaskData(task$env$data, cols = setdiff(colnames(data), target)) } @@ -15,19 +17,23 @@ makeSupervisedTask = function(type, data, target, weights, blocking, fixup.data, #' @export print.SupervisedTask = function(x, print.target = TRUE, print.weights = TRUE, ...) 
{ + td = x$task.desc catf("Supervised task: %s", td$id) catf("Type: %s", td$type) - if (print.target) + if (print.target) { catf("Target: %s", collapse(td$target)) - if (inherits(x, "SurvTask")) + } + if (inherits(x, "SurvTask")) { catf("Events: %i", sum(getTaskTargets(x)[, 2L])) + } catf("Observations: %i", td$size) catf("Features:") catf(printToChar(td$n.feat, collapse = "\n")) catf("Missings: %s", td$has.missings) - if (print.weights) + if (print.weights) { catf("Has weights: %s", td$has.weights) + } catf("Has blocking: %s", td$has.blocking) catf("Has coordinates: %s", td$has.coordinates) } diff --git a/R/SurvTask.R b/R/SurvTask.R index bf32aa7352..a61ab070dd 100644 --- a/R/SurvTask.R +++ b/R/SurvTask.R @@ -1,6 +1,7 @@ #' @rdname Task #' @export makeSurvTask = function(id = deparse(substitute(data)), data, target, weights = NULL, blocking = NULL, coordinates = NULL, fixup.data = "warn", check.data = TRUE) { + assertString(id) assertDataFrame(data) assertCharacter(target, any.missing = FALSE, len = 2L) @@ -12,12 +13,14 @@ makeSurvTask = function(id = deparse(substitute(data)), data, target, weights = time = data[[target[1L]]] event = data[[target[2L]]] - if (is.integer(time)) + if (is.integer(time)) { data[[target[1L]]] = as.double(time) + } if (is.numeric(event)) { - if (testIntegerish(event) && all(as.integer(event) %in% c(0L, 1L))) + if (testIntegerish(event) && all(as.integer(event) %in% c(0L, 1L))) { data[[target[2L]]] = (as.integer(event) == 1L) + } } else if (is.factor(event)) { lvls = levels(event) if (length(lvls) == 2L) { @@ -45,6 +48,7 @@ makeSurvTask = function(id = deparse(substitute(data)), data, target, weights = #' @export #' @rdname makeTaskDesc makeSurvTaskDesc = function(id, data, target, weights, blocking, coordinates) { + td = makeTaskDescInternal("surv", id, data, target, weights, blocking, coordinates) addClasses(td, c("SurvTaskDesc", "SupervisedTaskDesc")) } diff --git a/R/Task.R b/R/Task.R index 899550c1cd..d657e47dc3 100644 --- a/R/Task.R +++ b/R/Task.R @@ -86,7 +86,7 @@ #' library(mlbench) #' data(BostonHousing) #' data(Ionosphere) -#' +#' #' makeClassifTask(data = iris, target = "Species") #' makeRegrTask(data = BostonHousing, target = "medv") #' # an example of a classification task with more than those standard arguments: @@ -117,6 +117,7 @@ NULL NULL makeTask = function(type, data, weights = NULL, blocking = NULL, fixup.data = "warn", check.data = TRUE, coordinates = NULL) { + if (fixup.data != "no") { if (fixup.data == "quiet") { data = droplevels(data) @@ -130,8 +131,9 @@ makeTask = function(type, data, weights = NULL, blocking = NULL, fixup.data = "w data[[i]] = droplevels(x) } } - if (any(dropped)) + if (any(dropped)) { warningf("Empty factor levels were dropped for columns: %s", collapse(colnames(data)[dropped])) + } } } @@ -141,12 +143,14 @@ makeTask = function(type, data, weights = NULL, blocking = NULL, fixup.data = "w warningf("Provided data is not a pure data.frame but from class %s, hence it will be converted.", class(data)[1]) data = as.data.frame(data) } - if (!is.null(weights)) + if (!is.null(weights)) { assertNumeric(weights, len = nrow(data), any.missing = FALSE, lower = 0) + } if (!is.null(blocking)) { assertFactor(blocking, len = nrow(data), any.missing = FALSE) - if (length(blocking) && length(blocking) != nrow(data)) + if (length(blocking) && length(blocking) != nrow(data)) { stop("Blocking has to be of the same length as number of rows in data! 
Or pass none at all.") + } } if (!is.null(coordinates)) { if (nrow(coordinates) != nrow(data)) { @@ -174,15 +178,20 @@ makeTask = function(type, data, weights = NULL, blocking = NULL, fixup.data = "w } checkTaskData = function(data, cols = names(data)) { + fun = function(cn, x) { + if (is.numeric(x)) { - if (anyInfinite(x)) + if (anyInfinite(x)) { stopf("Column '%s' contains infinite values.", cn) - if (anyNaN(x)) + } + if (anyNaN(x)) { stopf("Column '%s' contains NaN values.", cn) + } } else if (is.factor(x)) { - if (hasEmptyLevels(x)) + if (hasEmptyLevels(x)) { stopf("Column '%s' contains empty factor levels.", cn) + } } else { stopf("Unsupported feature type (%s) in column '%s'.", class(x)[1L], cn) } @@ -194,6 +203,7 @@ checkTaskData = function(data, cols = names(data)) { #' @export print.Task = function(x, print.weights = TRUE, ...) { + td = x$task.desc catf("Task: %s", td$id) catf("Type: %s", td$type) @@ -201,8 +211,9 @@ print.Task = function(x, print.weights = TRUE, ...) { catf("Features:") catf(printToChar(td$n.feat, collapse = "\n")) catf("Missings: %s", td$has.missings) - if (print.weights) + if (print.weights) { catf("Has weights: %s", td$has.weights) + } catf("Has blocking: %s", td$has.blocking) catf("Has coordinates: %s", td$has.coordinates) } diff --git a/R/TaskDesc.R b/R/TaskDesc.R index 48cb5ab385..5c9ad8fd0b 100644 --- a/R/TaskDesc.R +++ b/R/TaskDesc.R @@ -52,6 +52,7 @@ NULL #' @keywords internal #' @export makeTaskDescInternal = function(type, id, data, target, weights, blocking, coordinates) { + # get classes of feature cols cl = vcapply(data, function(x) class(x)[1L]) cl = table(dropNamed(cl, target)) diff --git a/R/Task_operators.R b/R/Task_operators.R index 2f41fdceae..8552eb6e46 100644 --- a/R/Task_operators.R +++ b/R/Task_operators.R @@ -6,18 +6,21 @@ #' @export #' @family task getTaskDesc = function(x) { + UseMethod("getTaskDesc") } #' @export getTaskDesc.default = function(x) { + # FIXME: would be much cleaner to specialize here x$task.desc } #' @export getTaskDesc.TaskDesc = function(x) { + x } @@ -25,6 +28,7 @@ getTaskDesc.TaskDesc = function(x) { #' @inheritParams getTaskDesc #' @export getTaskDescription = function(x) { + .Deprecated("getTaskDesc") getTaskDesc(x) } @@ -37,6 +41,7 @@ getTaskDescription = function(x) { #' @export #' @family task getTaskType = function(x) { + getTaskDesc(x)$type } @@ -48,6 +53,7 @@ getTaskType = function(x) { #' @export #' @family task getTaskId = function(x) { + getTaskDesc(x)$id } @@ -62,21 +68,25 @@ getTaskId = function(x) { #' @export #' @family task getTaskTargetNames = function(x) { + UseMethod("getTaskTargetNames") } #' @export getTaskTargetNames.Task = function(x) { + getTaskTargetNames(getTaskDesc(x)) } #' @export getTaskTargetNames.SupervisedTaskDesc = function(x) { + x$target } #' @export getTaskTargetNames.UnsupervisedTaskDesc = function(x) { + character(0L) } @@ -92,26 +102,31 @@ getTaskTargetNames.UnsupervisedTaskDesc = function(x) { #' @export #' @family task getTaskClassLevels = function(x) { + UseMethod("getTaskClassLevels") } #' @export getTaskClassLevels.ClassifTask = function(x) { + getTaskClassLevels(getTaskDesc(x)) } #' @export getTaskClassLevels.MultilabelTask = function(x) { + getTaskClassLevels(getTaskDesc(x)) } #' @export getTaskClassLevels.ClassifTaskDesc = function(x) { + getTaskDesc(x)$class.levels } #' @export getTaskClassLevels.MultilabelTaskDesc = function(x) { + getTaskDesc(x)$class.levels } @@ -124,11 +139,13 @@ getTaskClassLevels.MultilabelTaskDesc = function(x) { #' @family task #' @export 
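# ---------------------------------------------------------------------
# Editor's example (illustrative sketch, not part of the patch): the event
# recoding handled by makeSurvTask() above accepts a 0/1 numeric, logical,
# or two-level factor status column. Assumes mlr and survival are installed.
lung2 = survival::lung
lung2$status = lung2$status - 1  # recode 1/2 to 0/1
surv.task = makeSurvTask(id = "lung", data = lung2, target = c("time", "status"))
surv.task
# ---------------------------------------------------------------------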
getTaskFeatureNames = function(task) { + UseMethod("getTaskFeatureNames") } #' @export getTaskFeatureNames.Task = function(task) { + setdiff(names(task$env$data), getTaskDesc(task)$target) } @@ -140,6 +157,7 @@ getTaskFeatureNames.Task = function(task) { #' @export #' @family task getTaskNFeats = function(x) { + sum(getTaskDesc(x)$n.feat) } @@ -151,6 +169,7 @@ getTaskNFeats = function(x) { #' @export #' @family task getTaskSize = function(x) { + getTaskDesc(x)$size } @@ -174,6 +193,7 @@ getTaskSize = function(x) { #' @family task #' @export getTaskFormula = function(x, target = getTaskTargetNames(x), explicit.features = FALSE, env = parent.frame()) { + assertCharacter(target, any.missing = FALSE) assertFlag(explicit.features) assertEnvironment(env) @@ -189,8 +209,9 @@ getTaskFormula = function(x, target = getTaskTargetNames(x), explicit.features = stop("There is no formula available for clustering.") } if (explicit.features) { - if (!inherits(x, "Task")) + if (!inherits(x, "Task")) { stopf("'explicit.features' can only be used when 'x' is of type 'Task'!") + } features = getTaskFeatureNames(x) } else { features = "." @@ -216,22 +237,26 @@ getTaskFormula = function(x, target = getTaskTargetNames(x), explicit.features = #' task = makeClassifTask(data = iris, target = "Species") #' getTaskTargets(task) getTaskTargets = function(task, recode.target = "no") { + UseMethod("getTaskTargets") } #' @export getTaskTargets.SupervisedTask = function(task, recode.target = "no") { + y = task$env$data[, task$task.desc$target, drop = TRUE] recodeY(y, recode.target, task$task.desc) } #' @export getTaskTargets.UnsupervisedTask = function(task, recode.target = "no") { + stop("There is no target available for unsupervised tasks.") } #' @export getTaskTargets.CostSensTask = function(task, recode.target = "no") { + stop("There is no target available for costsens tasks.") } @@ -271,7 +296,7 @@ getTaskTargets.CostSensTask = function(task, recode.target = "no") { #' @examples #' library("mlbench") #' data(BreastCancer) -#' +#' #' df = BreastCancer #' df$Id = NULL #' task = makeClassifTask(id = "BreastCancer", data = df, target = "Class", positive = "malignant") @@ -280,6 +305,7 @@ getTaskTargets.CostSensTask = function(task, recode.target = "no") { #' head(getTaskData(task, subset = 1:100, recode.target = "01")) getTaskData = function(task, subset = NULL, features, target.extra = FALSE, recode.target = "no", functionals.as = "dfcols") { + checkTask(task, "Task") checkTaskSubset(subset, size = task$task.desc$size) assertLogical(target.extra) @@ -295,13 +321,15 @@ getTaskData = function(task, subset = NULL, features, target.extra = FALSE, reco checkLogical(features), checkCharacter(features) ) - if (!is.character(features)) + if (!is.character(features)) { features = task.features[features] + } } tn = task$task.desc$target indexHelper = function(df, i, j, drop = TRUE, functionals.as) { + df = switch(2L * is.null(i) + is.null(j) + 1L, df[i, j, drop = drop], df[i, , drop = drop], @@ -316,18 +344,20 @@ getTaskData = function(task, subset = NULL, features, target.extra = FALSE, reco } if (target.extra) { - if (missing(features)) + if (missing(features)) { features = task.features + } res = list( data = indexHelper(task$env$data, subset, setdiff(features, tn), drop = FALSE, functionals.as), # in the next line we should not rtouch functionals anyway (just Y), so let us keep them as matrix target = recodeY(indexHelper(task$env$data, subset, tn, functionals.as = "matrix"), type = recode.target, task$task.desc) ) } else { - 
if (missing(features) || identical(features, task.features)) + if (missing(features) || identical(features, task.features)) { features = NULL - else + } else { features = union(features, tn) + } res = indexHelper(task$env$data, subset, features, drop = FALSE, functionals.as) if (recode.target %nin% c("no", "surv")) { @@ -338,18 +368,25 @@ getTaskData = function(task, subset = NULL, features, target.extra = FALSE, reco } recodeY = function(y, type, td) { - if (type == "no") + + if (type == "no") { return(y) - if (type == "drop.levels") + } + if (type == "drop.levels") { return(factor(y)) - if (type == "01") + } + if (type == "01") { return(as.numeric(y == td$positive)) - if (type == "-1+1") + } + if (type == "-1+1") { return(as.numeric(2L * (y == td$positive) - 1L)) - if (type == "surv") + } + if (type == "surv") { return(Surv(y[, 1L], y[, 2L], type = "right")) - if (type == "multilabel.factor") + } + if (type == "multilabel.factor") { return(lapply(y, function(x) factor(x, levels = c("TRUE", "FALSE")))) + } stopf("Unknown value for 'type': %s", type) } @@ -365,16 +402,19 @@ recodeY = function(y, type, td) { #' @family task #' @export getTaskCosts = function(task, subset = NULL) { + UseMethod("getTaskCosts") } #' @export getTaskCosts.Task = function(task, subset = NULL) { + NULL } #' @export getTaskCosts.CostSensTask = function(task, subset = NULL) { + subset = checkTaskSubset(subset, size = getTaskDesc(task)$size) getTaskDesc(task)$costs[subset, , drop = FALSE] } @@ -393,17 +433,21 @@ getTaskCosts.CostSensTask = function(task, subset = NULL) { #' task = makeClassifTask(data = iris, target = "Species") #' subsetTask(task, subset = 1:100) subsetTask = function(task, subset = NULL, features) { + # FIXME: we recompute the taskdesc for each subsetting. do we want that? speed? # FIXME: maybe we want this independent of changeData? 
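# ---------------------------------------------------------------------
# Editor's example (illustrative sketch, not part of the patch): getTaskData()
# with target.extra and recode.target, and subsetTask() as defined here.
# Assumes mlr is attached; sonar.task ships with mlr.
d = getTaskData(sonar.task, target.extra = TRUE, recode.target = "01")
str(d$target)                # numeric 0/1, 1 = positive class
task2 = subsetTask(sonar.task, subset = 1:100, features = c("V1", "V2"))
getTaskNFeats(task2)         # 2
# ---------------------------------------------------------------------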
# Keep functionals here as they are (matrix) task = changeData(task, getTaskData(task, subset, features, functionals.as = "matrix"), getTaskCosts(task, subset), task$weights) if (!is.null(subset)) { - if (task$task.desc$has.blocking) + if (task$task.desc$has.blocking) { task$blocking = task$blocking[subset] - if (task$task.desc$has.weights) + } + if (task$task.desc$has.weights) { task$weights = task$weights[subset] - if (task$task.desc$has.coordinates) + } + if (task$task.desc$has.coordinates) { task$coordinates = task$coordinates[subset, ] + } } return(task) } @@ -424,17 +468,22 @@ subsetTask = function(task, subset = NULL, features) { #' @keywords internal #' @export changeData = function(task, data, costs, weights, coordinates) { - if (missing(data)) + + if (missing(data)) { data = getTaskData(task) - if (missing(costs)) + } + if (missing(costs)) { costs = getTaskCosts(task) - if (missing(weights)) + } + if (missing(weights)) { weights = task$weights - if (missing(coordinates)) + } + if (missing(coordinates)) { coordinates = task$coordinates + } task$env = new.env(parent = emptyenv()) task$env$data = data - task["weights"] = list(weights) # so also 'NULL' gets set + task["weights"] = list(weights) # so also 'NULL' gets set td = task$task.desc # FIXME: this is bad style but I see no other way right now task$task.desc = switch(td$type, @@ -453,10 +502,12 @@ changeData = function(task, data, costs, weights, coordinates) { # returns factor levels of all factors in a task a named list of char vecs # non chars do not occur in the output getTaskFactorLevels = function(task) { + cols = vlapply(task$env$data, is.factor) lapply(task$env$data[cols], levels) } getTaskWeights = function(task) { + task$weights } diff --git a/R/TuneControl.R b/R/TuneControl.R index ef3e1ed102..109c10d6ea 100644 --- a/R/TuneControl.R +++ b/R/TuneControl.R @@ -32,16 +32,20 @@ makeTuneControl = function(same.resampling.instance, impute.val = NULL, start = NULL, tune.threshold = FALSE, tune.threshold.args = list(), log.fun = "default", final.dw.perc = NULL, budget = NULL, ..., cl) { - if (!is.null(start)) + if (!is.null(start)) { assertList(start, min.len = 1L, names = "unique") - if (identical(log.fun, "default")) + } + if (identical(log.fun, "default")) { log.fun = logFunTune - else if (identical(log.fun, "memory")) + } else if (identical(log.fun, "memory")) { log.fun = logFunTuneMemory - if (!is.null(budget)) + } + if (!is.null(budget)) { budget = asCount(budget) - if (!is.null(final.dw.perc)) + } + if (!is.null(final.dw.perc)) { assertNumeric(final.dw.perc, lower = 0, upper = 1) + } x = makeOptControl(same.resampling.instance, impute.val, tune.threshold, tune.threshold.args, log.fun, final.dw.perc, ...) x$start = start x$budget = budget @@ -50,6 +54,7 @@ makeTuneControl = function(same.resampling.instance, impute.val = NULL, #' @export print.TuneControl = function(x, ...) { + catf("Tune control: %s", class(x)[1]) catf("Same resampling instance: %s", x$same.resampling.instance) catf("Imputation value: %s", ifelse(is.null(x$impute.val), "", sprintf("%g", x$impute.val))) @@ -58,4 +63,3 @@ print.TuneControl = function(x, ...) 
{ catf("Tune threshold: %s", x$tune.threshold) catf("Further arguments: %s", convertToShortString(x$extra.args)) } - diff --git a/R/TuneControlDesign.R b/R/TuneControlDesign.R index a8c1848b32..b2f72169a6 100644 --- a/R/TuneControlDesign.R +++ b/R/TuneControlDesign.R @@ -14,6 +14,7 @@ #' @family tune #' @export makeTuneControlDesign = function(same.resampling.instance = TRUE, impute.val = NULL, design = NULL, tune.threshold = FALSE, tune.threshold.args = list(), log.fun = "default") { + assertDataFrame(design, min.rows = 1) budget = nrow(design) makeTuneControl(same.resampling.instance = same.resampling.instance, impute.val = impute.val, diff --git a/R/TuneControlGenSA.R b/R/TuneControlGenSA.R index 2f412f600a..5d54926610 100644 --- a/R/TuneControlGenSA.R +++ b/R/TuneControlGenSA.R @@ -22,17 +22,19 @@ makeTuneControlGenSA = function(same.resampling.instance = TRUE, impute.val = NU args = list(...) if (is.null(budget)) { - if (!is.null(args$max.call)) + if (!is.null(args$max.call)) { budget = args$max.call - else + } else { budget = args$max.call = 1e+07 + } } else { if (is.null(args$max.call)) { args$max.call = budget } else { - if (args$max.call != budget) + if (args$max.call != budget) { stopf("The given budget (%i) contradicts to the maximum number of function evaluations (max.call = %i).", budget, args$max.call) + } } } args$max.call = asCount(args$max.call) diff --git a/R/TuneControlGrid.R b/R/TuneControlGrid.R index 594b4b3d41..548ea7dbbd 100644 --- a/R/TuneControlGrid.R +++ b/R/TuneControlGrid.R @@ -23,6 +23,7 @@ makeTuneControlGrid = function(same.resampling.instance = TRUE, impute.val = NULL, resolution = 10L, tune.threshold = FALSE, tune.threshold.args = list(), log.fun = "default", final.dw.perc = NULL, budget = NULL) { + assert(checkIntegerish(resolution, lower = 1, any.missing = FALSE, names = "unique"), checkIntegerish(resolution, lower = 1, any.missing = FALSE, len = 1)) resolution = asInteger(resolution) diff --git a/R/TuneControlIrace.R b/R/TuneControlIrace.R index f4dbffd72c..853a71035d 100644 --- a/R/TuneControlIrace.R +++ b/R/TuneControlIrace.R @@ -42,16 +42,19 @@ makeTuneControlIrace = function(impute.val = NULL, n.instances = 100L, log.fun = log.fun, final.dw.perc = final.dw.perc, budget = budget, ..., cl = "TuneControlIrace") # argcheck maxExperiments - if (!is.null(x$extra.args$maxExperiments)) + if (!is.null(x$extra.args$maxExperiments)) { x$extra.args$maxExperiments = asCount(x$extra.args$maxExperiments) + } # check that budget and maxExperiments are the same if both given - if (!is.null(budget) && !is.null(x$extra.args$maxExperiments) && budget != x$extra.args$maxExperiments) + if (!is.null(budget) && !is.null(x$extra.args$maxExperiments) && budget != x$extra.args$maxExperiments) { stopf("The number of experiments (maxExperiments = %i) differs from the given budget (budget = %i).", x$extra.args$maxExperiments, budget) + } # now if budget was given, use it - if (!is.null(budget)) + if (!is.null(budget)) { x$extra.args$maxExperiments = x$budget + } return(x) } diff --git a/R/TuneControlMBO.R b/R/TuneControlMBO.R index 831334c836..9a71a7660f 100644 --- a/R/TuneControlMBO.R +++ b/R/TuneControlMBO.R @@ -44,10 +44,10 @@ makeTuneControlMBO = function(same.resampling.instance = TRUE, impute.val = NULL assertClass(mbo.control, "MBOControl") assertFlag(continue) - if (!is.null(budget) && !is.null(mbo.design) && nrow(mbo.design) > budget) + if (!is.null(budget) && !is.null(mbo.design) && nrow(mbo.design) > budget) { stopf("The size of the initial design (init.design.points 
= %i) exceeds the given budget (%i).", nrow(mbo.design), budget) - else if (!is.null(budget)) { + } else if (!is.null(budget)) { mbo.control = mlrMBO::setMBOControlTermination(mbo.control, max.evals = budget) } diff --git a/R/TuneControlRandom.R b/R/TuneControlRandom.R index b82c072de0..aa2bde4210 100644 --- a/R/TuneControlRandom.R +++ b/R/TuneControlRandom.R @@ -19,16 +19,20 @@ makeTuneControlRandom = function(same.resampling.instance = TRUE, maxit = NULL, tune.threshold.args = list(), log.fun = "default", final.dw.perc = NULL, budget = NULL) { # if we dont get neither budget nor maxit, set it to default, otherwise take one of the 2 - if (is.null(budget) && is.null(maxit)) + if (is.null(budget) && is.null(maxit)) { budget = maxit = 100L - if (!is.null(maxit)) + } + if (!is.null(maxit)) { maxit = asCount(maxit) - if (!is.null(budget)) + } + if (!is.null(budget)) { budget = asCount(budget) + } maxit = coalesce(maxit, budget) budget = coalesce(budget, maxit) - if (budget != maxit) - stopf("The parameters budget (%i) and maxit (%i) differ.", budget, maxit) + if (budget != maxit) { + stopf("The parameters budget (%i) and maxit (%i) differ.", budget, maxit) + } makeTuneControl(same.resampling.instance = same.resampling.instance, maxit = maxit, start = NULL, tune.threshold = tune.threshold, tune.threshold.args = tune.threshold.args, final.dw.perc = final.dw.perc, diff --git a/R/TuneMultiCritControl.R b/R/TuneMultiCritControl.R index 26abafe406..06fce919f7 100644 --- a/R/TuneMultiCritControl.R +++ b/R/TuneMultiCritControl.R @@ -41,14 +41,17 @@ makeTuneMultiCritControl = function(measures, same.resampling.instance, impute.val = NULL, log.fun = "default", final.dw.perc = NULL, budget = NULL, ..., cl) { assertFlag(same.resampling.instance) - if (!is.null(impute.val)) + if (!is.null(impute.val)) { assertNumeric(impute.val, any.missing = FALSE) - if (identical(log.fun, "default")) + } + if (identical(log.fun, "default")) { log.fun = logFunTune - else if (identical(log.fun, "memory")) + } else if (identical(log.fun, "memory")) { log.fun = logFunTuneMemory - if (!is.null(budget)) + } + if (!is.null(budget)) { budget = asCount(budget) + } x = makeOptControl(same.resampling.instance, impute.val, log.fun = log.fun, final.dw.perc = final.dw.perc, ...) x$budget = budget @@ -57,10 +60,10 @@ makeTuneMultiCritControl = function(measures, same.resampling.instance, #' @export print.TuneMultiCritControl = function(x, ...) 
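# ---------------------------------------------------------------------
# Editor's example (illustrative sketch, not part of the patch): budget and
# maxit are synonyms for random tuning and must agree, as enforced above.
# Assumes mlr is attached.
ctrl = makeTuneControlRandom(maxit = 50L)    # budget defaults to maxit
ctrl2 = makeTuneControlRandom(budget = 50L)  # maxit defaults to budget
## makeTuneControlRandom(maxit = 50L, budget = 60L) would stop(): values differ
# ---------------------------------------------------------------------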
{ + catf("Tune multicrit control: %s", class(x)[1]) catf("Same resampling instance: %s", x$same.resampling.instance) catf("Imputation value: %s", ifelse(is.null(x$impute.val), "", collapse(sprintf("%g", x$impute.val)))) catf("Budget: %i", x$budget) catf("Further arguments: %s", convertToShortString(x$extra.args)) } - diff --git a/R/TuneMultiCritControlGrid.R b/R/TuneMultiCritControlGrid.R index fa04a2a5ec..66ee5781ab 100644 --- a/R/TuneMultiCritControlGrid.R +++ b/R/TuneMultiCritControlGrid.R @@ -14,4 +14,3 @@ makeTuneMultiCritControlGrid = function(same.resampling.instance = TRUE, resolution = resolution, log.fun = log.fun, final.dw.perc = final.dw.perc, budget = budget, cl = "TuneMultiCritControlGrid") } - diff --git a/R/TuneMultiCritControlMBO.R b/R/TuneMultiCritControlMBO.R index f71f3b2c78..d50319df9a 100644 --- a/R/TuneMultiCritControlMBO.R +++ b/R/TuneMultiCritControlMBO.R @@ -23,10 +23,10 @@ makeTuneMultiCritControlMBO = function(n.objectives = mbo.control$n.objectives, assertClass(mbo.control, "MBOControl") assertFlag(continue) - if (!is.null(budget) && !is.null(mbo.design) && nrow(mbo.design) > budget) + if (!is.null(budget) && !is.null(mbo.design) && nrow(mbo.design) > budget) { stopf("The size of the initial design (init.design.points = %i) exceeds the given budget (%i).", nrow(mbo.design), budget) - else if (!is.null(budget)) { + } else if (!is.null(budget)) { mbo.control = mlrMBO::setMBOControlTermination(mbo.control, max.evals = budget) } diff --git a/R/TuneMultiCritControlNSGA2.R b/R/TuneMultiCritControlNSGA2.R index a56f2bf18d..063f4f2a70 100644 --- a/R/TuneMultiCritControlNSGA2.R +++ b/R/TuneMultiCritControlNSGA2.R @@ -5,29 +5,32 @@ makeTuneMultiCritControlNSGA2 = function(same.resampling.instance = TRUE, args = list(...) if (is.null(args$popsize)) { - if (!is.null(budget) && !is.null(args$generations)) + if (!is.null(budget) && !is.null(args$generations)) { # define popsize via the number of generations and the budget args$popsize = budget %/% (max(args$generations) + 1L) - else + } else { # alternatively use the nsga2-default args$popsize = 100L + } } if (is.null(args$generations)) { - if (!is.null(budget)) + if (!is.null(budget)) { # define generations via popsize and the budget args$generations = (budget %/% args$popsize) - 1L - else + } else { # alternatively use the nsga2-default args$generations = 100L + } } # adapt budget to the population size and number of generations - if (is.null(budget)) + if (is.null(budget)) { budget = (max(args$generations) + 1) * args$popsize - else if (budget != (max(args$generations) + 1) * args$popsize) + } else if (budget != (max(args$generations) + 1) * args$popsize) { stopf("The given 'budget' (%i) contradicts the product of 'popsize' (%i) and 'max(generations) + 1' (%i)!", budget, args$popsize, args$generations + 1) + } # sanity checks and type conversion args$popsize = asCount(args$popsize, positive = TRUE) diff --git a/R/TuneMultiCritControlRandom.R b/R/TuneMultiCritControlRandom.R index 9e1c596f43..c8cbbe8bbc 100644 --- a/R/TuneMultiCritControlRandom.R +++ b/R/TuneMultiCritControlRandom.R @@ -6,12 +6,13 @@ makeTuneMultiCritControlRandom = function(same.resampling.instance = TRUE, maxit = 100L, log.fun = "default", final.dw.perc = NULL, budget = NULL) { - if (is.null(budget)) + if (is.null(budget)) { budget = maxit - else if (is.null(maxit)) + } else if (is.null(maxit)) { maxit = budget - else if (budget != maxit) + } else if (budget != maxit) { stopf("The parameters budget (%i) and maxit (%i) differ.", budget, maxit) + } maxit = 
asCount(maxit) budget = asCount(budget) @@ -19,4 +20,3 @@ makeTuneMultiCritControlRandom = function(same.resampling.instance = TRUE, maxit = maxit, log.fun = log.fun, final.dw.perc = final.dw.perc, budget = budget, cl = "TuneMultiCritControlRandom") } - diff --git a/R/TuneMultiCritResult.R b/R/TuneMultiCritResult.R index ca3077ae3d..3313beace3 100644 --- a/R/TuneMultiCritResult.R +++ b/R/TuneMultiCritResult.R @@ -25,6 +25,7 @@ #' @rdname TuneMultiCritResult NULL makeTuneMultiCritResult = function(learner, ind, x, y, resampling, control, opt.path, measures, ...) { + # set threshold to NULL, we can not currently tune for it in an MCO way or = makeOptResult(learner, control, x, y, resampling, NULL, opt.path, "TuneMultiCritResult", ...) or$ind = ind @@ -33,6 +34,7 @@ makeTuneMultiCritResult = function(learner, ind, x, y, resampling, control, opt. } makeTuneMultiCritResultFromOptPath = function(learner, par.set, measures, resampling, control, opt.path) { + j = getOptPathParetoFront(opt.path, index = TRUE) els = lapply(j, getOptPathEl, op = opt.path) xs = extractSubList(els, "x", simplify = FALSE) @@ -44,9 +46,9 @@ makeTuneMultiCritResultFromOptPath = function(learner, par.set, measures, resamp } -#'@export +#' @export print.TuneMultiCritResult = function(x, ...) { + catf("Tune multicrit result:") catf("Points on front: %i", length(x$x)) } - diff --git a/R/TuneResult.R b/R/TuneResult.R index 2cb2b45e1c..5bdd95a202 100644 --- a/R/TuneResult.R +++ b/R/TuneResult.R @@ -28,10 +28,12 @@ #' @rdname TuneResult NULL makeTuneResult = function(learner, control, x, y, resampling, threshold, opt.path, ...) { + makeOptResult(learner, control, x, y, resampling, threshold, opt.path, "TuneResult", ...) } makeTuneResultFromOptPath = function(learner, par.set, measures, resampling, control, opt.path) { + i = getOptPathBestIndex(opt.path, measureAggrName(measures[[1]]), ties = "random") e = getOptPathEl(opt.path, i) x = trafoValue(par.set, e$x) @@ -41,11 +43,13 @@ makeTuneResultFromOptPath = function(learner, par.set, measures, resampling, con } -#'@export +#' @export print.TuneResult = function(x, ...) { + catf("Tune result:") catf("Op. pars: %s", paramValueToString(x$opt.path$par.set, x$x)) - if (!is.null(x$threshold)) + if (!is.null(x$threshold)) { catf("Threshold: %s", collapse(sprintf("%2.2f", x$threshold))) + } catf("%s", perfsToString(x$y)) } diff --git a/R/TuneWrapper.R b/R/TuneWrapper.R index cc2e088cdf..bb1a6a8c40 100644 --- a/R/TuneWrapper.R +++ b/R/TuneWrapper.R @@ -42,6 +42,7 @@ #' getNestedTuneResultsX(r) #' } makeTuneWrapper = function(learner, resampling, measures, par.set, control, show.info = getMlrOption("show.info")) { + learner = checkLearner(learner) assert(checkClass(resampling, "ResampleDesc"), checkClass(resampling, "ResampleInstance")) measures = checkMeasures(measures, learner) @@ -50,16 +51,19 @@ makeTuneWrapper = function(learner, resampling, measures, par.set, control, show assertFlag(show.info) id = stri_paste(learner$id, "tuned", sep = ".") x = makeOptWrapper(id, learner, resampling, measures, par.set, character(0L), - function(){}, control, show.info, "TuneWrapper", "TuneModel") + function() { + + }, control, show.info, "TuneWrapper", "TuneModel") checkTunerParset(learner, par.set, measures, control) return(x) } #' @export -trainLearner.TuneWrapper = function(.learner, .task, .subset = NULL, ...) { +trainLearner.TuneWrapper = function(.learner, .task, .subset = NULL, ...) 
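# ---------------------------------------------------------------------
# Editor's example (illustrative sketch, not part of the patch): nested
# resampling with the TuneWrapper defined above. Assumes mlr is attached.
ps = makeParamSet(makeDiscreteParam("cp", values = c(0.005, 0.01, 0.05)))
lrn = makeTuneWrapper("classif.rpart",
  resampling = makeResampleDesc("Holdout"), measures = mmce,
  par.set = ps, control = makeTuneControlGrid())
r = resample(lrn, iris.task, makeResampleDesc("CV", iters = 2),
  extract = getTuneResult)
getNestedTuneResultsX(r)     # best cp per outer iteration
# ---------------------------------------------------------------------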
{ + .task = subsetTask(.task, .subset) or = tuneParams(.learner$next.learner, .task, .learner$resampling, .learner$measures, - .learner$opt.pars, .learner$control, .learner$show.info) + .learner$opt.pars, .learner$control, .learner$show.info) lrn = setHyperPars(.learner$next.learner, par.vals = or$x) if ("DownsampleWrapper" %in% class(.learner$next.learner) && !is.null(.learner$control$final.dw.perc) && !is.null(getHyperPars(lrn)$dw.perc) && getHyperPars(lrn)$dw.perc < 1) { messagef("Train model on %f on data.", .learner$control$final.dw.perc) @@ -74,16 +78,17 @@ trainLearner.TuneWrapper = function(.learner, .task, .subset = NULL, ...) { #' @export predictLearner.TuneWrapper = function(.learner, .model, .newdata, ...) { + lrn = setHyperPars(.learner$next.learner, par.vals = .model$learner.model$opt.result$x) predictLearner(lrn, .model$learner.model$next.model, .newdata, ...) } #' @export makeWrappedModel.TuneWrapper = function(learner, learner.model, task.desc, subset = NULL, features, factor.levels, time) { + # set threshold in learner so it is used in predict calls from here on - if (learner$control$tune.threshold) + if (learner$control$tune.threshold) { learner = setPredictThreshold(learner, learner.model$opt.result$threshold) + } addClasses(NextMethod(), "TuneModel") } - - diff --git a/R/UnsupervisedTask.R b/R/UnsupervisedTask.R index 6e4f32578c..15fec43ea3 100644 --- a/R/UnsupervisedTask.R +++ b/R/UnsupervisedTask.R @@ -1,6 +1,7 @@ makeUnsupervisedTask = function(type, data, weights, blocking, fixup.data, check.data, coordinates) { + task = makeTask(type, data, weights, blocking, fixup.data = fixup.data, check.data = check.data, - coordinates = coordinates) + coordinates = coordinates) if (check.data) { # we can't use getTaskData to access the tasks's data here because we then # want to access the description object which is not existing yet @@ -11,6 +12,7 @@ makeUnsupervisedTask = function(type, data, weights, blocking, fixup.data, check #' @export print.UnsupervisedTask = function(x, print.weights = TRUE, ...) { + td = x$task.desc catf("Unsupervised task: %s", td$id) catf("Type: %s", td$type) @@ -18,8 +20,9 @@ print.UnsupervisedTask = function(x, print.weights = TRUE, ...) 
{ catf("Features:") catf(printToChar(td$n.feat, collapse = "\n")) catf("Missings: %s", td$has.missings) - if (print.weights) + if (print.weights) { catf("Has weights: %s", td$has.weights) + } catf("Has blocking: %s", td$has.blocking) catf("Has coordinates: %s", td$has.coordinates) } diff --git a/R/WeightedClassesWrapper.R b/R/WeightedClassesWrapper.R index a160c622e3..841843ca32 100644 --- a/R/WeightedClassesWrapper.R +++ b/R/WeightedClassesWrapper.R @@ -47,12 +47,12 @@ #' lrn = makeWeightedClassesWrapper("classif.ksvm", wcw.weight = 0.01) #' res = holdout(lrn, sonar.task) #' print(calculateConfusionMatrix(res$pred)) -#' +#' #' # using the observation weights of logreg #' lrn = makeWeightedClassesWrapper("classif.logreg", wcw.weight = 0.01) #' res = holdout(lrn, sonar.task) #' print(calculateConfusionMatrix(res$pred)) -#' +#' #' # tuning the imbalancy param and the SVM param in one go #' lrn = makeWeightedClassesWrapper("classif.ksvm", wcw.param = "class.weights") #' ps = makeParamSet( @@ -66,18 +66,21 @@ #' print(res) #' # print(res$opt.path) makeWeightedClassesWrapper = function(learner, wcw.param = NULL, wcw.weight = 1) { + learner = checkLearner(learner, "classif") pv = list() - if (is.null(wcw.param)) + if (is.null(wcw.param)) { wcw.param = learner$class.weights.param - else if (!is.null(learner$class.weights.param) && (learner$class.weights.param != wcw.param)) + } else if (!is.null(learner$class.weights.param) && (learner$class.weights.param != wcw.param)) { stopf("wcw.param (%s) differs from the class.weights.parameter (%s) of the learner!", wcw.param, learner$class.weights.param) + } if (is.null(wcw.param)) { - if (!hasLearnerProperties(learner, "weights")) + if (!hasLearnerProperties(learner, "weights")) { stopf("Learner '%s' does not support observation weights. You have to set 'wcw.param' to the learner param which allows to set class weights! (which hopefully exists...)", learner$id) + } } else { assertSubset(wcw.param, getParamIds(learner$par.set)) } @@ -98,6 +101,7 @@ makeWeightedClassesWrapper = function(learner, wcw.param = NULL, wcw.weight = 1) #' @export trainLearner.WeightedClassesWrapper = function(.learner, .task, .subset = NULL, .weights, wcw.weight = 1, ...) { + .task = subsetTask(.task, .subset) td = getTaskDesc(.task) levs = td$class.levels @@ -123,5 +127,6 @@ trainLearner.WeightedClassesWrapper = function(.learner, .task, .subset = NULL, #' @export getLearnerProperties.WeightedClassesWrapper = function(learner) { + setdiff(getLearnerProperties(learner$next.learner), "weights") } diff --git a/R/WrappedModel.R b/R/WrappedModel.R index 480adc2e91..ebe1cde6d2 100644 --- a/R/WrappedModel.R +++ b/R/WrappedModel.R @@ -27,11 +27,13 @@ #' @export #' @aliases WrappedModel makeWrappedModel = function(learner, learner.model, task.desc, subset, features, factor.levels, time) { + UseMethod("makeWrappedModel") } #' @export makeWrappedModel.Learner = function(learner, learner.model, task.desc, subset, features, factor.levels, time) { + dump = NULL if (is.error(learner.model)) { learner.model = as.character(learner.model) @@ -57,6 +59,7 @@ makeWrappedModel.Learner = function(learner, learner.model, task.desc, subset, f #' @export print.WrappedModel = function(x, ...) { + cat( "Model for learner.id=", x$learner$id, "; learner.class=", getClass1(x$learner), "\n", sprintf("Trained on: task.id = %s; obs = %i; features = %i", @@ -64,8 +67,9 @@ print.WrappedModel = function(x, ...) 
{ "Hyperparameters: ", getHyperParsString(x$learner, show.missing.values = TRUE), "\n", sep = "" ) - if (isFailureModel(x)) + if (isFailureModel(x)) { catf("Training failed: %s", getFailureModelMsg(x)) + } } #' Get underlying R model of learner integrated into mlr. @@ -83,12 +87,14 @@ print.WrappedModel = function(x, ...) { #' model of class [rpart::rpart] for learner \dQuote{classif.rpart}. #' @export getLearnerModel = function(model, more.unwrap = FALSE) { + assertFlag(more.unwrap) UseMethod("getLearnerModel") } -#'@export +#' @export getLearnerModel.WrappedModel = function(model, more.unwrap) { + model$learner.model } @@ -103,12 +109,14 @@ getLearnerModel.WrappedModel = function(model, more.unwrap) { #' @return (`logical(1)`). #' @export isFailureModel = function(model) { + UseMethod("isFailureModel") } #' @export # by default the model is never a failure. if a failure happens we have the derived class FailureModel isFailureModel.WrappedModel = function(model) { + return(FALSE) } @@ -124,11 +132,13 @@ isFailureModel.WrappedModel = function(model) { #' @return (`character(1)`). #' @export getFailureModelMsg = function(model) { + UseMethod("getFailureModelMsg") } #' @export getFailureModelMsg.WrappedModel = function(model) { + return(NA_character_) } @@ -143,13 +153,12 @@ getFailureModelMsg.WrappedModel = function(model) { #' @return (`last.dump`). #' @export getFailureModelDump = function(model) { + UseMethod("getFailureModelDump") } #' @export getFailureModelDump.WrappedModel = function(model) { + return(NULL) } - - - diff --git a/R/aggregations.R b/R/aggregations.R index a463e89e3d..3bb86b20be 100644 --- a/R/aggregations.R +++ b/R/aggregations.R @@ -185,12 +185,12 @@ b632 = makeAggregation( name = ".632 Bootstrap", properties = c("req.train", "req.test"), fun = function(task, perf.test, perf.train, measure, group, pred) { + mean(0.632 * perf.test + 0.368 * perf.train) - } -) + }) -#FIXME: read this again properly and double check it +# FIXME: read this again properly and double check it #' @export #' @rdname aggregations b632plus = makeAggregation( @@ -198,6 +198,7 @@ b632plus = makeAggregation( name = ".632 Bootstrap plus", properties = c("req.train", "req.test"), fun = function(task, perf.test, perf.train, measure, group, pred) { + df = as.data.frame(pred) a = numeric(length(perf.test)) for (i in seq_along(a)) { @@ -214,8 +215,7 @@ b632plus = makeAggregation( a[i] = (1 - w) * perf.train[i] + w * perf.test[i] } return(mean(a)) - } -) + }) #' @export #' @rdname aggregations @@ -224,9 +224,9 @@ testgroup.mean = makeAggregation( name = "Test group mean", properties = "req.test", fun = function(task, perf.test, perf.train, measure, group, pred) { + mean(vnapply(split(perf.test, group), mean)) - } -) + }) #' @export #' @rdname aggregations @@ -235,9 +235,9 @@ testgroup.sd = makeAggregation( name = "Test group standard deviation", properties = "req.test", fun = function(task, perf.test, perf.train, measure, group, pred) { + sd(BBmisc::vnapply(split(perf.test, group), mean)) - } -) + }) #' @export #' @rdname aggregations @@ -246,9 +246,11 @@ test.join = makeAggregation( name = "Test join", properties = "req.test", fun = function(task, perf.test, perf.train, measure, group, pred) { + df = as.data.frame(pred) f = if (length(group)) group[df$iter] else factor(rep(1L, nrow(df))) mean(vnapply(split(df, f), function(df) { + if (pred$predict.type == "response") y = df$response if (pred$predict.type == "prob") { y = df[, stri_startswith_fixed(colnames(df), "prob."), drop = FALSE] @@ -259,5 +261,4 @@ 
test.join = makeAggregation( time = NA_real_) performance(npred, measure) })) - } -) + }) diff --git a/R/analyzeFeatSelResult.R b/R/analyzeFeatSelResult.R index 8a483b1c8a..e8f5dd1c9c 100644 --- a/R/analyzeFeatSelResult.R +++ b/R/analyzeFeatSelResult.R @@ -15,6 +15,7 @@ #' @family featsel #' @export analyzeFeatSelResult = function(res, reduce = TRUE) { + assertClass(res$control, "FeatSelControlSequential") assertFlag(reduce) @@ -46,8 +47,9 @@ analyzeFeatSelResult = function(res, reduce = TRUE) { df$opt = is.na(df$eol) # number of features in set are sum of bits which are 1 df$n.feats = rowSums(df[, features, drop = FALSE]) - if (reduce) + if (reduce) { df = df[df$sel, , drop = FALSE] + } ### Initialize some variables old.feats = features[df[1L, features, drop = TRUE] == 1] @@ -57,23 +59,26 @@ analyzeFeatSelResult = function(res, reduce = TRUE) { for (thedob in unique(df$dob)) { df.dob = subset(df, df$dob == thedob) df.sel = subset(df.dob, df.dob$sel == TRUE) - if (!reduce) - catf(strrepeat("-", 80)) + if (!reduce) { + catf(strrepeat("-", 80)) + } for (j in seq_row(df.dob)) { row = df.dob[j, ] cur.feats = features[row[features] == 1] cur.sel = ifelse(row$sel, "*", " ") cur.perf = row[, measure] - change.txt = if (thedob == 1L) + change.txt = if (thedob == 1L) { "Init " - else if (length(cur.feats) < length(old.feats)) + } else if (length(cur.feats) < length(old.feats)) { "Remove" - else + } else { "Add " - if (thedob == 1L) + } + if (thedob == 1L) { change.feat = "" - else + } else { change.feat = symdiff(cur.feats, old.feats) + } catf("- Features: %4i %s : %-20s Perf = %s Diff: %s %s", length(cur.feats), change.txt, clipString(change.feat, width.feat), numToString(cur.perf), @@ -94,5 +99,3 @@ analyzeFeatSelResult = function(res, reduce = TRUE) { } invisible(NULL) } - - diff --git a/R/asROCRPrediction.R b/R/asROCRPrediction.R index a0128a84e4..b159c6226e 100644 --- a/R/asROCRPrediction.R +++ b/R/asROCRPrediction.R @@ -5,15 +5,18 @@ #' @family roc #' @family predict asROCRPrediction = function(pred) { + UseMethod("asROCRPrediction") } asROCRPredictionIntern = function(probabilites, truth, negative, positive) { + ROCR::prediction(probabilites, truth, label.ordering = c(negative, positive)) } #' @export asROCRPrediction.Prediction = function(pred) { + if (length(pred$task.desc$class.levels) != 2L) { stop("More than 2 classes!") } @@ -22,6 +25,7 @@ asROCRPrediction.Prediction = function(pred) { #' @export asROCRPrediction.ResamplePrediction = function(pred) { + if (length(pred$task.desc$class.levels) != 2L) { stop("More than 2 classes!") } diff --git a/R/aucc.R b/R/aucc.R index 80e036e8e8..4e612df242 100644 --- a/R/aucc.R +++ b/R/aucc.R @@ -1,10 +1,10 @@ # load_all() # calcAUCCScores = function(pred, measure, cl) { - # cls = pred$task.desc$class.levels - # p = getPredictionProbabilities(pred, classes = cls) - # maxs = apply(p, 1L, max) - # o = order(maxs) +# cls = pred$task.desc$class.levels +# p = getPredictionProbabilities(pred, classes = cls) +# maxs = apply(p, 1L, max) +# o = order(maxs) # } diff --git a/R/batchmark.R b/R/batchmark.R index 2dbf295d34..01b83319c7 100644 --- a/R/batchmark.R +++ b/R/batchmark.R @@ -36,6 +36,7 @@ #' @export #' @family benchmark batchmark = function(learners, tasks, resamplings, measures, models = TRUE, reg = batchtools::getDefaultRegistry()) { + requirePackages("batchtools", why = "batchmark", default.method = "load") learners = ensureBenchmarkLearners(learners) tasks = ensureBenchmarkTasks(tasks) @@ -48,12 +49,14 @@ batchmark = function(learners, tasks, 
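# ---------------------------------------------------------------------
# Editor's example (illustrative sketch, not part of the patch): using the
# b632 aggregation above; it requires train and test performances, so the
# resample description must set predict = "both". Assumes mlr is attached.
rdesc = makeResampleDesc("Bootstrap", iters = 10, predict = "both")
ms = setAggregation(mmce, b632)
r = resample("classif.rpart", iris.task, rdesc, measures = ms)
r$aggr
# ---------------------------------------------------------------------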
resamplings, measures, models = TRUE, reg # generate problems pdes = Map(function(id, task, rin, seed) { + batchtools::addProblem(id, data = list(rin = rin, task = task, measures = measures, learners = learners), fun = resample.fun, seed = seed, reg = reg) data.table(i = seq_len(rin$desc$iters)) }, id = names(tasks), task = tasks, rin = resamplings, seed = reg$seed + seq_along(tasks)) # generate algos ades = Map(function(id, learner) { + apply.fun = getAlgoFun(learner, measures, models) batchtools::addAlgorithm(id, apply.fun, reg = reg) data.table() @@ -64,14 +67,17 @@ batchmark = function(learners, tasks, resamplings, measures, models = TRUE, reg } resample.fun = function(job, data, i) { + list(train = data$rin$train.inds[[i]], test = data$rin$test.inds[[i]], weights = data$rin$weights[[i]], rdesc = data$rin$desc) } getAlgoFun = function(lrn, measures, models) { + force(lrn) force(measures) force(models) function(job, data, instance) { + extract.this = getExtractor(lrn) calculateResampleIterationResult(learner = lrn, task = data$task, train.i = instance$train, test.i = instance$test, measures = measures, weights = instance$weights, rdesc = instance$rdesc, model = models, extract = extract.this, show.info = FALSE) @@ -98,15 +104,18 @@ getAlgoFun = function(lrn, measures, models) { #' @export #' @family benchmark reduceBatchmarkResults = function(ids = NULL, keep.pred = TRUE, show.info = getMlrOption("show.info"), reg = batchtools::getDefaultRegistry()) { + # registry and ids are asserted later requirePackages("batchtools", why = "batchmark", default.method = "load") assertFlag(keep.pred) assertClass(reg, "ExperimentRegistry") - if (is.null(ids)) + if (is.null(ids)) { ids = batchtools::findDone(reg = reg) - if (NROW(ids) != nrow(batchtools::findExperiments(reg = reg))) + } + if (NROW(ids) != nrow(batchtools::findExperiments(reg = reg))) { warning("Collecting results for a subset of jobs. The resulting BenchmarkResult may be misleading.") + } problem = algorithm = NULL # for data.table's NSE tab = batchtools::getJobPars(ids, reg = reg)[, c("job.id", "problem", "algorithm")] diff --git a/R/benchmark.R b/R/benchmark.R index 8062f04afb..c8fb970ac4 100644 --- a/R/benchmark.R +++ b/R/benchmark.R @@ -39,6 +39,7 @@ #' friedmanTestBMR(bmr) #' friedmanPostHocTestBMR(bmr, p.value = 0.05) benchmark = function(learners, tasks, resamplings, measures, keep.pred = TRUE, models = TRUE, show.info = getMlrOption("show.info")) { + learners = ensureBenchmarkLearners(learners) tasks = ensureBenchmarkTasks(tasks) resamplings = ensureBenchmarkResamplings(resamplings, tasks) @@ -101,9 +102,11 @@ NULL benchmarkParallel = function(task, learner, learners, tasks, resamplings, measures, keep.pred = TRUE, models = TRUE, show.info) { + setSlaveOptions() - if (show.info) + if (show.info) { messagef("Task: %s, Learner: %s", task, learner) + } lrn = learners[[learner]] extract.this = getExtractor(lrn) r = resample(lrn, tasks[[task]], resamplings[[task]], @@ -115,10 +118,12 @@ benchmarkParallel = function(task, learner, learners, tasks, resamplings, measur #' @export print.BenchmarkResult = function(x, ...) { + print(getBMRAggrPerformances(x, as.df = TRUE)) } #' @export as.data.frame.BenchmarkResult = function(x, ...) 
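# ---------------------------------------------------------------------
# Editor's example (illustrative sketch, not part of the patch): a small
# benchmark; printing dispatches to the aggregated performances as above.
# Assumes mlr is attached.
lrns = list(makeLearner("classif.rpart"), makeLearner("classif.lda"))
bmr = benchmark(lrns, iris.task, makeResampleDesc("CV", iters = 3),
  measures = mmce)
bmr                          # print.BenchmarkResult: aggregated mmce
head(as.data.frame(bmr))     # per-iteration performances
# ---------------------------------------------------------------------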
{ + getBMRPerformances(x, as.df = TRUE) } diff --git a/R/benchmark_helpers.R b/R/benchmark_helpers.R index 4a844b15b5..78cce5db1e 100644 --- a/R/benchmark_helpers.R +++ b/R/benchmark_helpers.R @@ -1,43 +1,53 @@ ensureBenchmarkLearners = function(learners) { + learners = ensureVector(learners, 1L, "Learner") learners = lapply(learners, checkLearner) learner.ids = vcapply(learners, getLearnerId) - if (anyDuplicated(learner.ids)) + if (anyDuplicated(learner.ids)) { stop("Learners need unique ids!") + } setNames(learners, learner.ids) } ensureBenchmarkTasks = function(tasks) { + tasks = ensureVector(tasks, 1L, "Task") assertList(tasks, min.len = 1L) checkListElementClass(tasks, "Task") task.ids = vcapply(tasks, getTaskId) - if (anyDuplicated(task.ids)) + if (anyDuplicated(task.ids)) { stop("Tasks need unique ids!") + } setNames(tasks, task.ids) } ensureBenchmarkResamplings = function(resamplings, tasks) { + if (missing(resamplings)) { resamplings = replicate(length(tasks), makeResampleDesc("CV", iters = 10L), simplify = FALSE) } else if (inherits(resamplings, "ResampleInstance") || inherits(resamplings, "ResampleDesc")) { resamplings = replicate(length(tasks), resamplings, simplify = FALSE) } else { assertList(resamplings) - if (length(resamplings) != length(tasks)) + if (length(resamplings) != length(tasks)) { stop("Number of resampling strategies and number of tasks differ!") + } } resamplings = Map(function(res, tt) { - if (inherits(res, "ResampleInstance")) + + if (inherits(res, "ResampleInstance")) { return(res) - if (inherits(res, "ResampleDesc")) + } + if (inherits(res, "ResampleDesc")) { return(makeResampleInstance(res, task = tt)) + } stop("All objects in 'resamplings' must be of class 'ResampleDesc' or 'ResampleInstance'") }, resamplings, tasks) setNames(resamplings, names(tasks)) } ensureBenchmarkMeasures = function(measures, tasks) { + if (missing(measures)) { measures = list(getDefaultMeasure(tasks[[1L]])) } else { @@ -50,6 +60,7 @@ ensureBenchmarkMeasures = function(measures, tasks) { # get extractor function for different wrapped models getExtractor = function(lrn) { + cl = class(lrn) if ("FeatSelWrapper" %in% cl) { extract.this = getFeatSelResult @@ -58,7 +69,10 @@ getExtractor = function(lrn) { } else if ("FilterWrapper" %in% cl) { extract.this = getFilteredFeatures } else { - extract.this = function(model) { NULL } + extract.this = function(model) { + + NULL + } } extract.this } diff --git a/R/cache_helpers.R b/R/cache_helpers.R index 7c06f7627b..54f9c39e5c 100644 --- a/R/cache_helpers.R +++ b/R/cache_helpers.R @@ -12,12 +12,14 @@ NULL #' @rdname cache_helpers #' @export getCacheDir = function() { + rappdirs::user_cache_dir("mlr", "mlr-org") } #' @rdname cache_helpers #' @export deleteCacheDir = function() { + unlink(rappdirs::user_cache_dir("mlr", "mlr-org"), recursive = TRUE) catf("Successfully cleared directory '%s'.", rappdirs::user_cache_dir("mlr", "mlr-org")) } diff --git a/R/calculateConfusionMatrix.R b/R/calculateConfusionMatrix.R index 93ab87a294..aacc41d31d 100644 --- a/R/calculateConfusionMatrix.R +++ b/R/calculateConfusionMatrix.R @@ -45,12 +45,12 @@ #' print(calculateConfusionMatrix(pred)) #' print(calculateConfusionMatrix(pred, sums = TRUE)) #' print(calculateConfusionMatrix(pred, relative = TRUE)) -#' +#' #' # now after cross-validation #' r = crossval("classif.lda", iris.task, iters = 2L) #' print(calculateConfusionMatrix(r$pred)) - calculateConfusionMatrix = function(pred, relative = FALSE, sums = FALSE, set = "both") { + checkPrediction(pred, task.type = 
"classif", check.truth = TRUE, no.na = TRUE) assertFlag(relative) assertFlag(sums) @@ -60,14 +60,14 @@ calculateConfusionMatrix = function(pred, relative = FALSE, sums = FALSE, set = truth = getPredictionTruth(pred) if (set != "both") { - assertClass(pred, classes = "ResamplePrediction") - subset.idx = (pred$data$set == set) + assertClass(pred, classes = "ResamplePrediction") + subset.idx = (pred$data$set == set) - if (!any(subset.idx)) { - stopf("prediction object contains no observations for set = '%s'", set) - } - truth = truth[subset.idx] - resp = resp[subset.idx] + if (!any(subset.idx)) { + stopf("prediction object contains no observations for set = '%s'", set) + } + truth = truth[subset.idx] + resp = resp[subset.idx] } cls = union(levels(resp), levels(truth)) @@ -100,17 +100,19 @@ calculateConfusionMatrix = function(pred, relative = FALSE, sums = FALSE, set = if (relative) { normConfMatrix = function(r) { - if (any(r[js] > 0)) + + if (any(r[js] > 0)) { r / sum(r[js]) - else + } else { rep(0, k) + } } - #normalize by rows and add margins as a new column + # normalize by rows and add margins as a new column result.rel.row = t(apply(tab, 1, normConfMatrix)) result.rel.row = cbind(result.rel.row, "-err-" = rowSums(result.rel.row) - diag(result.rel.row)) - #normalize by columns and add margins as a new row + # normalize by columns and add margins as a new row result.rel.col = apply(tab, 2, normConfMatrix) result.rel.col = rbind(result.rel.col, "-err-" = colSums(result.rel.col) - diag(result.rel.col)) @@ -138,7 +140,7 @@ print.ConfusionMatrix = function(x, both = TRUE, digits = 2, ...) { assertFlag(both) assertInt(digits, lower = 1) - #formatting stuff, use digits after(!) the decimal point. + # formatting stuff, use digits after(!) the decimal point. nsmall = digits digits = nsmall - 1 @@ -159,12 +161,12 @@ print.ConfusionMatrix = function(x, both = TRUE, digits = 2, ...) { full.err = stri_pad_right(format(x$relative.error, digits = digits, nsmall = nsmall), width = nchar(res[1, 1])) - #bind marginal errors correctly formatted to rows and columns + # bind marginal errors correctly formatted to rows and columns res = rbind(res, stri_pad_left(format(col.err, digits = digits, nsmall = nsmall), width = nchar(res[1, 1]))) res = cbind(res, c(format(row.err, digits = digits, nsmall = nsmall), full.err)) - #also bind the marginal sums to the relative confusion matrix for printing + # also bind the marginal sums to the relative confusion matrix for printing if (x$sums) { res = rbind(cbind(res, c(x$result["-n-", 1:k], NA)), c(x$result[1:k, "-n-"], NA, n)) dimnames(res) = list(true = c(cls, "-err.-", "-n-"), predicted = c(cls, "-err.-", "-n-")) diff --git a/R/calculateROCMeasures.R b/R/calculateROCMeasures.R index 6036cfcd0e..f68a235772 100644 --- a/R/calculateROCMeasures.R +++ b/R/calculateROCMeasures.R @@ -35,7 +35,6 @@ #' fit = train(lrn, sonar.task) #' pred = predict(fit, task = sonar.task) #' calculateROCMeasures(pred) -#' calculateROCMeasures = function(pred) { checkPrediction(pred, task.type = "classif", check.truth = TRUE, no.na = TRUE, binary = TRUE) @@ -45,7 +44,7 @@ calculateROCMeasures = function(pred) { positive = pred$task.desc$positive negative = pred$task.desc$negative - #calculate measures + # calculate measures r.tpr = measureTPR(truth, response, positive) r.fnr = measureFNR(truth, response, negative, positive) r.fpr = measureFPR(truth, response, negative, positive) @@ -91,7 +90,7 @@ print.ROCMeasures = function(x, abbreviations = TRUE, digits = 2, ...) 
{ checkFlag(abbreviations) checkInt(digits, lower = 1) - #format measures + # format measures x$measures = mapply(function(m, v) paste0(m, ": ", round(v, digits)), names(x$measures), x$measures) res = cbind(round(x$confusion.matrix, digits = digits), @@ -101,7 +100,7 @@ print.ROCMeasures = function(x, abbreviations = TRUE, digits = 2, ...) { c(x$measures[["ppv"]], x$measures[["fomr"]], x$measures[["lrp"]], x$measures[["acc"]]), c(x$measures[["fdr"]], x$measures[["npv"]], x$measures[["lrm"]], x$measures[["dor"]])) - #since we should not use "for" as a variable name in a list we replace it in the printer + # since we should not use "for" as a variable name in a list we replace it in the printer res[3, 2] = stri_replace_all_fixed(res[3, 2], "fomr", "for") names(dimnames(res)) = c("true", "predicted") @@ -122,4 +121,3 @@ print.ROCMeasures = function(x, abbreviations = TRUE, digits = 2, ...) { cat("dor - Diagnostic odds ratio\n") } } - diff --git a/R/capLargeValues.R b/R/capLargeValues.R index ebd7c6c33a..859e7fab3e 100644 --- a/R/capLargeValues.R +++ b/R/capLargeValues.R @@ -34,6 +34,7 @@ #' capLargeValues(iris, threshold = 5, impute = 5) capLargeValues = function(obj, target = character(0L), cols = NULL, threshold = Inf, impute = threshold, what = "abs") { + checkTargetPreproc(obj, target, cols) assertNumber(threshold, lower = 0) assertNumber(impute, lower = 0) @@ -44,6 +45,7 @@ capLargeValues = function(obj, target = character(0L), cols = NULL, #' @export capLargeValues.Task = function(obj, target = character(0L), cols = NULL, threshold = Inf, impute = threshold, what = "abs") { + d = getTaskData(obj) d = capLargeValues.data.frame(d, target = character(0L), cols = cols, threshold = threshold, impute = impute) @@ -53,14 +55,16 @@ capLargeValues.Task = function(obj, target = character(0L), cols = NULL, #' @export capLargeValues.data.frame = function(obj, target = character(0L), cols = NULL, threshold = Inf, impute = threshold, what = "abs") { + allnumfeats = colnames(obj)[vlapply(obj, is.numeric)] allnumfeats = setdiff(allnumfeats, target) # check that user requested cols are only numeric cols with the target - if (!is.null(cols)) + if (!is.null(cols)) { assertSubset(cols, allnumfeats) - else + } else { cols = allnumfeats + } fun = switch(what, abs = function(x) abs(x) > threshold, @@ -71,8 +75,9 @@ capLargeValues.data.frame = function(obj, target = character(0L), cols = NULL, for (cn in cols) { x = obj[[cn]] ind = which(fun(x)) - if (length(ind) > 0L) + if (length(ind) > 0L) { obj[ind, cn] = ifelse(x[ind] > threshold, impute, -impute) + } } return(obj) } diff --git a/R/checkAggrBeforeResample.R b/R/checkAggrBeforeResample.R index 0df8cf6aa4..ad4396eb0e 100644 --- a/R/checkAggrBeforeResample.R +++ b/R/checkAggrBeforeResample.R @@ -1,6 +1,7 @@ # check whether rdesc$predict is set, so that the requiring properties of the measure are satisfied # called the beginning of resample checkAggrBeforeResample = function(measure, rdesc) { + a = measure$aggr p = a$properties pred = rdesc$predict @@ -13,13 +14,13 @@ checkAggrBeforeResample = function(measure, rdesc) { } else { c("train", "test", "both") } - if (pred %nin% p.allowed) + if (pred %nin% p.allowed) { stopf("Aggregation '%s' not compatible with resampling! 
You have to set arg 'predict' to %s in your resample object, instead it is '%s'!", a$id, stri_paste("'", p.allowed, "'", collapse = " or "), pred) + } } # map the checker over multiple measures checkAggrsBeforeResample = function(measures, rdesc) { + lapply(measures, checkAggrBeforeResample, rdesc = rdesc) } - - diff --git a/R/checkBMRMeasure.R b/R/checkBMRMeasure.R index e04993e36f..c9521c4afa 100644 --- a/R/checkBMRMeasure.R +++ b/R/checkBMRMeasure.R @@ -1,6 +1,7 @@ # small arg checker for a selected measure for a BMR # if NULL, the 1st measure in the BMR is returned checkBMRMeasure = function(measure, bmr) { + if (is.null(measure)) { measure = getBMRMeasures(bmr)[[1]] } else { diff --git a/R/checkLearner.R b/R/checkLearner.R index 29809096fd..57fb8d1019 100644 --- a/R/checkLearner.R +++ b/R/checkLearner.R @@ -11,10 +11,12 @@ #' @keywords internal #' @export checkLearner = function(learner, type = NULL, props = NULL) { - if (is.character(learner)) + + if (is.character(learner)) { learner = makeLearner(learner) - else + } else { assertClass(learner, classes = "Learner") + } if (!is.null(type) && learner$type %nin% type) { stopf("Learner '%s' must be of type '%s', not: '%s'", learner$id, collapse(type), learner$type) @@ -23,11 +25,10 @@ checkLearner = function(learner, type = NULL, props = NULL) { if (!is.null(props)) { learner.props = getLearnerProperties(learner) missing.props = setdiff(props, learner.props) - if (length(missing.props) > 0L){ + if (length(missing.props) > 0L) { stopf("Learner '%s' must support properties '%s', but does not support '%s'.", learner$id, collapse(props), collapse(missing.props)) } } return(learner) } - diff --git a/R/checkLearnerBeforeTrain.R b/R/checkLearnerBeforeTrain.R index 4a8c2bd6bd..ffbffc737d 100644 --- a/R/checkLearnerBeforeTrain.R +++ b/R/checkLearnerBeforeTrain.R @@ -1,5 +1,7 @@ checkLearnerBeforeTrain = function(task, learner, weights) { - getColNames = function(task, property){ + + getColNames = function(task, property) { + .data = getTaskData(task, functionals.as = "matrix") has.it = vlapply(.data, function(x) any(property(x))) clipString(collapse(colnames(.data)[has.it], ", "), 50L) @@ -32,7 +34,7 @@ checkLearnerBeforeTrain = function(task, learner, weights) { } if (td$n.feat["functionals"] > 1 && hasLearnerProperties(learner, "single.functional") && - !hasLearnerProperties(learner, "functionals")) { + !hasLearnerProperties(learner, "functionals")) { stopf("Task '%s' has more than one functional inputs, but learner '%s' does not support that!", td$id, learner$id) } @@ -47,14 +49,17 @@ checkLearnerBeforeTrain = function(task, learner, weights) { if (td$type == "classif") { if (length(td$class.levels) == 1L) { - if (!hasLearnerProperties(learner, "oneclass")) + if (!hasLearnerProperties(learner, "oneclass")) { stopf("Task '%s' is a one-class-problem, but learner '%s' does not support that!", td$id, learner$id) + } } else if (length(td$class.levels) == 2L) { - if (!hasLearnerProperties(learner, "twoclass")) + if (!hasLearnerProperties(learner, "twoclass")) { stopf("Task '%s' is a two-class-problem, but learner '%s' does not support that!", td$id, learner$id) + } } else { - if (!hasLearnerProperties(learner, "multiclass")) + if (!hasLearnerProperties(learner, "multiclass")) { stopf("Task '%s' is a multiclass-problem, but learner '%s' does not support that!", td$id, learner$id) + } } } invisible(NULL) diff --git a/R/checkMeasures.R b/R/checkMeasures.R index 0084873174..55cb0d084b 100644 --- a/R/checkMeasures.R +++ b/R/checkMeasures.R @@ -1,11 +1,13 
@@
 checkMeasures = function(measures, obj, aggr = NULL) {
+
   if (missing(measures) || is.null(measures)) {
     measures = list(getDefaultMeasure(obj))
   } else {
     measures = ensureVector(measures, n = 1L, cl = "Measure")
     assertList(measures, types = "Measure", min.len = 1L)
   }
-  if (!is.null(aggr))
+  if (!is.null(aggr)) {
     measures = lapply(measures, setAggregation, aggr = aggr)
+  }
   return(measures)
 }
diff --git a/R/checkPrediction.R b/R/checkPrediction.R
index e9c7206fe9..7ab342cb97 100644
--- a/R/checkPrediction.R
+++ b/R/checkPrediction.R
@@ -1,20 +1,26 @@
 checkPrediction = function(pred, task.type = NULL, binary = FALSE, predict.type = NULL, check.truth = FALSE, no.na = TRUE) {
+
   assertClass(pred, "Prediction")
-  if (!is.null(task.type) && pred$task.desc$type %nin% task.type)
+  if (!is.null(task.type) && pred$task.desc$type %nin% task.type) {
     stopf("Prediction must be one of '%s', but is: '%s'", collapse(task.type), pred$task.desc$type)
+  }
   if (binary) {
     nlevs = length(pred$task.desc$class.levels)
-    if (nlevs != 2L)
+    if (nlevs != 2L) {
       stopf("Prediction must be for binary classification, but has %i class levels!", nlevs)
+    }
   }
-  if (!is.null(predict.type) && pred$predict.type %nin% predict.type)
+  if (!is.null(predict.type) && pred$predict.type %nin% predict.type) {
     stopf("predict.type must be one of '%s', but is: '%s'", collapse(predict.type), pred$predict.type)
-  if (check.truth && is.null(pred$data$truth))
+  }
+  if (check.truth && is.null(pred$data$truth)) {
     stopf("Prediction object does not contain ground truth column 'truth'!")
+  }
   if (no.na) {
     r = getPredictionResponse(pred)
-    if (anyMissing(r))
+    if (anyMissing(r)) {
       stopf("Prediction object contains NAs in response, this is likely due to a prediction from a FailureModel!")
+    }
   }
 }
diff --git a/R/checkTargetPreproc.R b/R/checkTargetPreproc.R
index 211199a8e4..a7e667c651 100644
--- a/R/checkTargetPreproc.R
+++ b/R/checkTargetPreproc.R
@@ -7,28 +7,33 @@
 # target: only given for df
 # cols: character vec for columns we operate on, or NULL if no specific cols are requested
 checkTargetPreproc = function(obj, target, cols) {
+
   assert(checkClass(obj, "data.frame"), checkClass(obj, "Task"))
   assertCharacter(target, any.missing = FALSE)
-  if (!is.null(cols))
+  if (!is.null(cols)) {
     assertCharacter(cols, any.missing = FALSE)
+  }
   if (inherits(obj, "data.frame")) {
     # if we have a target, check that it exists in df 'obj'
     if (length(target > 0L)) {
       not.ok = which.first(target %nin% names(obj))
-      if (length(not.ok) != 0L)
+      if (length(not.ok) != 0L) {
         stopf("Target column '%s' must be present in data", target[not.ok])
+      }
     }
   } else { # if we have a Task
     # check that user does not pass target for Task
-    if (length(target) > 0L)
+    if (length(target) > 0L) {
       stop("Don't provide target names if you pass a task!")
+    }
   }
   # if we have a target, check that the user does not request preprocessing on it
   if (length(target > 0L) && !is.null(cols)) {
     not.ok = which.first(target %in% cols)
-    if (length(not.ok) > 0L)
+    if (length(not.ok) > 0L) {
       stopf("Preprocessing of target column '%s' not possible", target[not.ok])
+    }
   }
 }
diff --git a/R/checkTask.R b/R/checkTask.R
index 24aa0e4bc0..2752ffe5d8 100644
--- a/R/checkTask.R
+++ b/R/checkTask.R
@@ -1,6 +1,7 @@
 # performs arg checks of a task (or maybe also allow a taskdesc)
 # you can check that the task is from a list of certain types
 checkTask = function(x, cl = "Task", allow.desc = FALSE, task.type = NULL, binary = FALSE, .var.name = "task") {
+
   if (allow.desc) {
     assert(.var.name = .var.name,
       checkClass(x, classes = cl),
@@ -11,8 +12,10 @@ checkTask = function(x, cl = "Task", allow.desc = FALSE, task.type = NULL, binar } td = getTaskDesc(x) - if (!is.null(task.type) && td$type %nin% task.type) + if (!is.null(task.type) && td$type %nin% task.type) { stopf("Task must be one of '%s', but is: '%s'", collapse(task.type), td$type) - if (binary && length(td$class.levels) != 2L) + } + if (binary && length(td$class.levels) != 2L) { stopf("Task '%s' must be binary classification!", td$id) + } } diff --git a/R/checkTaskSubset.R b/R/checkTaskSubset.R index 0aef134bc5..5255ef6528 100644 --- a/R/checkTaskSubset.R +++ b/R/checkTaskSubset.R @@ -2,6 +2,7 @@ # @param size [int(1)]\cr size of the dataset to subset # @return numeric vector of subset indices checkTaskSubset = function(subset = NULL, size) { + assertCount(size) if (is.null(subset)) { seq_len(size) diff --git a/R/checkTunerParset.R b/R/checkTunerParset.R index e349cae0e8..3e10c13a50 100644 --- a/R/checkTunerParset.R +++ b/R/checkTunerParset.R @@ -6,31 +6,39 @@ # - algo can handle dependencies checkTunerParset = function(learner, par.set, measures, control) { + cl = getClass1(control) - if (getParamNr(par.set) == 0L) + if (getParamNr(par.set) == 0L) { stop("No parameters were passed!") + } x = setdiff(names(par.set$pars), names(getParamSet(learner)$pars)) - if (length(x) > 0L) + if (length(x) > 0L) { stopf("Can only tune parameters for which learner parameters exist: %s", collapse(x)) + } checkParsOk = function(algo, ok) - if (length(filterParams(par.set, type = ok)$pars) < length(par.set$pars)) + if (length(filterParams(par.set, type = ok)$pars) < length(par.set$pars)) { stopf("%s can only be applied to: %s!", algo, collapse(ok)) + } checkStart = function() { + if (!is.null(control$start)) { - if (length(control$start) != length(par.set$pars)) + if (length(control$start) != length(par.set$pars)) { stop("Length of 'start' has to match number of parameters in 'par.set'!") + } x = setdiff(names(control$start), names(getParamSet(learner)$pars)) - if (length(x)) + if (length(x)) { stopf("'start' contains parameters for which no learner parameters exist: %s", collapse(x)) + } } } - if (control$tune.threshold && (learner$type != "classif" || learner$predict.type != "prob")) + if (control$tune.threshold && (learner$type != "classif" || learner$predict.type != "prob")) { stop("Using 'tune.threshold' requires a classif learner with predict.type = 'prob'!") + } # check special conditions for some tuners if (inherits(control, "TuneControlCMAES")) { @@ -47,13 +55,16 @@ checkTunerParset = function(learner, par.set, measures, control) { # check requires / dependent params if (hasRequires(par.set) && cl %nin% c("TuneControlRandom", "TuneControlGrid", - "TuneControlDesign", "TuneControlIrace", "TuneControlMBO", "TuneMultiCritControlRandom", - "TuneMultiCritControlMBO")) + "TuneControlDesign", "TuneControlIrace", "TuneControlMBO", "TuneMultiCritControlRandom", + "TuneMultiCritControlMBO")) { stopf("Tuning algorithm for '%s' cannot handle dependent parameters!", cl) + } - if (inherits(control, "TuneMultiCritControl")) - if (length(control$impute.val) != length(measures)) + if (inherits(control, "TuneMultiCritControl")) { + if (length(control$impute.val) != length(measures)) { stop("Length of 'impute.val' must coincide with number of measures!") + } + } } diff --git a/R/configureMlr.R b/R/configureMlr.R index ec846ad590..8f95cb52af 100644 --- a/R/configureMlr.R +++ b/R/configureMlr.R @@ -115,7 +115,8 @@ configureMlr = function(show.info, on.learner.error, on.learner.warning, # no 
change, set everything to defaults
   # FIXME: this is a horrible mechanism! How can I get a list of all mlr options?
-  if (!any.change)
+  if (!any.change) {
     Map(setMlrOption, names(defaults), defaults)
+  }
   invisible(NULL)
 }
diff --git a/R/convertBMRToRankMatrix.R b/R/convertBMRToRankMatrix.R
index 020ded1f4b..8e5592b896 100644
--- a/R/convertBMRToRankMatrix.R
+++ b/R/convertBMRToRankMatrix.R
@@ -18,6 +18,7 @@
 #' @examples
 #' # see benchmark
 convertBMRToRankMatrix = function(bmr, measure = NULL, ties.method = "average", aggregation = "default") {
+
   assertClass(bmr, "BenchmarkResult")
   measure = checkBMRMeasure(measure, bmr)
   assertChoice(aggregation, c("mean", "default"))
@@ -34,9 +35,10 @@ convertBMRToRankMatrix = function(bmr, measure = NULL, ties.method = "average",
   }
 
   # calculate ranks, rank according to minimize option of the measure
-  if (!measure$minimize)
+  if (!measure$minimize) {
     df$x = -df$x
-  df[, "alg.rank" := rank(.SD$x, ties.method = ties.method), by = "task.id"] # nolint FIXME: find out what `:=` looks like in the AST and adjust the linter
+  }
+  df[, "alg.rank" := rank(.SD$x, ties.method = ties.method), by = "task.id"] # nolint FIXME: find out what `:=` looks like in the AST and adjust the linter
 
   # convert into matrix, rows = learner, cols = tasks
   df = melt(setDF(df), c("task.id", "learner.id"), "alg.rank")
diff --git a/R/convertMLBenchObjToTask.R b/R/convertMLBenchObjToTask.R
index f27a962ad7..6c7c8a1f86 100644
--- a/R/convertMLBenchObjToTask.R
+++ b/R/convertMLBenchObjToTask.R
@@ -17,6 +17,7 @@
 #' print(convertMLBenchObjToTask("Ionosphere"))
 #' print(convertMLBenchObjToTask("mlbench.spirals", n = 100, sd = 0.1))
 convertMLBenchObjToTask = function(x, n = 100L, ...) {
+
   assertString(x)
   requirePackages("mlbench")
   id = x
@@ -62,9 +63,10 @@ convertMLBenchObjToTask = function(x, n = 100L, ...) {
     d = as.data.frame(z)
     target = if (!is.null(z$classes)) "classes" else "y"
   }
-  task = if (is.factor(d[, target]))
+  task = if (is.factor(d[, target])) {
     makeClassifTask(id = id, data = d, target = target)
-  else
+  } else {
     makeRegrTask(id = id, data = d, target = target)
+  }
   return(task)
 }
diff --git a/R/convertX.R b/R/convertX.R
index dc0cb19eeb..ab188bb3fd 100644
--- a/R/convertX.R
+++ b/R/convertX.R
@@ -1,5 +1,6 @@
 # start is a named list, flatten it to an unnamed num vec, of correct order as in par.set
 convertStartToNumeric = function(start, par.set) {
+
   ids = getParamIds(par.set, repeated = FALSE)
   start = start[ids]
   as.numeric(unlist(start))
@@ -8,6 +9,7 @@ convertStartToNumeric = function(start, par.set) {
 # converter for single x
 # leaves x unchanged
 convertXIdentity = function(x, par.set) {
+
   return(x)
 }
 
 # takes a flat vector of all joint values and produces a normal named list,
 # looking at the par.set structure
 convertXNumeric = function(x, par.set) {
+
   ids = getParamIds(par.set, repeated = TRUE, with.nr = FALSE)
   # factor usually does sort(unique(...)) for levels which changes order!
x = split(x, factor(ids, levels = unique(ids))) @@ -28,31 +31,38 @@ convertXNumeric = function(x, par.set) { # each col is a flat vector of all joint values, # we produce a list of list parvals, calling convertXNumeric on the cols convertXVectorizedMatrixCols = function(xs, par.set) { + rownames(xs) = colnames(xs) = NULL xs = lapply(seq_col(xs), function(i) { + convertXNumeric(xs[, i], par.set) }) } roundIntegers = function(x, par.set) { + Map(function(par, v) { - if (par$type %in% c("integer", "integervector")) + + if (par$type %in% c("integer", "integervector")) { as.integer(round(v)) - else + } else { v + } }, par.set$pars, x) } # convert logical param values from chars to true logicals, # eg irace produces strings in tuning convertXVectorizedBooleanStringsToLogical = function(x, par.set) { + cx = function(x) { + types = getParamTypes(par.set, use.names = TRUE) j = types %in% c("logical", "logicalvector") - if (any(j)) + if (any(j)) { x[j] = lapply(x[j], as.logical) + } return(x) } lapply(x, cx) } - diff --git a/R/createDummyFeatures.R b/R/createDummyFeatures.R index 3b07ee3afd..0cd52f3c4f 100644 --- a/R/createDummyFeatures.R +++ b/R/createDummyFeatures.R @@ -19,15 +19,18 @@ #' @export #' @family eda_and_preprocess createDummyFeatures = function(obj, target = character(0L), method = "1-of-n", cols = NULL) { + assertChoice(method, choices = c("1-of-n", "reference")) - if (!is.factor(obj) && !is.character(obj)) + if (!is.factor(obj) && !is.character(obj)) { checkTargetPreproc(obj, target, cols) + } UseMethod("createDummyFeatures") } #' @export createDummyFeatures.data.frame = function(obj, target = character(0L), method = "1-of-n", cols = NULL) { - # get all factor feature names present in data + + # get all factor feature names present in data work.cols = colnames(obj)[vlapply(obj, is.factor)] work.cols = setdiff(work.cols, target) @@ -49,7 +52,9 @@ createDummyFeatures.data.frame = function(obj, target = character(0L), method = if (method == "reference" && length(work.cols) == length(dummies)) { colnames(dummies) = Map(function(col, pre) { - stri_paste(pre, tail(levels(col), -1), sep = ".")}, obj[work.cols], prefix) + + stri_paste(pre, tail(levels(col), -1), sep = ".") + }, obj[work.cols], prefix) } if (length(dummies) != 0) { @@ -64,6 +69,7 @@ createDummyFeatures.data.frame = function(obj, target = character(0L), method = #' @export createDummyFeatures.Task = function(obj, target = character(0L), method = "1-of-n", cols = NULL) { + target = getTaskTargetNames(obj) d = createDummyFeatures(obj = getTaskData(obj), target = target, method = method, cols = cols) changeData(obj, d) @@ -72,6 +78,7 @@ createDummyFeatures.Task = function(obj, target = character(0L), method = "1-of- #' @export createDummyFeatures.factor = function(obj, target = character(0L), method = "1-of-n", cols = NULL) { + dcol = as.data.frame(obj) colname = colnames(dcol) if (method == "1-of-n") { @@ -88,5 +95,6 @@ createDummyFeatures.factor = function(obj, target = character(0L), method = "1-o #' @export createDummyFeatures.character = function(obj, target = character(0L), method = "1-of-n", cols = NULL) { + createDummyFeatures(as.factor(obj), method = method) } diff --git a/R/createSpatialResamplingPlots.R b/R/createSpatialResamplingPlots.R index 61eb2e50e7..a1299f0175 100644 --- a/R/createSpatialResamplingPlots.R +++ b/R/createSpatialResamplingPlots.R @@ -67,57 +67,57 @@ #' \donttest{ #' rdesc = makeResampleDesc("SpRepCV", folds = 5, reps = 4) #' r = resample(makeLearner("classif.qda"), spatial.task, rdesc) -#' -#' 
##------------------------------------------------------------- +#' +#' ## ------------------------------------------------------------- #' ## single unnamed resample input with 5 folds and 2 repetitions -#' ##------------------------------------------------------------- -#' +#' ## ------------------------------------------------------------- +#' #' plots = createSpatialResamplingPlots(spatial.task, r, crs = 32717, #' repetitions = 2, x.axis.breaks = c(-79.065, -79.085), #' y.axis.breaks = c(-3.970, -4)) #' cowplot::plot_grid(plotlist = plots[["Plots"]], ncol = 5, nrow = 2, #' labels = plots[["Labels"]]) -#' -#' ##-------------------------------------------------------------------------- +#' +#' ## -------------------------------------------------------------------------- #' ## single named resample input with 5 folds and 1 repetition and 32717 datum -#' ##-------------------------------------------------------------------------- -#' +#' ## -------------------------------------------------------------------------- +#' #' plots = createSpatialResamplingPlots(spatial.task, list("Resamp" = r), #' crs = 32717, datum = 32717, repetitions = 1) #' cowplot::plot_grid(plotlist = plots[["Plots"]], ncol = 5, nrow = 1, #' labels = plots[["Labels"]]) -#' -#' ##------------------------------------------------------------- +#' +#' ## ------------------------------------------------------------- #' ## multiple named resample inputs with 5 folds and 1 repetition -#' ##------------------------------------------------------------- -#' +#' ## ------------------------------------------------------------- +#' #' rdesc1 = makeResampleDesc("SpRepCV", folds = 5, reps = 4) #' r1 = resample(makeLearner("classif.qda"), spatial.task, rdesc1) #' rdesc2 = makeResampleDesc("RepCV", folds = 5, reps = 4) #' r2 = resample(makeLearner("classif.qda"), spatial.task, rdesc2) -#' +#' #' plots = createSpatialResamplingPlots(spatial.task, #' list("SpRepCV" = r1, "RepCV" = r2), crs = 32717, repetitions = 1, #' x.axis.breaks = c(-79.055, -79.085), y.axis.breaks = c(-3.975, -4)) #' cowplot::plot_grid(plotlist = plots[["Plots"]], ncol = 5, nrow = 2, #' labels = plots[["Labels"]]) -#' -#' ##------------------------------------------------------------------------------------- +#' +#' ## ------------------------------------------------------------------------------------- #' ## Complex arrangements of multiple named resample inputs with 5 folds and 1 repetition -#' ##------------------------------------------------------------------------------------- -#' -#' p1 <- plot_grid(plist[["Plots"]][[1]], plist[["Plots"]][[2]], +#' ## ------------------------------------------------------------------------------------- +#' +#' p1 = plot_grid(plist[["Plots"]][[1]], plist[["Plots"]][[2]], #' plist[["Plots"]][[3]], ncol = 3, nrow = 1, labels = plist[["Labels"]][1:3], #' label_size = 18) -#' p12 <- plot_grid(plist[["Plots"]][[4]], plist[["Plots"]][[5]], ncol = 2, -#' nrow = 1, labels = plist[["Labels"]][4:5], label_size = 18) -#' -#' p2 <- plot_grid(plist[["Plots"]][[6]], plist[["Plots"]][[7]], +#' p12 = plot_grid(plist[["Plots"]][[4]], plist[["Plots"]][[5]], ncol = 2, +#' nrow = 1, labels = plist[["Labels"]][4:5], label_size = 18) +#' +#' p2 = plot_grid(plist[["Plots"]][[6]], plist[["Plots"]][[7]], #' plist[["Plots"]][[8]], ncol = 3, nrow = 1, labels = plist[["Labels"]][6:8], #' label_size = 18) -#' p22 <- plot_grid(plist[["Plots"]][[9]], plist[["Plots"]][[10]], ncol = 2, +#' p22 = plot_grid(plist[["Plots"]][[9]], plist[["Plots"]][[10]], ncol = 2, 
#' nrow = 1, labels = plist[["Labels"]][9:10], label_size = 18) -#' +#' #' cowplot::plot_grid(p1, p12, p2, p22, ncol = 1) #' } #' @export @@ -130,11 +130,13 @@ createSpatialResamplingPlots = function(task = NULL, resample = NULL, crs = NULL requireNamespace("sf", quietly = TRUE) # some checks - if (is.null(crs)) + if (is.null(crs)) { stopf("Please specify a crs that matches the coordinates of the task.") - if(task$task.desc$has.coordinates == FALSE) + } + if (task$task.desc$has.coordinates == FALSE) { stopf("The supplied task needs to have coordinates.") - if(!identical(as.integer(rownames(task$env$data)), 1:length(task$env$data[, 1]))) { + } + if (!identical(as.integer(rownames(task$env$data)), 1:length(task$env$data[, 1]))) { rownames(task$env$data) = seq(1:length(task$env$data[, 1])) } @@ -165,23 +167,23 @@ createSpatialResamplingPlots = function(task = NULL, resample = NULL, crs = NULL # create plot list with length = folds plot.list = rep(list(data), nfolds * repetitions) - plot.list.out = imap(plot.list, function (.x, .y) { + plot.list.out = imap(plot.list, function(.x, .y) { + ggplot(.x) + - geom_sf(data = subset(.x, as.integer(rownames(.x)) %in% - r$pred$instance[["train.inds"]][[.y]]), + geom_sf(data = subset(.x, as.integer(rownames(.x)) %in% + r$pred$instance[["train.inds"]][[.y]]), color = color.train, size = point.size, ) + - geom_sf(data = subset(.x,as.integer(rownames(.x)) %in% - r$pred$instance[["test.inds"]][[.y]]), + geom_sf(data = subset(.x, as.integer(rownames(.x)) %in% + r$pred$instance[["test.inds"]][[.y]]), color = color.test, size = point.size) + - scale_x_continuous(breaks = x.axis.breaks) + - scale_y_continuous(breaks = y.axis.breaks) + - coord_sf(datum = sf::st_crs(datum)) + - hrbrthemes::theme_ipsum_rc() + - theme(axis.text.x = element_text(size = axis.text.size), - axis.text.y = element_text(size = axis.text.size), - plot.margin = unit(c(0.5, 0.2, 0.2, 0.2), "cm")) - } - ) + scale_x_continuous(breaks = x.axis.breaks) + + scale_y_continuous(breaks = y.axis.breaks) + + coord_sf(datum = sf::st_crs(datum)) + + hrbrthemes::theme_ipsum_rc() + + theme(axis.text.x = element_text(size = axis.text.size), + axis.text.y = element_text(size = axis.text.size), + plot.margin = unit(c(0.5, 0.2, 0.2, 0.2), "cm")) + }) return(plot.list.out) }) @@ -197,7 +199,7 @@ createSpatialResamplingPlots = function(task = NULL, resample = NULL, crs = NULL reps_nfolds = c(reps_nfolds, rep(i, nfolds)) if (!is.null(names(resample))) { names.resample = c(names.resample, rep(names(resample)[i], - nfolds * repetitions)) + nfolds * repetitions)) } } # account for multiple resamp objects diff --git a/R/crossover.R b/R/crossover.R index 580ace40f2..e0889ef932 100644 --- a/R/crossover.R +++ b/R/crossover.R @@ -17,6 +17,7 @@ NULL crossover = function(x, y, rate = 0.5) { + ratio = rbinom(length(x), 1, rate) ifelse(ratio == 1, x, y) } diff --git a/R/downsample.R b/R/downsample.R index ab8708af6c..92f902e5ba 100644 --- a/R/downsample.R +++ b/R/downsample.R @@ -18,6 +18,7 @@ #' @family downsample #' @export downsample = function(obj, perc = 1, stratify = FALSE) { + assertNumber(perc, lower = 0, upper = 1) assertFlag(stratify) UseMethod("downsample") @@ -25,15 +26,17 @@ downsample = function(obj, perc = 1, stratify = FALSE) { #' @export downsample.Task = function(obj, perc = 1, stratify = FALSE) { + rin = makeResampleInstance("Holdout", stratify = stratify, split = perc, task = obj) subsetTask(task = obj, subset = rin$train.inds[[1L]]) } #' @export downsample.ResampleInstance = function(obj, perc = 1, 
stratify = FALSE) { - if (stratify) + + if (stratify) { stop("Stratifying is not supported for a ResampleInstance!") + } obj$train.inds = lapply(obj$train.inds, function(x) sample(x, size = length(x) * perc)) return(obj) } - diff --git a/R/dropFeatures.R b/R/dropFeatures.R index 13ecca6057..f029cf9415 100644 --- a/R/dropFeatures.R +++ b/R/dropFeatures.R @@ -7,6 +7,7 @@ #' @export #' @family eda_and_preprocess dropFeatures = function(task, features) { + assertClass(task, classes = "Task") f = getTaskFeatureNames(task) assertSubset(features, choices = f) diff --git a/R/estimateResidualVariance.R b/R/estimateResidualVariance.R index 3218dbda43..ce3aa661ee 100644 --- a/R/estimateResidualVariance.R +++ b/R/estimateResidualVariance.R @@ -17,18 +17,22 @@ #' If missing, `task` must be supplied. #' @export estimateResidualVariance = function(x, task, data, target) { + UseMethod("estimateResidualVariance") } #' @export estimateResidualVariance.Learner = function(x, task, data, target) { - if (missing(task)) + + if (missing(task)) { task = makeRegrTask(data = data, target = target) + } estimateResidualVariance.WrappedModel(train(x, task), task) } #' @export estimateResidualVariance.WrappedModel = function(x, task, data, target) { + if (missing(task)) { task = makeRegrTask(data = data, target = target) } else { diff --git a/R/evalOptimizationState.R b/R/evalOptimizationState.R index c13f0445b5..b14101a85d 100644 --- a/R/evalOptimizationState.R +++ b/R/evalOptimizationState.R @@ -27,17 +27,19 @@ evalOptimizationState = function(learner, task, resampling, measures, par.set, b if (is.error(learner2)) { set.pars.ok = FALSE errmsg = as.character(learner2) - if (show.info) + if (show.info) { messagef("[Tune-x] Setting hyperpars failed: %s", errmsg) + } } } else if (inherits(control, "FeatSelControl")) { task = subsetTask(task, features = bits.to.features(state, task)) } # if no problems: resample + measure time - if (show.info) + if (show.info) { prev.stage = log.fun(learner, task, resampling, measures, par.set, control, opt.path, dob, state, NA_real_, remove.nas, stage = 1L) + } if (set.pars.ok) { exec.time = measureTime({ r = resample.fun(learner2, task, resampling, measures = measures, show.info = FALSE) @@ -60,21 +62,24 @@ evalOptimizationState = function(learner, task, resampling, measures, par.set, b # sort msgs by iters, so iter1, iter2, ... 
errmsgs = as.character(t(r$err.msgs[, -1L])) notna = !is.na(errmsgs) - if (any(notna)) + if (any(notna)) { errmsg = errmsgs[notna][1L] + } err.dumps = r$err.dumps } else { # we still need to define a non-NULL threshold, if tuning it was requested - if (control$tune.threshold) + if (control$tune.threshold) { threshold = NA_real_ + } } # if eval was not ok, everything should have been initailized to NAs - if (show.info) + if (show.info) { log.fun(learner, task, resampling, measures, par.set, control, opt.path, dob, state, y, remove.nas, stage = 2L, prev.stage = prev.stage) + } list(y = y, exec.time = exec.time, errmsg = errmsg, threshold = threshold, - err.dumps = err.dumps) + err.dumps = err.dumps) } # evaluates a list of states by calling evalOptimizationState @@ -87,10 +92,12 @@ evalOptimizationStates = function(learner, task, resampling, measures, par.set, opt.path, show.info, states, dobs, eols, remove.nas, resample.fun, level) { n = length(states) - if (length(dobs) == 1L) + if (length(dobs) == 1L) { dobs = rep(dobs, n) - if (length(eols) == 1L) + } + if (length(eols) == 1L) { eols = rep(eols, n) + } parallelLibrary("mlr", master = FALSE, level = level, show.info = FALSE) exportMlrOptions(level = level) res.list = parallelMap(evalOptimizationState, dobs, states, level = level, diff --git a/R/extractFDAFeatures.R b/R/extractFDAFeatures.R index bdc5d882cc..4076800589 100644 --- a/R/extractFDAFeatures.R +++ b/R/extractFDAFeatures.R @@ -52,6 +52,7 @@ # reextractFDAFeatures(task, extracted$desc) extractFDAFeatures = function(obj, target = character(0L), feat.methods = list()) { + assertList(feat.methods) UseMethod("extractFDAFeatures") } @@ -62,7 +63,7 @@ extractFDAFeatures.data.frame = function(obj, target = character(0L), feat.metho fdf = getFunctionalFeatures(obj) assertDataFrame(fdf, min.cols = 1L) - assertSubset(unique(names(feat.methods)), choices = c(names(fdf), "all")) + assertSubset(unique(names(feat.methods)), choices = c(names(fdf), "all")) assertCharacter(target) # If the same transform should be applied to all features, rep method and name accordingly @@ -89,16 +90,20 @@ extractFDAFeatures.data.frame = function(obj, target = character(0L), feat.metho # Apply function from x to all functional features and return as list of # lists for each functional feature. extracts = Map(function(x, fd.col) { + list( # feats are the extracted features feats = do.call(x$learn, c(x$args, list(data = obj, target = target, col = fd.col))), args = x$args, # Args passed to x$reextract - reextract = x$reextract # pass on reextraction learner for extraction in prediction + reextract = x$reextract # pass on reextraction learner for extraction in prediction ) }, x = desc$extractFDAFeat, fd.col = desc$fd.cols) # Append Info relevant for reextraction to desc - desc$extractFDAFeat = lapply(extracts, function(x) {c(x["args"], x["reextract"])}) + desc$extractFDAFeat = lapply(extracts, function(x) { + + c(x["args"], x["reextract"]) + }) # Extract feats for every functional feature and cbind to data.frame vals = extractSubList(extracts, "feats", simplify = FALSE) @@ -136,6 +141,7 @@ extractFDAFeatures.Task = function(obj, target = character(0L), feat.methods = l #' @export print.extractFDAFeatDesc = function(x, ...) { + catf("Extraction of features from functional data:") catf("Target: %s", collapse(x$target)) # FIXME: This could be missunderstood @@ -159,22 +165,26 @@ print.extractFDAFeatDesc = function(x, ...) 
{ #' @family extractFDAFeatures #' @export reextractFDAFeatures = function(obj, desc) { + UseMethod("reextractFDAFeatures") } #' @export reextractFDAFeatures.data.frame = function(obj, desc) { + assertClass(desc, classes = "extractFDAFeatDesc") # check for new columns new.cols = names(which(names(obj) %nin% desc$coln)) - if (length(new.cols)) + if (length(new.cols)) { stop("New columns (%s) found in data. Unable to extract.", collapse(new.cols)) + } # reextract features using reextractDescription and return reextract = Map( function(xn, x, fd.col) { + do.call(x$reextract, c(list(data = obj, target = desc$target, col = fd.col), x$args)) }, xn = names(desc$extractFDAFeat), x = desc$extractFDAFeat, fd.col = desc$fd.cols) @@ -191,6 +201,7 @@ reextractFDAFeatures.data.frame = function(obj, desc) { #' @export reextractFDAFeatures.Task = function(obj, desc) { + # get data and pass to extractor df = getTaskData(obj, functionals.as = "matrix") extracted = reextractFDAFeatures.data.frame(df, desc) diff --git a/R/extractFDAFeaturesMethods.R b/R/extractFDAFeaturesMethods.R index 408e0992a0..dfcf74c90f 100644 --- a/R/extractFDAFeaturesMethods.R +++ b/R/extractFDAFeaturesMethods.R @@ -29,6 +29,7 @@ #' @export #' @family fda makeExtractFDAFeatMethod = function(learn, reextract, args = list()) { + assertFunction(learn, args = c("data", "target", "col")) assertFunction(reextract, args = c("data", "target", "col")) assertList(args, names = "named") @@ -50,6 +51,7 @@ makeExtractFDAFeatMethod = function(learn, reextract, args = list()) { #' @export #' @family fda_featextractor extractFDAFourier = function(trafo.coeff = "phase") { + # create a function that calls extractFDAFeatFourier assertChoice(trafo.coeff, choices = c("phase", "amplitude")) @@ -106,10 +108,12 @@ extractFDAFourier = function(trafo.coeff = "phase") { #' @export #' @family fda_featextractor extractFDAWavelets = function(filter = "la8", boundary = "periodic") { + assertCharacter(filter) assertChoice(boundary, c("periodic", "reflection")) lrn = function(data, target = NULL, col, filter, boundary) { + requirePackages("wavelets", default.method = "load") assertClass(data, "data.frame") @@ -117,6 +121,7 @@ extractFDAWavelets = function(filter = "la8", boundary = "periodic") { df = convertRowsToList(data[, col, drop = FALSE]) wtdata = t(dapply(df, fun = function(x) { + wt = wavelets::dwt(as.numeric(x), filter = filter, boundary = boundary) unlist(c(wt@W, wt@V[[wt@level]])) })) @@ -144,10 +149,12 @@ extractFDAWavelets = function(filter = "la8", boundary = "periodic") { #' @export #' @family fda_featextractor extractFDAFPCA = function(pve = 0.99, npc = NULL) { + assertNumber(pve, lower = 0, upper = 1) assertCount(npc, null.ok = TRUE) lrn = function(data, target, col, vals, pve, npc) { + requirePackages("mboost", default.method = "load") requirePackages("refund", default.method = "load") assert( @@ -158,14 +165,15 @@ extractFDAFPCA = function(pve = 0.99, npc = NULL) { data = data[, col, drop = FALSE] # transform dataframe into matrix - if (inherits(data, "data.frame")) + if (inherits(data, "data.frame")) { data = as.matrix(data) + } # extract fpca features # FIXME: Add other fpca. options, maybe via function args ? 
    rst = refund::fpca.sc(Y = data, pve = pve, npc = npc)
    # Order the columns by score
-   features.fpca = rst$scores[, order(rst$evalues, decreasing = TRUE)]
+   features.fpca = rst$scores[, order(rst$evalues, decreasing = TRUE)]
    df.fpca = as.data.frame(features.fpca)
    names(df.fpca) = paste0("Fpca", seq_len(ncol(df.fpca)))
    return(df.fpca)
@@ -194,18 +202,22 @@ extractFDAMultiResFeatures = function(res.level = 3L, shift = 0.5, curve.lens =
 
   # Helper function for getFDAMultiResFeatures, extracts for a whole subsequence.
   getUniFDAMultiResFeatures = function(data, res.level, shift) {
+
     feat.list = apply(data, 1, getCurveFeatures, res.level = res.level, shift = shift)
     data.frame(t(feat.list))
   }
 
   getFDAMultiResFeatures = function(data, res.level = 3L, shift = 0.5, curve.lens) {
+
     # Assert that curve.lens sums up to ncol(data)
     stopifnot(sum(curve.lens) == ncol(data))
     clsum = cumsum(curve.lens)
     feat.list = apply(data, 1, function(x) {
+
       # Extract the data from the different subcurves specified by curve.lens
       subfeats = Map(function(seqstart, seqend) {
+
         getCurveFeatures(x[seqstart:seqend], res.level = res.level, shift = shift)
       }, clsum - curve.lens + 1, cumsum(curve.lens))
       # And return as vector
@@ -217,39 +229,43 @@
 
   # Get Features from a single (sub-)curve
   getCurveFeatures = function(x, res.level = 3, shift = 0.5) {
+
     m = length(x)
     start = 1L
     feats = numeric(0L)
-    ssize = m # initialize segment size to be the length of the curve
-    for (rl in 1:res.level) { # ssize is divided by 2 at the end of the loop
-      soffset = ceiling(shift * ssize) # overlap distance
+    ssize = m # initialize segment size to be the length of the curve
+    for (rl in 1:res.level) { # ssize is divided by 2 at the end of the loop
+      soffset = ceiling(shift * ssize) # overlap distance
       # messagef("reslev = %i, ssize = %i, soffset=%i", rl, ssize, soffset)
       sstart = 1L
-      send = sstart + ssize - 1L # end position
-      while (send <= m) { # until the segment reaches the end
+      send = sstart + ssize - 1L # end position
+      while (send <= m) { # until the segment reaches the end
        # messagef("start, end: %i, %i", sstart, send)
        f = getSegmentFeatures(x[sstart:send])
        # print(f)
-       feats = c(feats, f) # append the feats from the last resolution hierarchy
+       feats = c(feats, f) # append the feats from the last resolution hierarchy
        sstart = sstart + soffset
        send = send + soffset
      }
-     ssize = ceiling(ssize / 2) # decrease the segment size
-     if (ssize < 1L) # if the divide by 2 is too much
+     ssize = ceiling(ssize / 2) # decrease the segment size
+     if (ssize < 1L) { # if the divide by 2 is too much
       break
+     }
    }
   return(feats)
 }
 
 getSegmentFeatures = function(x) {
+
   mean(x)
 }
 
 lrn = function(data, target, col, res.level, shift, curve.lens) {
   data = data[, col, drop = FALSE]
-  if (is.data.frame(data))
+  if (is.data.frame(data)) {
     data = as.matrix(data)
+  }
   assertMatrix(data, mode = "numeric")
 
   # The difference is that for the getFDAMultiResFeatures, the curve is again subdivided into
diff --git a/R/extractFDAFeaturesWrapper.R b/R/extractFDAFeaturesWrapper.R
index 225b8179fd..41228e428a 100644
--- a/R/extractFDAFeaturesWrapper.R
+++ b/R/extractFDAFeaturesWrapper.R
@@ -24,12 +24,14 @@ makeExtractFDAFeatsWrapper = function(learner, feat.methods = list()) {
   rm(list = names(args))
 
   trainfun = function(data, target, args) {
+
     l = do.call(extractFDAFeatures, c(list(obj = data, target = target), args))
-    names(l) = c("data", "control")
+    names(l) = c("data", "control")
     l
   }
 
   predictfun = function(data,
target, args, control) { + reextractFDAFeatures(data, control) } @@ -40,5 +42,6 @@ makeExtractFDAFeatsWrapper = function(learner, feat.methods = list()) { #' @export getLearnerProperties.extractFDAFeatsWrapper = function(learner) { + union(getLearnerProperties(learner$next.learner), c("functionals", "single.functional")) } diff --git a/R/filterFeatures.R b/R/filterFeatures.R index f2ddf11986..7ebcbe2a69 100644 --- a/R/filterFeatures.R +++ b/R/filterFeatures.R @@ -44,8 +44,9 @@ #' @export #' @family filter filterFeatures = function(task, method = "randomForestSRC_importance", fval = NULL, - perc = NULL, abs = NULL, threshold = NULL, mandatory.feat = NULL, - cache = FALSE, ...) { + perc = NULL, abs = NULL, threshold = NULL, mandatory.feat = NULL, + cache = FALSE, ...) { + assertClass(task, "SupervisedTask") assertChoice(method, choices = ls(.FilterRegister)) select = checkFilterArguments(perc, abs, threshold) @@ -64,12 +65,13 @@ filterFeatures = function(task, method = "randomForestSRC_importance", fval = NU # check for user defined cache dir if (is.character(cache)) { assertString(cache) - if(!dir.exists(cache)) + if (!dir.exists(cache)) { dir.create(cache, recursive = TRUE) + } cache.dir = cache } else { assertFlag(cache) - if(!dir.exists(rappdirs::user_cache_dir("mlr", "mlr-org"))) { + if (!dir.exists(rappdirs::user_cache_dir("mlr", "mlr-org"))) { dir.create(rappdirs::user_cache_dir("mlr", "mlr-org")) } cache.dir = rappdirs::user_cache_dir("mlr", "mlr-org") @@ -105,15 +107,18 @@ filterFeatures = function(task, method = "randomForestSRC_importance", fval = NU if (!is.null(mandatory.feat)) { assertCharacter(mandatory.feat) - if (!all(mandatory.feat %in% fval$name)) + if (!all(mandatory.feat %in% fval$name)) { stop("At least one mandatory feature was not found in the task.") - if (select != "threshold" && nselect < length(mandatory.feat)) + } + if (select != "threshold" && nselect < length(mandatory.feat)) { stop("The number of features to be filtered cannot be smaller than the number of mandatory features.") - #Set the the filter values of the mandatory features to infinity to always select them + } + # Set the the filter values of the mandatory features to infinity to always select them fval[fval$name %in% mandatory.feat, method] = Inf } - if (select == "threshold") + if (select == "threshold") { nselect = sum(fval[[method]] >= threshold, na.rm = TRUE) + } features = as.character(head(sortByCol(fval, method, asc = FALSE)$name, nselect)) allfeats = getTaskFeatureNames(task) j = match(features, allfeats) @@ -122,11 +127,14 @@ filterFeatures = function(task, method = "randomForestSRC_importance", fval = NU } checkFilterArguments = function(perc, abs, threshold) { + sum.null = sum(!is.null(perc), !is.null(abs), !is.null(threshold)) - if (sum.null == 0L) + if (sum.null == 0L) { stop("At least one of 'perc', 'abs' or 'threshold' must be not NULL") - if (sum.null >= 2L) + } + if (sum.null >= 2L) { stop("Arguments 'perc', 'abs' and 'threshold' are mutually exclusive") + } if (!is.null(perc)) { assertNumber(perc, lower = 0, upper = 1) diff --git a/R/fixDataForLearner.R b/R/fixDataForLearner.R index c2a25059ab..528baa9e1e 100644 --- a/R/fixDataForLearner.R +++ b/R/fixDataForLearner.R @@ -1,4 +1,5 @@ getFixDataInfo = function(data, restore.levels = FALSE, factors.to.dummies = FALSE, ordered.to.int = FALSE) { + assertDataFrame(data, types = c("logical", "numeric", "factor")) assertFlag(restore.levels) assertFlag(factors.to.dummies) @@ -18,10 +19,12 @@ getFixDataInfo = function(data, restore.levels = 
FALSE, factors.to.dummies = FAL } fixDataForLearner = function(data, info) { + cn = c(names(info$factors), names(info$ordered)) not.found = which.first(cn %nin% names(data)) - if (length(not.found) > 0L) + if (length(not.found) > 0L) { stopf("Column '%s' found in info, but not in new data", cn[not.found]) + } if (info$restore.levels) { if (!info$factors.to.dummies && length(info$factors) > 0L) { @@ -37,6 +40,7 @@ fixDataForLearner = function(data, info) { if (info$factors.to.dummies) { cols = names(info$factors) new.cols = Map(function(x, lvls) { + as.data.frame(setNames(lapply(lvls, "==", x), lvls)) }, x = data[cols], lvls = info$factors) data = cbind(dropNamed(data, cols), do.call(cbind, new.cols)) diff --git a/R/friedmanPostHocTestBMR.R b/R/friedmanPostHocTestBMR.R index 5d9f85b1ec..9cbb4e2049 100644 --- a/R/friedmanPostHocTestBMR.R +++ b/R/friedmanPostHocTestBMR.R @@ -29,24 +29,27 @@ #' @examples #' # see benchmark friedmanPostHocTestBMR = function(bmr, measure = NULL, p.value = 0.05, aggregation = "default") { + requirePackages("PMCMR") assertClass(bmr, "BenchmarkResult") assertNumeric(p.value, lower = 0, upper = 1, len = 1) assertChoice(aggregation, c("default", "mean")) measure = checkBMRMeasure(measure, bmr) n.learners = length(bmr$learners) - if (n.learners < 2) + if (n.learners < 2) { stop("Benchmark results for at least two learners are required") + } n.tasks = length(bmr$results) - if (n.tasks < 2) + if (n.tasks < 2) { stop("Benchmark results for at least two tasks are required") + } # aggregate over iterations if (aggregation == "mean") { df = as.data.frame(bmr) df = aggregate(df[[measure$id]], - by = list(task.id = df$task.id, learner.id = df$learner.id), - FUN = mean) + by = list(task.id = df$task.id, learner.id = df$learner.id), + FUN = mean) aggr.meas = "x" } else if (aggregation == "default") { aggr.meas = measureAggrName(measure) @@ -56,9 +59,10 @@ friedmanPostHocTestBMR = function(bmr, measure = NULL, p.value = 0.05, aggregati f.test = friedmanTestBMR(bmr, measure) if (!is.na(f.test$p.value)) { f.rejnull = f.test$p.value < p.value - if (!f.rejnull) + if (!f.rejnull) { warning("Cannot reject null hypothesis of overall Friedman test, returning overall Friedman test.") + } } else { f.rejnull = FALSE warning("P-value not computable. Learner performances might be exactly equal.") diff --git a/R/friedmanTestBMR.R b/R/friedmanTestBMR.R index bfb3ec22ff..1ddd0e4045 100644 --- a/R/friedmanTestBMR.R +++ b/R/friedmanTestBMR.R @@ -21,11 +21,13 @@ friedmanTestBMR = function(bmr, measure = NULL, aggregation = "default") { measure = checkBMRMeasure(measure, bmr) assertChoice(aggregation, c("default", "mean")) n.learners = length(bmr$learners) - if (n.learners < 2) + if (n.learners < 2) { stop("Benchmark results for at least two learners are required") + } n.tasks = length(bmr$results) - if (n.tasks < 2) + if (n.tasks < 2) { stop("Benchmark results for at least two tasks are required") + } # aggregate mean or default over iterations if (aggregation == "mean") { diff --git a/R/generateCalibration.R b/R/generateCalibration.R index 0766fc9117..ade9b2ed1d 100644 --- a/R/generateCalibration.R +++ b/R/generateCalibration.R @@ -53,22 +53,26 @@ generateCalibrationData = function(obj, breaks = "Sturges", groups = NULL, task. 
UseMethod("generateCalibrationData") #' @export generateCalibrationData.Prediction = function(obj, breaks = "Sturges", groups = NULL, task.id = NULL) { + checkPrediction(obj, task.type = "classif", predict.type = "prob") generateCalibrationData.list(namedList("prediction", obj), breaks, groups, task.id) } #' @export generateCalibrationData.ResampleResult = function(obj, breaks = "Sturges", groups = NULL, task.id = NULL) { + obj = getRRPredictions(obj) checkPrediction(obj, task.type = "classif", predict.type = "prob") generateCalibrationData.Prediction(obj, breaks, groups, task.id) } #' @export generateCalibrationData.BenchmarkResult = function(obj, breaks = "Sturges", groups = NULL, task.id = NULL) { + tids = getBMRTaskIds(obj) - if (is.null(task.id)) + if (is.null(task.id)) { task.id = tids[1L] - else + } else { assertChoice(task.id, tids) + } obj = getBMRPredictions(obj, task.ids = task.id, as.df = FALSE)[[1L]] for (x in obj) @@ -77,19 +81,22 @@ generateCalibrationData.BenchmarkResult = function(obj, breaks = "Sturges", grou } #' @export generateCalibrationData.list = function(obj, breaks = "Sturges", groups = NULL, task.id = NULL) { + assertList(obj, c("Prediction", "ResampleResult"), min.len = 1L) ## unwrap ResampleResult to Prediction and set default names if (inherits(obj[[1L]], "ResampleResult")) { - if (is.null(names(obj))) + if (is.null(names(obj))) { names(obj) = extractSubList(obj, "learner.id") + } obj = extractSubList(obj, "pred", simplify = FALSE) } assertList(obj, names = "unique") td = obj[[1L]]$task.desc out = lapply(obj, function(pred) { + df = data.table("truth" = getPredictionTruth(pred), - getPredictionProbabilities(pred, cl = getTaskClassLevels(td))) + getPredictionProbabilities(pred, cl = getTaskClassLevels(td))) df = melt(df, id.vars = "truth", value.name = "Probability", variable.name = "Class") if (is.null(groups)) { @@ -101,6 +108,7 @@ generateCalibrationData.list = function(obj, breaks = "Sturges", groups = NULL, df$bin = Hmisc::cut2(df$Probability, g = groups, digits = 3) } fun = function(x) { + tab = table(x$Class, x$truth) s = rowSums(tab) as.list(ifelse(s == 0, 0, diag(tab) / s)) @@ -114,7 +122,7 @@ generateCalibrationData.list = function(obj, breaks = "Sturges", groups = NULL, data = data[data$Class != td$negative, ] } max.bin = sapply(stri_split(levels(proportion$bin), regex = ",|]|\\)"), - function(x) as.numeric(x[length(x)])) + function(x) as.numeric(x[length(x)])) proportion$bin = ordered(proportion$bin, levels = levels(proportion$bin)[order(max.bin)]) proportion = melt(proportion, id.vars = c("Learner", "bin"), value.name = "Proportion", variable.name = "Class") data$bin = ordered(data$bin, levels = levels(data$bin)[order(max.bin)]) @@ -122,9 +130,9 @@ generateCalibrationData.list = function(obj, breaks = "Sturges", groups = NULL, setDF(proportion) makeS3Obj("CalibrationData", - proportion = proportion, - data = data, - task = td) + proportion = proportion, + data = data, + task = td) } #' @title Plot calibration data using ggplot2. 
#' @@ -152,13 +160,13 @@ generateCalibrationData.list = function(obj, breaks = "Sturges", groups = NULL, #' @examples #' \dontrun{ #' lrns = list(makeLearner("classif.rpart", predict.type = "prob"), -#' makeLearner("classif.nnet", predict.type = "prob")) +#' makeLearner("classif.nnet", predict.type = "prob")) #' fit = lapply(lrns, train, task = iris.task) #' pred = lapply(fit, predict, task = iris.task) #' names(pred) = c("rpart", "nnet") #' out = generateCalibrationData(pred, groups = 3) #' plotCalibration(out) -#' +#' #' fit = lapply(lrns, train, task = sonar.task) #' pred = lapply(fit, predict, task = sonar.task) #' names(pred) = c("rpart", "lda") @@ -166,6 +174,7 @@ generateCalibrationData.list = function(obj, breaks = "Sturges", groups = NULL, #' plotCalibration(out) #' } plotCalibration = function(obj, smooth = FALSE, reference = TRUE, rag = TRUE, facet.wrap.nrow = NULL, facet.wrap.ncol = NULL) { + assertClass(obj, "CalibrationData") assertFlag(smooth) assertFlag(reference) @@ -176,17 +185,19 @@ plotCalibration = function(obj, smooth = FALSE, reference = TRUE, rag = TRUE, fa p = ggplot(obj$proportion, aes_string("bin", "Proportion", color = "Class", group = "Class")) p = p + scale_x_discrete(drop = FALSE) - if (smooth) + if (smooth) { p = p + stat_smooth(se = FALSE, span = 2, method = "loess") - else + } else { p = p + geom_point() + geom_line() + } if (length(unique(obj$proportion$Learner)) > 1L) { - p = p + facet_wrap(~ Learner, nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) + p = p + facet_wrap(~Learner, nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) } - if (reference) + if (reference) { p = p + geom_segment(aes_string(1, 0, xend = "xend", yend = 1), colour = "black", linetype = "dashed") + } if (rag) { top.data = obj$data[obj$data$truth == obj$data$Class, ] diff --git a/R/generateFeatureImportance.R b/R/generateFeatureImportance.R index e4457ce73f..850aecdad5 100644 --- a/R/generateFeatureImportance.R +++ b/R/generateFeatureImportance.R @@ -73,12 +73,11 @@ #' } #' #' @examples -#' +#' #' lrn = makeLearner("classif.rpart", predict.type = "prob") #' fit = train(lrn, iris.task) #' imp = generateFeatureImportanceData(iris.task, "permutation.importance", #' lrn, "Petal.Width", nmc = 10L, local = TRUE) -#' #' @references Jerome Friedman; Greedy Function Approximation: A Gradient Boosting Machine, Annals of Statistics, Vol. 29, No. 5 (Oct., 2001), pp. 1189-1232. 
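#'
#' A minimal aggregated (non-local) sketch; this is not part of the original
#' patch, it just mirrors the iris.task example above with the same documented
#' argument order, and nmc is kept small here purely for speed:
#' \dontrun{
#' lrn = makeLearner("classif.rpart", predict.type = "prob")
#' # without local = TRUE, importances are aggregated over all observations
#' imp = generateFeatureImportanceData(iris.task, "permutation.importance",
#'   lrn, "Petal.Width", nmc = 10L)
#' print(imp)
#' }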
#' @export generateFeatureImportanceData = function(task, method = "permutation.importance", @@ -88,26 +87,32 @@ generateFeatureImportanceData = function(task, method = "permutation.importance" learner = checkLearner(learner) measure = checkMeasures(measure, learner) - if (length(measure) > 1L) + if (length(measure) > 1L) { stop("only one measure is allowed.") - if (getTaskType(task) != learner$type) + } + if (getTaskType(task) != learner$type) { stopf("Expected task of type '%s', not '%s'", getTaskType(task), learner$type) + } assertCount(nmc) test.contrast = contrast(1, 1) - if (!(is.numeric(test.contrast))) + if (!(is.numeric(test.contrast))) { stop("the contrast function must return a numeric vector.") - if (!length(test.contrast) == 1L) + } + if (!length(test.contrast) == 1L) { stop("the contrast function must return a numeric vector the same length as the input.") + } test.aggregation = aggregation(1:2) - if (!is.numeric(test.aggregation)) + if (!is.numeric(test.aggregation)) { stop("aggregation argument doesn't return a numeric vector.") - if (!(length(test.aggregation) == 1L)) + } + if (!(length(test.aggregation) == 1L)) { stop("aggregation function must either return 1 number or a numeric vector of the same length as the number of rows in the task data.frame.") + } out = switch(method, "permutation.importance" = doPermutationImportance( task, learner, features, interaction, measure, contrast, aggregation, nmc, replace, local) - ) + ) makeS3Obj( "FeatureImportance", @@ -135,6 +140,7 @@ doPermutationImportance = function(task, learner, features, interaction, measure if (local) { # subset the prediction data element to compute the per-observation performance perf = vnapply(1:getTaskSize(task), function(i) { + pred$data = pred$data[i, ] performance(pred, measure) }) @@ -149,13 +155,14 @@ doPermutationImportance = function(task, learner, features, interaction, measure if (nmc == -1L) { ## from http://stackoverflow.com/questions/11095992/generating-all-distinct-permutations-of-a-list-in-r permutations = function(n) { - if (n == 1L){ + + if (n == 1L) { return(matrix(1L)) } else { sp = permutations(n - 1L) p = nrow(sp) A = matrix(nrow = n, ncol = n * p) - for (i in 1:n){ + for (i in 1:n) { A[, (i - 1) * p + 1:p] = rbind(i, sp + (sp >= i)) } return(A) @@ -167,7 +174,7 @@ doPermutationImportance = function(task, learner, features, interaction, measure } args = list(measure = measure, contrast = contrast, data = data, - perf = perf, fit = fit, indices = indices) + perf = perf, fit = fit, indices = indices) doPermutationImportanceIteration = function(perf, fit, data, measure, contrast, indices, i, x) { @@ -176,6 +183,7 @@ doPermutationImportance = function(task, learner, features, interaction, measure if (local) { perf.permuted = lapply(seq_len(getTaskSize(task)), function(i, pred) { + pred$data = pred$data[i, ] performance(pred, measure) }, pred = predict(fit, newdata = data)) @@ -195,6 +203,7 @@ doPermutationImportance = function(task, learner, features, interaction, measure colnames(out) = stri_paste(features, collapse = ":") } else { out = lapply(features, function(x) { + parallelMap(doPermutationImportanceIteration, i = seq_len(nmc), more.args = c(args, x = x)) }) out = lapply(out, function(x) apply(do.call("rbind", x), 2, aggregation)) @@ -207,6 +216,7 @@ doPermutationImportance = function(task, learner, features, interaction, measure #' @export print.FeatureImportance = function(x, ...) 
{ + catf("FeatureImportance:") catf("Task: %s", x$task.desc$id) catf("Interaction: %s", x$interaction) diff --git a/R/generateFilterValues.R b/R/generateFilterValues.R index 20d3163eb1..469c865854 100644 --- a/R/generateFilterValues.R +++ b/R/generateFilterValues.R @@ -44,31 +44,34 @@ generateFilterValuesData = function(task, method = "randomForestSRC_importance", pkgs = lapply(pkgs, function(x) requirePackages(x, why = "generateFilterValuesData", default.method = "load")) } check.task = sapply(filter, function(x) td$type %nin% x$supported.tasks) - if (any(check.task)) + if (any(check.task)) { stopf("Filter(s) %s not compatible with task of type '%s'", - stri_paste("'", method[check.task], "'", collapse = ", "), td$type) + stri_paste("'", method[check.task], "'", collapse = ", "), td$type) + } check.feat = lapply(filter, function(x) setdiff(names(td$n.feat[td$n.feat > 0L]), x$supported.features)) check.length = sapply(check.feat, length) > 0L if (any(check.length)) { stopf("Filter(s) %s not compatible with features of type %s respectively", - stri_paste("'", method[check.length], "'", collapse = ", "), - stri_paste(sapply(check.feat[check.length], function(x) stri_paste("'", x, "'", collapse = ", ")), collapse = ", and ")) + stri_paste("'", method[check.length], "'", collapse = ", "), + stri_paste(sapply(check.feat[check.length], function(x) stri_paste("'", x, "'", collapse = ", ")), collapse = ", and ")) } assertCount(nselect) assertList(more.args, names = "unique", max.len = length(method)) assertSubset(names(more.args), method) dot.args = list(...) - if (length(dot.args) > 0L && length(more.args) > 0L) + if (length(dot.args) > 0L && length(more.args) > 0L) { stopf("Do not use both 'more.args' and '...' here!") + } # we have dot.args, so we cannot have more.args. either complain (> 1 method) or # auto-setup more.args as list if (length(dot.args) > 0L) { - if (length(method) == 1L) - more.args = namedList(method, dot.args) - else + if (length(method) == 1L) { + more.args = namedList(method, dot.args) + } else { stopf("You use more than 1 filter method. Please pass extra arguments via 'more.args' and not '...' to filter methods!") + } } fn = getTaskFeatureNames(task) @@ -76,6 +79,7 @@ generateFilterValuesData = function(task, method = "randomForestSRC_importance", fval = do.call(filter[[1]]$fun, c(list(task = task, nselect = nselect), more.args[[filter[[1]]$name]])) fval = lapply(filter, function(x) { + x = do.call(x$fun, c(list(task = task), nselect = nselect, more.args[[x$name]])) missing.score = setdiff(fn, names(x)) x[missing.score] = NA_real_ @@ -86,14 +90,15 @@ generateFilterValuesData = function(task, method = "randomForestSRC_importance", colnames(fval) = method types = vcapply(getTaskData(task, target.extra = TRUE)$data[fn], getClass1) out = data.frame(name = row.names(fval), - type = types, - fval, row.names = NULL, stringsAsFactors = FALSE) + type = types, + fval, row.names = NULL, stringsAsFactors = FALSE) makeS3Obj("FilterValues", - task.desc = td, - data = out) + task.desc = td, + data = out) } #' @export print.FilterValues = function(x, ...) { + catf("FilterValues:") catf("Task: %s", x$task.desc$id) printHead(x$data, ...) @@ -124,10 +129,12 @@ print.FilterValues = function(x, ...) 
{ #' fv = generateFilterValuesData(iris.task, method = "variance") #' plotFilterValues(fv) plotFilterValues = function(fvalues, sort = "dec", n.show = 20L, feat.type.cols = FALSE, facet.wrap.nrow = NULL, facet.wrap.ncol = NULL) { + assertClass(fvalues, classes = "FilterValues") assertChoice(sort, choices = c("dec", "inc", "none")) - if (!(is.null(fvalues$method))) + if (!(is.null(fvalues$method))) { stop("fvalues must be generated by generateFilterValuesData, not getFilterValues, which is deprecated.") + } n.show = asCount(n.show) @@ -143,25 +150,26 @@ plotFilterValues = function(fvalues, sort = "dec", n.show = 20L, feat.type.cols } data$name = factor(data$name, levels = as.character(unique(data$name))) - if (feat.type.cols) + if (feat.type.cols) { mp = aes_string(x = "name", y = "value", fill = "type") - else + } else { mp = aes_string(x = "name", y = "value") + } plt = ggplot(data = data, mapping = mp) plt = plt + geom_bar(position = "identity", stat = "identity") if (length(unique(data$method)) > 1L) { - plt = plt + facet_wrap(~ method, scales = "free_y", + plt = plt + facet_wrap(~method, scales = "free_y", nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) plt = plt + labs(title = sprintf("%s (%i features)", - fvalues$task.desc$id, - sum(fvalues$task.desc$n.feat)), - x = "", y = "") + fvalues$task.desc$id, + sum(fvalues$task.desc$n.feat)), + x = "", y = "") } else { plt = plt + labs(title = sprintf("%s (%i features), filter = %s", - fvalues$task.desc$id, - sum(fvalues$task.desc$n.feat), - methods), - x = "", y = "") + fvalues$task.desc$id, + sum(fvalues$task.desc$n.feat), + methods), + x = "", y = "") } plt = plt + theme(axis.text.x = element_text(angle = 45, hjust = 1)) return(plt) diff --git a/R/generateHyperParsEffect.R b/R/generateHyperParsEffect.R index 02260a33a7..757fabf591 100644 --- a/R/generateHyperParsEffect.R +++ b/R/generateHyperParsEffect.R @@ -37,25 +37,26 @@ #' diagnostic info, a flag for whether nested cv was used, a flag for whether #' partial dependence should be generated, and the optimization algorithm used. 
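A quick usage sketch for the plotFilterValues options handled above (sort direction, number of features shown, coloring by feature type); the variance filter and the nselect value are illustrative choices.

fv = generateFilterValuesData(iris.task, method = "variance", nselect = 4L)
plotFilterValues(fv, sort = "inc", n.show = 3L, feat.type.cols = TRUE)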
#' -#' @examples \dontrun{ +#' @examples +#' \dontrun{ #' # 3-fold cross validation #' ps = makeParamSet(makeDiscreteParam("C", values = 2^(-4:4))) #' ctrl = makeTuneControlGrid() #' rdesc = makeResampleDesc("CV", iters = 3L) #' res = tuneParams("classif.ksvm", task = pid.task, resampling = rdesc, -#' par.set = ps, control = ctrl) +#' par.set = ps, control = ctrl) #' data = generateHyperParsEffectData(res) #' plt = plotHyperParsEffect(data, x = "C", y = "mmce.test.mean") #' plt + ylab("Misclassification Error") -#' +#' #' # nested cross validation #' ps = makeParamSet(makeDiscreteParam("C", values = 2^(-4:4))) #' ctrl = makeTuneControlGrid() #' rdesc = makeResampleDesc("CV", iters = 3L) #' lrn = makeTuneWrapper("classif.ksvm", control = ctrl, -#' resampling = rdesc, par.set = ps) +#' resampling = rdesc, par.set = ps) #' res = resample(lrn, task = pid.task, resampling = cv2, -#' extract = getTuneResult) +#' extract = getTuneResult) #' data = generateHyperParsEffectData(res) #' plotHyperParsEffect(data, x = "C", y = "mmce.test.mean", plot.type = "line") #' } @@ -72,14 +73,16 @@ generateHyperParsEffectData = function(tune.result, include.diagnostics = FALSE, assertFlag(partial.dep) # in case we have nested CV - if (getClass1(tune.result) == "ResampleResult"){ + if (getClass1(tune.result) == "ResampleResult") { d = getNestedTuneResultsOptPathDf(tune.result, trafo = trafo) num.hypers = length(tune.result$extract[[1]]$x) - if ((num.hypers > 2) && !partial.dep) + if ((num.hypers > 2) && !partial.dep) { stopf("Partial dependence must be requested with partial.dep when tuning more than 2 hyperparameters") + } for (hyp in 1:num.hypers) { - if (!is.numeric(d[, hyp])) + if (!is.numeric(d[, hyp])) { d[, hyp] = type.convert(as.character(d[, hyp])) + } } # rename to be clear this denotes the nested cv names(d)[names(d) == "iter"] = "nested_cv_run" @@ -90,18 +93,20 @@ generateHyperParsEffectData = function(tune.result, include.diagnostics = FALSE, optimization = getClass1(tune.result$extract[[1]]$control) nested = TRUE } else { - if (trafo){ + if (trafo) { d = as.data.frame(trafoOptPath(tune.result$opt.path)) } else { d = as.data.frame(tune.result$opt.path) } # what if we have numerics that were discretized upstream num.hypers = length(tune.result$x) - if ((num.hypers > 2) && !partial.dep) + if ((num.hypers > 2) && !partial.dep) { stopf("Partial dependence must be requested with partial.dep when tuning more than 2 hyperparameters") + } for (hyp in 1:num.hypers) { - if (!is.numeric(d[, hyp])) + if (!is.numeric(d[, hyp])) { d[, hyp] = type.convert(as.character(d[, hyp])) + } } measures = tune.result$opt.path$y.names hyperparams = names(tune.result$x) @@ -110,8 +115,9 @@ generateHyperParsEffectData = function(tune.result, include.diagnostics = FALSE, } # off by default unless needed by user - if (include.diagnostics == FALSE) + if (include.diagnostics == FALSE) { d = within(d, rm("eol", "error.message")) + } # users might not know what dob means, so let's call it iteration names(d)[names(d) == "dob"] = "iteration" @@ -126,13 +132,15 @@ generateHyperParsEffectData = function(tune.result, include.diagnostics = FALSE, #' @export print.HyperParsEffectData = function(x, ...) 
{ + catf("HyperParsEffectData:") catf("Hyperparameters: %s", collapse(x$hyperparams)) catf("Measures: %s", collapse(x$measures)) catf("Optimizer: %s", collapse(x$optimization)) catf("Nested CV Used: %s", collapse(x$nested)) - if (x$partial) + if (x$partial) { print("Partial dependence requested") + } catf("Snapshot of data:") print(head(x$data)) } @@ -246,10 +254,10 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, assertSubset(facet, choices = names(hyperpars.effect.data$data)) assertFlag(global.only) assert(checkClass(interpolate, "Learner"), checkString(interpolate), - checkNull(interpolate)) + checkNull(interpolate)) # assign learner for interpolation if (checkClass(interpolate, "Learner") == TRUE || - checkString(interpolate) == TRUE) { + checkString(interpolate) == TRUE) { lrn = checkLearner(interpolate, "regr") } assertFlag(show.experiments) @@ -258,17 +266,20 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, assert(checkClass(partial.dep.learn, "Learner"), checkString(partial.dep.learn), checkNull(partial.dep.learn)) if (checkClass(partial.dep.learn, "Learner") == TRUE || - checkString(partial.dep.learn) == TRUE) { + checkString(partial.dep.learn) == TRUE) { lrn = checkLearner(partial.dep.learn, "regr") } - if (!is.null(partial.dep.learn) && !is.null(interpolate)) + if (!is.null(partial.dep.learn) && !is.null(interpolate)) { stopf("partial.dep.learn and interpolate can't be simultaneously requested!") - if (length(x) > 1 || length(y) > 1 || length(z) > 1 || length(facet) > 1) + } + if (length(x) > 1 || length(y) > 1 || length(z) > 1 || length(facet) > 1) { stopf("Greater than 1 length x, y, z or facet not yet supported") + } d = hyperpars.effect.data$data - if (hyperpars.effect.data$nested) + if (hyperpars.effect.data$nested) { d$nested_cv_run = as.factor(d$nested_cv_run) + } # gather names hypers = hyperpars.effect.data$hyperparams @@ -281,18 +292,19 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, heatcontour.flag = plot.type %in% c("heatmap", "contour") partial.flag = hyperpars.effect.data$partial - if (partial.flag && is.null(partial.dep.learn)) + if (partial.flag && is.null(partial.dep.learn)) { stopf("Partial dependence requested but partial.dep.learn not specified!") + } # deal with NAs where optimizer failed - if (na.flag){ + if (na.flag) { d$learner_status = ifelse(is.na(d[, "exec.time"]), "Failure", "Success") for (col in hyperpars.effect.data$measures) { col.name = stri_split_fixed(col, ".test.mean", omit_empty = TRUE)[[1]] - if (heatcontour.flag){ + if (heatcontour.flag) { d[, col][is.na(d[, col])] = get(col.name)$worst } else { - if (get(col.name)$minimize){ + if (get(col.name)$minimize) { d[, col][is.na(d[, col])] = max(d[, col], na.rm = TRUE) } else { d[, col][is.na(d[, col])] = min(d[, col], na.rm = TRUE) @@ -335,10 +347,10 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, } } else { # assign for global only - if (global.only && x == "iteration" && y %in% hyperpars.effect.data$measures){ + if (global.only && x == "iteration" && y %in% hyperpars.effect.data$measures) { for (col in hyperpars.effect.data$measures) { col.name = stri_split_fixed(col, ".test.mean", omit_empty = TRUE)[[1]] - if (get(col.name)$minimize){ + if (get(col.name)$minimize) { d[, col] = cummin(d[, col]) } else { d[, col] = cummax(d[, col]) @@ -346,18 +358,18 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, } } - if ((!is.null(interpolate)) && z.flag && 
(heatcontour.flag)){ + if ((!is.null(interpolate)) && z.flag && (heatcontour.flag)) { # create grid xo = seq(min(d[, x]), max(d[, x]), length.out = 100) yo = seq(min(d[, y]), max(d[, y]), length.out = 100) grid = expand.grid(xo, yo, KEEP.OUT.ATTRS = FALSE) names(grid) = c(x, y) - if (hyperpars.effect.data$nested){ + if (hyperpars.effect.data$nested) { d.new = d new.d = data.frame() # for loop for each nested cv run - for (run in unique(d$nested_cv_run)){ + for (run in unique(d$nested_cv_run)) { d.run = d.new[d.new$nested_cv_run == run, ] regr.task = makeRegrTask(id = "interp", data = d.run[, c(x, y, z)], target = z) @@ -389,13 +401,13 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, d = grid } - if (hyperpars.effect.data$nested && z.flag){ + if (hyperpars.effect.data$nested && z.flag) { averaging = d[, !(names(d) %in% c("iteration", "nested_cv_run", hyperpars.effect.data$hyperparams, "eol", "error.message", "learner_status")), - drop = FALSE] + drop = FALSE] # keep experiments if we need it - if (na.flag || (!is.null(interpolate)) || show.experiments){ + if (na.flag || (!is.null(interpolate)) || show.experiments) { hyperpars = lapply(d[, c(hyperpars.effect.data$hyperparams, "learner_status")], "[") } else { @@ -407,13 +419,13 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, } # just x, y - if ((length(x) == 1) && (length(y) == 1) && !(z.flag)){ + if ((length(x) == 1) && (length(y) == 1) && !(z.flag)) { if (hyperpars.effect.data$nested && !partial.flag) { plt = ggplot(d, aes_string(x = x, y = y, color = "nested_cv_run")) } else { plt = ggplot(d, aes_string(x = x, y = y)) } - if (na.flag && !partial.flag){ + if (na.flag && !partial.flag) { plt = plt + geom_point(aes_string(shape = "learner_status", color = "learner_status")) + scale_shape_manual(values = c("Failure" = 24, "Success" = 0)) + @@ -421,19 +433,22 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, } else { plt = plt + geom_point() } - if (plot.type == "line") + if (plot.type == "line") { plt = plt + geom_line() - if (loess.smooth) + } + if (loess.smooth) { plt = plt + geom_smooth() - if (facet.flag) + } + if (facet.flag) { plt = plt + facet_wrap(facet) - } else if ((length(x) == 1) && (length(y) == 1) && (z.flag)){ + } + } else if ((length(x) == 1) && (length(y) == 1) && (z.flag)) { # the data we use depends on if interpolation - if (heatcontour.flag){ - if (!is.null(interpolate)){ + if (heatcontour.flag) { + if (!is.null(interpolate)) { plt = ggplot(data = d[d$learner_status == "Interpolated Point", ], aes_string(x = x, y = y, fill = z, z = z)) + geom_raster() - if (show.interpolated && !(na.flag || show.experiments)){ + if (show.interpolated && !(na.flag || show.experiments)) { plt = plt + geom_point(aes_string(shape = "learner_status")) + scale_shape_manual(values = c("Interpolated Point" = 6)) } @@ -441,11 +456,11 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, plt = ggplot(data = d, aes_string(x = x, y = y, fill = z, z = z)) + geom_raster() } - if ((na.flag || show.experiments) && !show.interpolated && !partial.flag){ + if ((na.flag || show.experiments) && !show.interpolated && !partial.flag) { plt = plt + geom_point(data = d[d$learner_status %in% c("Success", "Failure"), ], - aes_string(shape = "learner_status"), - fill = "red") + + aes_string(shape = "learner_status"), + fill = "red") + scale_shape_manual(values = c("Failure" = 24, "Success" = 0)) } else if ((na.flag || show.experiments) && (show.interpolated)) { 
plt = plt + geom_point(data = d, aes_string(shape = "learner_status"), @@ -453,21 +468,23 @@ plotHyperParsEffect = function(hyperpars.effect.data, x = NULL, y = NULL, scale_shape_manual(values = c("Failure" = 24, "Success" = 0, "Interpolated Point" = 6)) } - if (plot.type == "contour") + if (plot.type == "contour") { plt = plt + geom_contour() + } plt = plt + scale_fill_gradientn(colors = c("#9E0142", "#D53E4F", "#F46D43", "#FDAE61", "#FEE08B", "#FFFFBF", "#E6F598", "#ABDDA4", "#66C2A5", "#3288BD", "#5E4FA2")) # RColorBrewer::brewer.pal(11, "Spectral") } else { plt = ggplot(d, aes_string(x = x, y = y, color = z)) - if (na.flag){ + if (na.flag) { plt = plt + geom_point(aes_string(shape = "learner_status", color = "learner_status")) + scale_shape_manual(values = c("Failure" = 24, "Success" = 0)) + scale_color_manual(values = c("red", "black")) - } else{ + } else { plt = plt + geom_point() } - if (plot.type == "line") + if (plot.type == "line") { plt = plt + geom_line() + } } } return(plt) diff --git a/R/generateLearningCurve.R b/R/generateLearningCurve.R index 93f5109db9..f6cf67d8e9 100644 --- a/R/generateLearningCurve.R +++ b/R/generateLearningCurve.R @@ -38,14 +38,14 @@ #' }} #' @examples #' r = generateLearningCurveData(list("classif.rpart", "classif.knn"), -#' task = sonar.task, percs = seq(0.2, 1, by = 0.2), -#' measures = list(tp, fp, tn, fn), resampling = makeResampleDesc(method = "Subsample", iters = 5), -#' show.info = FALSE) +#' task = sonar.task, percs = seq(0.2, 1, by = 0.2), +#' measures = list(tp, fp, tn, fn), resampling = makeResampleDesc(method = "Subsample", iters = 5), +#' show.info = FALSE) #' plotLearningCurve(r) #' @noMd #' @export generateLearningCurveData = function(learners, task, resampling = NULL, - percs = seq(0.1, 1, by = 0.1), measures, stratify = FALSE, show.info = getMlrOption("show.info")) { + percs = seq(0.1, 1, by = 0.1), measures, stratify = FALSE, show.info = getMlrOption("show.info")) { learners = ensureVector(learners, 1, "Learner") learners = lapply(learners, checkLearner) @@ -54,14 +54,17 @@ generateLearningCurveData = function(learners, task, resampling = NULL, measures = checkMeasures(measures, task) assertFlag(stratify) - if (is.null(resampling)) + if (is.null(resampling)) { resampling = makeResampleInstance("Holdout", task = task) - else + } else { assert(checkClass(resampling, "ResampleDesc"), checkClass(resampling, "ResampleInstance")) + } # create downsampled versions for all learners lrnds1 = lapply(learners, function(lrn) { + lapply(seq_along(percs), function(p.id) { + perc = percs[p.id] dsw = makeDownsampleWrapper(learner = lrn, dw.perc = perc, dw.stratify = stratify) list( @@ -74,7 +77,7 @@ generateLearningCurveData = function(learners, task, resampling = NULL, lrnds2 = unlist(lrnds1, recursive = FALSE) dsws = extractSubList(lrnds2, "lrn", simplify = FALSE) - bench.res = benchmark(dsws, task, resampling, measures, show.info = show.info) + bench.res = benchmark(dsws, task, resampling, measures, show.info = show.info) perfs = getBMRAggrPerformances(bench.res, as.df = TRUE) # get perc and learner col data @@ -94,6 +97,7 @@ generateLearningCurveData = function(learners, task, resampling = NULL, } #' @export print.LearningCurveData = function(x, ...) { + catf("LearningCurveData:") catf("Task: %s", x$task$task.desc$id) catf("Measures: %s", collapse(extractSubList(x$measures, "name"))) @@ -122,6 +126,7 @@ print.LearningCurveData = function(x, ...) 
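An illustrative call exercising the interpolation branch of plotHyperParsEffect above: a regression learner fills in a 100 x 100 grid for the heatmap, and the original experiments are overplotted. regr.earth as the interpolating learner and pid.task are assumptions; any mlr regression learner should work.

ps = makeParamSet(
  makeNumericParam("C", lower = -5, upper = 5, trafo = function(x) 2^x),
  makeNumericParam("sigma", lower = -5, upper = 5, trafo = function(x) 2^x))
ctrl = makeTuneControlRandom(maxit = 20L)
res = tuneParams("classif.ksvm", pid.task, cv3, par.set = ps, control = ctrl)
data = generateHyperParsEffectData(res, trafo = TRUE)
plotHyperParsEffect(data, x = "C", y = "sigma", z = "mmce.test.mean",
  plot.type = "heatmap", interpolate = "regr.earth", show.experiments = TRUE)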
{ #' @export plotLearningCurve = function(obj, facet = "measure", pretty.names = TRUE, facet.wrap.nrow = NULL, facet.wrap.ncol = NULL) { + assertClass(obj, "LearningCurveData") mappings = c("measure", "learner") assertChoice(facet, mappings) @@ -138,15 +143,18 @@ plotLearningCurve = function(obj, facet = "measure", pretty.names = TRUE, nlearn = length(unique(data$learner)) nmeas = length(unique(data$measure)) - if ((color == "learner" & nlearn == 1L) | (color == "measure" & nmeas == 1L)) + if ((color == "learner" & nlearn == 1L) | (color == "measure" & nmeas == 1L)) { color = NULL - if ((facet == "learner" & nlearn == 1L) | (facet == "measure" & nmeas == 1L)) + } + if ((facet == "learner" & nlearn == 1L) | (facet == "measure" & nmeas == 1L)) { facet = NULL + } - if (!is.null(color)) + if (!is.null(color)) { plt = ggplot(data, aes_string(x = "percentage", y = "performance", colour = color)) - else + } else { plt = ggplot(data, aes_string(x = "percentage", y = "performance")) + } plt = plt + geom_point() plt = plt + geom_line() if (!is.null(facet)) { diff --git a/R/generatePartialDependence.R b/R/generatePartialDependence.R index bf1ab835c1..b5dc2aaa09 100644 --- a/R/generatePartialDependence.R +++ b/R/generatePartialDependence.R @@ -97,7 +97,7 @@ #' fit = train(lrn, bh.task) #' pd = generatePartialDependenceData(fit, bh.task, "lstat") #' plotPartialDependence(pd, data = getTaskData(bh.task)) -#' +#' #' lrn = makeLearner("classif.rpart", predict.type = "prob") #' fit = train(lrn, iris.task) #' pd = generatePartialDependenceData(fit, iris.task, "Petal.Width") @@ -110,12 +110,15 @@ generatePartialDependenceData = function(obj, input, features = NULL, requirePackages("mmpf") assertClass(obj, "WrappedModel") - if (obj$learner$predict.type == "se" & individual) + if (obj$learner$predict.type == "se" & individual) { stop("individual = TRUE is not compatible with predict.type = 'se'!") - if (obj$learner$predict.type == "se" & derivative) + } + if (obj$learner$predict.type == "se" & derivative) { stop("derivative = TRUE is not compatible with predict.type = 'se'!") - if (!inherits(input, c("Task", "data.frame"))) + } + if (!inherits(input, c("Task", "data.frame"))) { stop("input must be a Task or a data.frame!") + } if (inherits(input, "Task")) { data = getTaskData(input) td = input$task.desc @@ -126,8 +129,9 @@ generatePartialDependenceData = function(obj, input, features = NULL, assertSetEqual(colnames(data), c(obj$features, td$target), ordered = FALSE) } - if (is.na(n[2])) + if (is.na(n[2])) { n[2] = nrow(data) + } if (is.null(features)) { features = colnames(data)[!colnames(data) %in% td$target] @@ -137,11 +141,13 @@ generatePartialDependenceData = function(obj, input, features = NULL, assertFlag(interaction) assertFlag(derivative) - if (derivative & interaction) + if (derivative & interaction) { stop("interaction cannot be TRUE if derivative is TRUE.") + } if (derivative) { - if (any(sapply(data[, features, drop = FALSE], class) %in% c("factor", "ordered", "character"))) + if (any(sapply(data[, features, drop = FALSE], class) %in% c("factor", "ordered", "character"))) { stop("All features must be numeric when derivative = TRUE!") + } } se = Function = Class = patterns = NULL # nolint @@ -157,8 +163,9 @@ generatePartialDependenceData = function(obj, input, features = NULL, multi.fun = FALSE } else { multi.fun = TRUE - if (is.null(names(test.fun)) & !individual) + if (is.null(names(test.fun)) & !individual) { stop("If fun returns a vector it must be named.") + } } assertNumeric(bounds,
len = 2L) @@ -171,9 +178,9 @@ generatePartialDependenceData = function(obj, input, features = NULL, stop("The number of points taken from the training data cannot exceed the number of training data points.") } - if (td$type == "regr") + if (td$type == "regr") { target = td$target - else if (td$type == "classif") { + } else if (td$type == "classif") { if (length(td$class.levels) > 2L) { target = td$class.levels } else { @@ -189,17 +196,18 @@ generatePartialDependenceData = function(obj, input, features = NULL, vars = if (interaction) list(features) else as.list(features), more.args = args) if (length(target) == 1L) { out = lapply(out, function(x) { + feature = features[features %in% names(x)] - names(x) = stri_replace_all(names(x), target, regex = "^preds") - x = data.table(x) - if (individual) { - x = melt(x, id.vars = feature, variable.name = "n", value.name = target) - x[, n := stri_replace(n, "", regex = target)] - setnames(x, c(feature, if (individual) "n" else "Function", target), names(x)) - } else { - x - } - }) + names(x) = stri_replace_all(names(x), target, regex = "^preds") + x = data.table(x) + if (individual) { + x = melt(x, id.vars = feature, variable.name = "n", value.name = target) + x[, n := stri_replace(n, "", regex = target)] + setnames(x, c(feature, if (individual) "n" else "Function", target), names(x)) + } else { + x + } + }) } } else { points = lapply(features, function(x) mmpf::uniformGrid(data[[x]], n[1])) @@ -244,9 +252,10 @@ generatePartialDependenceData = function(obj, input, features = NULL, if (td$type == "classif") "Probability" else "Prediction", features)) } } else if (individual) { - if (!derivative) + if (!derivative) { out = melt(out, measure = patterns(target), variable.name = "n", value.name = target) + } out = melt(out, measure.vars = target, variable.name = if (td$type == "classif") "Class" else "Target", value.name = if (td$type == "classif") "Probability" else "Prediction") @@ -311,9 +320,10 @@ doDerivativeMarginalPrediction = function(x, z = sample(seq_len(nrow(data)), n[2 int.points = z, predict.fun = getPrediction, n = n, target = target, individual = individual, ...), - points[[x]], if (individual) z) + points[[x]], if (individual) z) } else { out = lapply(points[[x]], function(x.value) { + t(numDeriv::jacobian(numDerivWrapper, x = x.value, model = obj, data = data, uniform = uniform, aggregate.fun = fun, vars = x, int.points = z, predict.fun = getPrediction, n = n, target = target, @@ -333,6 +343,7 @@ doDerivativeMarginalPrediction = function(x, z = sample(seq_len(nrow(data)), n[2 # so i need to pass the points as that x, and then extract the appropriate # vector or matrix from marginalPrediction numDerivWrapper = function(points, vars, individual, target, ...) { + args = list(...) args$points = list(points) names(args$points) = vars @@ -343,6 +354,7 @@ numDerivWrapper = function(points, vars, individual, target, ...) { #' @export print.PartialDependenceData = function(x, ...) 
{ + catf("PartialDependenceData") catf("Task: %s", x$task.desc$id) catf("Features: %s", stri_paste(x$features, collapse = ", ", sep = " ")) @@ -396,25 +408,29 @@ plotPartialDependence = function(obj, geom = "line", facet = NULL, facet.wrap.nr assertClass(obj, "PartialDependenceData") assertChoice(geom, c("tile", "line")) - if (obj$interaction & length(obj$features) > 2L & geom != "tile") + if (obj$interaction & length(obj$features) > 2L & geom != "tile") { stop("Cannot plot more than 2 features together with line plots.") + } if (geom == "tile") { - if (!obj$interaction) + if (!obj$interaction) { stop("obj argument created by generatePartialDependenceData was called with interaction = FALSE!") + } } if (!is.null(data)) { assertDataFrame(data, col.names = "unique", min.rows = 1L, - min.cols = length(obj$features) + length(obj$td$target)) + min.cols = length(obj$features) + length(obj$td$target)) assertSubset(obj$features, colnames(data), empty.ok = FALSE) } if (!is.null(facet)) { assertChoice(facet, obj$features) - if (!length(obj$features) %in% 2:3) + if (!length(obj$features) %in% 2:3) { stop("obj argument created by generatePartialDependenceData must be called with two or three features to use this argument!") - if (!obj$interaction) + } + if (!obj$interaction) { stop("obj argument created by generatePartialDependenceData must be called with interaction = TRUE to use this argument!") + } features = obj$features[which(obj$features != facet)] if (is.factor(obj$data[[facet]])) { @@ -441,14 +457,15 @@ plotPartialDependence = function(obj, geom = "line", facet = NULL, facet.wrap.nr # detect if there was a multi-output function used in which case # there should be a column named function which needs to be facetted if ("Function" %in% colnames(obj$data)) { - facet = c(facet, "Function") + facet = c(facet, "Function") } # sample from individual partial dependence estimates if (p != 1) { assertNumber(p, lower = 0, upper = 1, finite = TRUE) - if (!obj$individual) + if (!obj$individual) { stop("obj argument created by generatePartialDependenceData must be called with individual = TRUE to use this argument!") + } rows = unique(obj$data$idx) id = sample(rows, size = floor(p * length(rows))) obj$data = obj$data[which(obj$data$idx %in% id), ] @@ -483,16 +500,16 @@ plotPartialDependence = function(obj, geom = "line", facet = NULL, facet.wrap.nr if (!obj$individual) { # for regression/survival this is a simple line plot if (obj$task.desc$type %in% c("regr", "surv") | - (obj$task.desc$type == "classif" & length(obj$task.desc$class.levels) <= 2L)) { + (obj$task.desc$type == "classif" & length(obj$task.desc$class.levels) <= 2L)) { plt = ggplot(obj$data, aes_string("Value", target)) + geom_line(color = ifelse(is.null(data), "black", "red")) + geom_point() - } else {# for classification create different colored lines + } else { # for classification create different colored lines plt = ggplot(obj$data, aes_string("Value", "Probability", group = "Class", color = "Class")) + geom_line() + geom_point() } } else { # if individual is true make the lines semi-transparent if (obj$task.desc$type %in% c("regr", "surv") | - (obj$task.desc$type == "classif" & length(obj$task.desc$class.levels) <= 2L)) { + (obj$task.desc$type == "classif" & length(obj$task.desc$class.levels) <= 2L)) { plt = ggplot(obj$data, aes_string("Value", target, group = "n")) + geom_line(alpha = .25, color = ifelse(is.null(data), "black", "red")) + geom_point() } else { @@ -505,45 +522,51 @@ plotPartialDependence = function(obj, geom = "line", facet 
= NULL, facet.wrap.nr # so rename the x-axis using the feature name. rename target only if it was a vector # since in this case the target name isn't passed through if (length(features) == 1L) { - if (obj$task.desc$type %in% c("regr", "surv")) + if (obj$task.desc$type %in% c("regr", "surv")) { plt = plt + labs(x = features, y = target) - else + } else { plt = plt + labs(x = features) + } } # ribbon bounds from se estimation - if (bounds) + if (bounds) { plt = plt + geom_ribbon(aes_string(ymin = "lower", ymax = "upper"), alpha = .5) + } # labels added to for derivative plots - if (obj$derivative) + if (obj$derivative) { plt = plt + ylab(stri_paste(target, "(derivative)", sep = " ")) + } } else { ## tiling if (obj$task.desc$type == "classif") { target = "Probability" facet = "Class" - if ("Function" %in% obj$data) + if ("Function" %in% obj$data) { facet = c(facet, "Function") + } scales = "free" } plt = ggplot(obj$data, aes_string(x = features[1], y = features[2], fill = target)) plt = plt + geom_raster(aes_string(fill = target)) # labels for ICE plots - if (obj$derivative) + if (obj$derivative) { plt = plt + scale_fill_continuous(guide = guide_colorbar(title = stri_paste(target, "(derivative)", sep = " "))) + } } # facetting which is either passed in by the user, the features column when interaction = FALSE and length(features) > 1 # and/or when fun outputs a vector (then facetting on the Function column) if (!is.null(facet)) { - if (length(facet) == 1L) + if (length(facet) == 1L) { plt = plt + facet_wrap(as.formula(stri_paste("~", facet)), scales = scales, nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) - else + } else { plt = plt + facet_wrap(as.formula(stri_paste(facet[2], "~", facet[1])), scales = scales, - nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) # facet ordering is reversed deliberately to handle len = 1 case! + nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) + } # facet ordering is reversed deliberately to handle len = 1 case! 
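A sketch of the multi-output path above: fun returns a named vector, which then facets on the Function column. The quantile-based fun follows the pattern in the mlr documentation.

lrn = makeLearner("regr.rpart")
fit = train(lrn, bh.task)
# quantile() returns a named vector, satisfying the check in
# generatePartialDependenceData that multi-output funs be named
pd = generatePartialDependenceData(fit, bh.task, "lstat",
  fun = function(x) quantile(x, c(.25, .5, .75)))
plotPartialDependence(pd)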
} # data overplotting @@ -553,15 +576,17 @@ plotPartialDependence = function(obj, geom = "line", facet = NULL, facet.wrap.nr feature.facet = facet[facet %in% obj$features] fun.facet = facet[!facet %in% feature.facet] - if (length(fun.facet) > 0L && (fun.facet == "Feature" || !feature.facet %in% obj$features)) + if (length(fun.facet) > 0L && (fun.facet == "Feature" || !feature.facet %in% obj$features)) { data = melt(data, id.vars = c(obj$task.desc$target, feature.facet), variable = "Feature", value.name = "Value", na.rm = TRUE, variable.factor = TRUE) + } if (length(feature.facet) > 0) { - if (!is.factor(data[[feature.facet]])) + if (!is.factor(data[[feature.facet]])) { data[[feature.facet]] = stri_paste(feature.facet, "=", as.factor(signif(data[[feature.facet]], 2)), sep = " ") - else + } else { data[[feature.facet]] = stri_paste(feature.facet, "=", data[[feature.facet]], sep = " ") + } } if (length(fun.facet) > 0L && "Function" %in% fun.facet) { @@ -574,19 +599,19 @@ plotPartialDependence = function(obj, geom = "line", facet = NULL, facet.wrap.nr if (obj$task.desc$type == "classif") { if (!is.na(obj$task.desc$positive)) { plt = plt + geom_rug(aes_string(plt$labels$x, color = obj$task.desc$target), - data[data[[obj$task.desc$target]] == obj$task.desc$positive, ], - alpha = .25, inherit.aes = FALSE) + data[data[[obj$task.desc$target]] == obj$task.desc$positive, ], + alpha = .25, inherit.aes = FALSE) } else { plt = plt + geom_rug(aes_string(plt$labels$x), data, alpha = .25, inherit.aes = FALSE) } } else { plt = plt + geom_rug(aes_string(plt$labels$x), - data[data[[obj$task.desc$target[2]]], ], - alpha = .25, inherit.aes = FALSE) + data[data[[obj$task.desc$target[2]]], ], + alpha = .25, inherit.aes = FALSE) } } else { plt = plt + geom_point(aes_string(plt$labels$x, obj$task.desc$target), - data, alpha = .25, inherit.aes = FALSE) + data, alpha = .25, inherit.aes = FALSE) } } else { plt = plt + geom_point(aes_string(plt$labels$x, plt$labels$y), data, alpha = .25, inherit.aes = FALSE) diff --git a/R/generateThreshVsPerf.R b/R/generateThreshVsPerf.R index 2c84bc3ced..cdbe64694a 100644 --- a/R/generateThreshVsPerf.R +++ b/R/generateThreshVsPerf.R @@ -27,25 +27,29 @@ generateThreshVsPerfData = function(obj, measures, gridsize = 100L, aggregate = UseMethod("generateThreshVsPerfData") #' @export generateThreshVsPerfData.Prediction = function(obj, measures, gridsize = 100L, aggregate = TRUE, - task.id = NULL) { + task.id = NULL) { + checkPrediction(obj, task.type = "classif", binary = TRUE, predict.type = "prob") generateThreshVsPerfData.list(namedList("prediction", obj), measures, gridsize, aggregate, task.id) } #' @export generateThreshVsPerfData.ResampleResult = function(obj, measures, gridsize = 100L, aggregate = TRUE, - task.id = NULL) { + task.id = NULL) { + obj = getRRPredictions(obj) checkPrediction(obj, task.type = "classif", binary = TRUE, predict.type = "prob") generateThreshVsPerfData.Prediction(obj, measures, gridsize, aggregate) } #' @export generateThreshVsPerfData.BenchmarkResult = function(obj, measures, gridsize = 100L, aggregate = TRUE, - task.id = NULL) { + task.id = NULL) { + tids = getBMRTaskIds(obj) - if (is.null(task.id)) + if (is.null(task.id)) { task.id = tids[1L] - else + } else { assertChoice(task.id, tids) + } obj = getBMRPredictions(obj, task.ids = task.id, as.df = FALSE)[[1L]] for (x in obj) @@ -54,11 +58,13 @@ generateThreshVsPerfData.BenchmarkResult = function(obj, measures, gridsize = 10 } #' @export generateThreshVsPerfData.list = function(obj, measures, gridsize = 
100L, aggregate = TRUE, task.id = NULL) { + assertList(obj, c("Prediction", "ResampleResult"), min.len = 1L) ## unwrap ResampleResult to Prediction and set default names if (inherits(obj[[1L]], "ResampleResult")) { - if (is.null(names(obj))) + if (is.null(names(obj))) { names(obj) = extractSubList(obj, "learner.id") + } obj = extractSubList(obj, "pred", simplify = FALSE) } @@ -70,11 +76,14 @@ generateThreshVsPerfData.list = function(obj, measures, gridsize = 100L, aggrega grid = data.frame(threshold = seq(0, 1, length.out = gridsize)) resamp = all(vlapply(obj, function(x) inherits(x, "ResamplePrediction"))) out = lapply(obj, function(x) { + do.call("rbind", lapply(grid$threshold, function(th) { + pp = setThreshold(x, threshold = th) if (!aggregate && resamp) { iter = seq_len(pp$instance$desc$iters) asMatrixRows(lapply(iter, function(i) { + pp$data = pp$data[pp$data$iter == i, ] c(setNames(performance(pp, measures = measures), mids), "iter" = i, "threshold" = th) })) @@ -93,9 +102,9 @@ generateThreshVsPerfData.list = function(obj, measures, gridsize = 100L, aggrega } makeS3Obj("ThreshVsPerfData", - measures = measures, - data = as.data.frame(out), - aggregate = aggregate) + measures = measures, + data = as.data.frame(out), + aggregate = aggregate) } #' @title Plot threshold vs. performance(s) for 2-class classification using ggplot2. @@ -160,17 +169,20 @@ plotThreshVsPerf = function(obj, measures = obj$measures, } data = setDF(melt(as.data.table(obj$data), measure.vars = mnames, variable.name = "measure", value.name = "performance", id.vars = id.vars)) - if (!is.null(data$learner)) + if (!is.null(data$learner)) { nlearn = length(unique(data$learner)) - else + } else { nlearn = 1L + } nmeas = length(unique(data$measure)) - if ((color == "learner" && nlearn == 1L) || (color == "measure" && nmeas == 1L)) + if ((color == "learner" && nlearn == 1L) || (color == "measure" && nmeas == 1L)) { color = NULL + } - if ((facet == "learner" && nlearn == 1L) || (facet == "measure" && nmeas == 1L)) + if ((facet == "learner" && nlearn == 1L) || (facet == "measure" && nmeas == 1L)) { facet = NULL + } if (resamp && !obj$aggregate && is.null(color)) { group = "iter" @@ -184,17 +196,19 @@ plotThreshVsPerf = function(obj, measures = obj$measures, plt = ggplot(data, aes_string(x = "threshold", y = "performance")) plt = plt + geom_line(aes_string(group = group, color = color)) - if (!is.na(mark.th)) + if (!is.na(mark.th)) { plt = plt + geom_vline(xintercept = mark.th) + } if (!is.null(facet)) { plt = plt + facet_wrap(facet, scales = "free_y", nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) } - else if (length(obj$measures) == 1L) + else if (length(obj$measures) == 1L) { plt = plt + ylab(obj$measures[[1]]$name) - else + } else { plt = plt + ylab("performance") + } return(plt) } @@ -228,38 +242,43 @@ plotThreshVsPerf = function(obj, measures = obj$measures, #' pred = predict(fit, task = sonar.task) #' roc = generateThreshVsPerfData(pred, list(fpr, tpr)) #' plotROCCurves(roc) -#' +#' #' r = bootstrapB632plus(lrn, sonar.task, iters = 3) #' roc_r = generateThreshVsPerfData(r, list(fpr, tpr), aggregate = FALSE) #' plotROCCurves(roc_r) -#' +#' #' r2 = crossval(lrn, sonar.task, iters = 3) #' roc_l = generateThreshVsPerfData(list(boot = r, cv = r2), list(fpr, tpr), aggregate = FALSE) #' plotROCCurves(roc_l) #' } plotROCCurves = function(obj, measures, diagonal = TRUE, pretty.names = TRUE, facet.learner = FALSE) { + assertClass(obj, "ThreshVsPerfData") - if (missing(measures)) + if (missing(measures)) { measures = 
obj$measures[1:2] + } assertList(measures, "Measure", len = 2) assertFlag(diagonal) assertFlag(pretty.names) assertFlag(facet.learner) - if (is.null(names(measures))) + if (is.null(names(measures))) { names(measures) = extractSubList(measures, "id") + } - if (pretty.names) + if (pretty.names) { mnames = replaceDupeMeasureNames(measures, "name") - else + } else { mnames = names(measures) + } - if (!is.null(obj$data$learner)) + if (!is.null(obj$data$learner)) { mlearn = length(unique(obj$data$learner)) > 1L - else + } else { mlearn = FALSE + } resamp = "iter" %in% colnames(obj$data) @@ -276,15 +295,18 @@ plotROCCurves = function(obj, measures, diagonal = TRUE, pretty.names = TRUE, fa obj$data = obj$data[order(obj$data$threshold), ] } - if (mlearn && !facet.learner) + if (mlearn && !facet.learner) { aes$color = "learner" + } p = ggplot(obj$data, do.call(aes_string, aes)) + geom_path() + labs(x = mnames[1], y = mnames[2]) - if (mlearn && facet.learner) - p = p + facet_wrap(~ learner) + if (mlearn && facet.learner) { + p = p + facet_wrap(~learner) + } - if (diagonal && all(vlapply(obj$data[, names(measures)], function(x) max(x, na.rm = TRUE) <= 1))) + if (diagonal && all(vlapply(obj$data[, names(measures)], function(x) max(x, na.rm = TRUE) <= 1))) { p = p + geom_abline(aes(intercept = 0, slope = 1), linetype = "dashed", alpha = .5) + } p } diff --git a/R/getCaretParamSet.R b/R/getCaretParamSet.R index 189b1fba0e..bd3977e341 100644 --- a/R/getCaretParamSet.R +++ b/R/getCaretParamSet.R @@ -34,23 +34,25 @@ #' if (requireNamespace("caret") && requireNamespace("mlbench")) { #' library(caret) #' classifTask = makeClassifTask(data = iris, target = "Species") -#' +#' #' # (1) classification (random forest) with discretized parameters #' getCaretParamSet("rf", length = 9L, task = classifTask, discretize = TRUE) -#' +#' #' # (2) regression (gradient boosting machine) without discretized parameters #' library(mlbench) #' data(BostonHousing) #' regrTask = makeRegrTask(data = BostonHousing, target = "medv") #' getCaretParamSet("gbm", length = 9L, task = regrTask, discretize = FALSE) #' } -getCaretParamSet = function(learner, length = 3L, task, discretize = TRUE){ +getCaretParamSet = function(learner, length = 3L, task, discretize = TRUE) { + td = getTaskData(task, target.extra = TRUE) caret.grid = caret::getModelInfo(learner)[[learner]]$grid( x = td$data, y = td$target, len = length) # transfer caret parameters into mlr parameters params = lapply(colnames(caret.grid), function(i) { + par.vals = sort(unique(caret.grid[, i])) cl = class(par.vals) if (cl == "factor") { @@ -62,8 +64,9 @@ getCaretParamSet = function(learner, length = 3L, task, discretize = TRUE){ cl = "character" } } - if (discretize) + if (discretize) { cl = "character" + } switch(cl, character = makeDiscreteParam(id = i, values = par.vals), logical = makeLogicalParam(id = i), @@ -75,6 +78,7 @@ getCaretParamSet = function(learner, length = 3L, task, discretize = TRUE){ # are the parameters configurable or are the values unique? 
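Relating back to generateThreshVsPerfData, plotThreshVsPerf, and plotROCCurves above, a minimal end-to-end sketch; the lda learner and the mark.th value are illustrative choices.

lrn = makeLearner("classif.lda", predict.type = "prob")
mod = train(lrn, sonar.task)
pred = predict(mod, task = sonar.task)
tvp = generateThreshVsPerfData(pred, measures = list(fpr, tpr, mmce))
plotThreshVsPerf(tvp, mark.th = 0.5) # vertical line at the marked threshold
plotROCCurves(tvp)                   # defaults to the first two measures, here fpr/tpr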
is.tunable = vlapply(params, function(x) { + (!is.null(x$values) && length(x$values) > 1) | (!is.null(x$lower) && !is.null(x$upper) && (x$upper > x$lower)) }) @@ -84,8 +88,10 @@ getCaretParamSet = function(learner, length = 3L, task, discretize = TRUE){ par.vals = NULL } else { par.vals = lapply(caret.grid[!is.tunable], function(x) { - if (is.factor(x)) + + if (is.factor(x)) { x = as.character(x) + } return(x[1L]) }) # convert integerish variables into integer diff --git a/R/getClassWeightParam.R b/R/getClassWeightParam.R index 548dab711e..e54add94a6 100644 --- a/R/getClassWeightParam.R +++ b/R/getClassWeightParam.R @@ -9,6 +9,7 @@ #' @family learner #' @export getClassWeightParam = function(learner) { + learner = checkLearner(learner, "classif") assertChoice("class.weights", getLearnerProperties(learner)) learner$par.set$pars[[learner$class.weights.param]] diff --git a/R/getConfMatrix.R b/R/getConfMatrix.R index 178aabda64..930824b3bd 100644 --- a/R/getConfMatrix.R +++ b/R/getConfMatrix.R @@ -27,6 +27,7 @@ #' @export #' @seealso [predict.WrappedModel] getConfMatrix = function(pred, relative = FALSE) { + .Deprecated("calculateConfusionMatrix") calculateConfusionMatrix(pred, relative = relative) } diff --git a/R/getFeatSelResult.R b/R/getFeatSelResult.R index 50a79c3b96..aadc447092 100644 --- a/R/getFeatSelResult.R +++ b/R/getFeatSelResult.R @@ -6,8 +6,7 @@ #' @export #' @family featsel getFeatSelResult = function(object) { + assertClass(object, "FeatSelModel") object$learner.model$opt.result } - - diff --git a/R/getFeatureImportance.R b/R/getFeatureImportance.R index 27f37547dd..ae1e3b971a 100644 --- a/R/getFeatureImportance.R +++ b/R/getFeatureImportance.R @@ -62,14 +62,15 @@ getFeatureImportance = function(object, ...) { lrn = checkLearner(object$learner, props = "featimp") imp = getFeatureImportanceLearner(lrn, object, ...) - if (!check_numeric(imp, names = "unique") && !check_subset(names(imp), object$features)) + if (!check_numeric(imp, names = "unique") && !check_subset(names(imp), object$features)) { stop("getFeatureImportanceLearner did not return a named vector with names of the task features.") + } - #We need to add missing pars with zero and order them + # We need to add missing pars with zero and order them imp[setdiff(object$features, names(imp))] = 0 imp = imp[object$features] - #convert named vector to data.frame with columns and set NA to 0 + # convert named vector to data.frame with columns and set NA to 0 imp[is.na(imp)] = 0L imp = as.data.frame(t(imp)) rownames(imp) = NULL @@ -107,10 +108,12 @@ getFeatureImportance = function(object, ...) { #' @export #' @keywords internal getFeatureImportanceLearner = function(.learner, .model, ...) { + UseMethod("getFeatureImportanceLearner") } #' @export getFeatureImportanceLearner.BaseWrapper = function(.learner, .model, ...) { + getFeatureImportanceLearner(.learner$next.learner, .model = .model, ...) } diff --git a/R/getFunctionalFeatures.R b/R/getFunctionalFeatures.R index 9dba8e7c0a..605fa6a6cc 100644 --- a/R/getFunctionalFeatures.R +++ b/R/getFunctionalFeatures.R @@ -7,12 +7,14 @@ # @return Returns a [\code{data.frame}] containing only the functional features. 
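A short sketch of the getFeatureImportance path above; classif.rpart is assumed to carry the "featimp" property, and the $res slot holding the one-row data.frame follows from the construction in this file.

fit = train(makeLearner("classif.rpart"), iris.task)
imp = getFeatureImportance(fit)
# one column per task feature; features the learner ignored are filled
# with 0, and NAs are set to 0, as done above
imp$res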
# @export getFunctionalFeatures = function(object, subset = NULL, features, recode.target = "no") { + UseMethod("getFunctionalFeatures") } # @export # @rdname getFunctionalFeatures getFunctionalFeatures.Task = function(object, subset = NULL, features, recode.target = "no") { + # Get data and pass on to data.frame method df = getTaskData(object, subset, features, target.extra = TRUE, recode.target, functionals.as = "matrix") getFunctionalFeatures.data.frame(df$data) @@ -20,13 +22,12 @@ getFunctionalFeatures.Task = function(object, subset = NULL, features, recode.ta # @export # @rdname getFunctionalFeatures -getFunctionalFeatures.data.frame = function(object, subset = NULL, features, recode.target = "no"){ +getFunctionalFeatures.data.frame = function(object, subset = NULL, features, recode.target = "no") { + # Keep only columns with class matrix funct.cols = which(vcapply(object, function(x) class(x)[1L]) == "matrix") - if (length(funct.cols) == 0L) + if (length(funct.cols) == 0L) { stop("No functional features in the data") + } object[, funct.cols, drop = FALSE] } - - - diff --git a/R/getHyperPars.R b/R/getHyperPars.R index 64f26d11f1..72f02ba36d 100644 --- a/R/getHyperPars.R +++ b/R/getHyperPars.R @@ -13,13 +13,15 @@ #' @return ([list]). A named list of values. #' @family learner #' @export -getHyperPars = function(learner, for.fun = c("train", "predict", "both")) { +getHyperPars = function(learner, for.fun = c("train", "predict", "both")) { + assertSubset(for.fun, choices = c("train", "predict", "both")) UseMethod("getHyperPars") } #' @export getHyperPars.Learner = function(learner, for.fun = c("train", "predict", "both")) { + assertClass(learner, classes = "Learner") pars = learner$par.set$pars pv = learner$par.vals @@ -28,10 +30,10 @@ getHyperPars.Learner = function(learner, for.fun = c("train", "predict", "both") } getHyperParsString = function(learner, show.missing.values = TRUE) { + hps = getHyperPars(learner) ns = names(hps) pars = getParamSet(learner)$pars[ns] s = mapply(paramValueToString, pars, hps, MoreArgs = list(show.missing.values = show.missing.values)) stri_paste(ns, s, sep = "=", collapse = ",") } - diff --git a/R/getMultilabelBinaryPerformances.R b/R/getMultilabelBinaryPerformances.R index 6cff138823..75cbcba4f8 100644 --- a/R/getMultilabelBinaryPerformances.R +++ b/R/getMultilabelBinaryPerformances.R @@ -15,6 +15,7 @@ #' @examples #' # see makeMultilabelBinaryRelevanceWrapper getMultilabelBinaryPerformances = function(pred, measures) { + checkPrediction(pred, task.type = "multilabel") measures = checkMeasures(measures, "classif") p = matrix(, length(pred$task.desc$class.levels), length(measures)) @@ -22,8 +23,9 @@ getMultilabelBinaryPerformances = function(pred, measures) { rownames(p) = pred$task.desc$class.levels truths = getPredictionTruth(pred) responses = getPredictionResponse(pred) - if (pred$predict.type == "prob") + if (pred$predict.type == "prob") { probs = getPredictionProbabilities(pred) + } for (measure in measures) { predi = pred predi$task.desc$type = "classif" @@ -33,8 +35,9 @@ getMultilabelBinaryPerformances = function(pred, measures) { measurename = measureAggrName(measure) for (label in pred$task.desc$class.levels) { predi$data = data.frame(truth = truths[, label], response = responses[, label]) - if (pred$predict.type == "prob") + if (pred$predict.type == "prob") { predi$data$prob.TRUE = probs[, label] + } p[label, measurename] = performance(predi, measure) } } diff --git a/R/getNestedTuneResults.R b/R/getNestedTuneResults.R index 
e9396fe247..ef584f2557 100644 --- a/R/getNestedTuneResults.R +++ b/R/getNestedTuneResults.R @@ -13,6 +13,7 @@ #' # see example of makeTuneWrapper #' @export getNestedTuneResultsX = function(r) { + assertClass(r, "ResampleResult") assertList(r$extract) lapply(r$extract, assertClass, classes = "TuneResult") @@ -42,6 +43,7 @@ getNestedTuneResultsX = function(r) { #' # see example of makeTuneWrapper #' @export getNestedTuneResultsOptPathDf = function(r, trafo = FALSE) { + assertClass(r, "ResampleResult") assertList(r$extract) lapply(r$extract, assertClass, classes = "TuneResult") @@ -50,6 +52,7 @@ getNestedTuneResultsOptPathDf = function(r, trafo = FALSE) { if (trafo) ops = lapply(ops, trafoOptPath) op.dfs = lapply(ops, as.data.frame) op.dfs = setDF(rbindlist(lapply(seq_along(op.dfs), function(i) { + op.dfs[[i]][, "iter"] = i op.dfs[[i]] }), fill = TRUE)) diff --git a/R/getOOBPreds.R b/R/getOOBPreds.R index b212f71508..8c80a72768 100644 --- a/R/getOOBPreds.R +++ b/R/getOOBPreds.R @@ -23,6 +23,7 @@ #' oob #' performance(oob, measures = list(auc, mmce)) getOOBPreds = function(model, task) { + assertClass(model, classes = "WrappedModel") checkTask(task, task.type = c("classif", "regr", "surv")) checkModelCorrespondsTask(model, task) @@ -53,18 +54,22 @@ getOOBPreds = function(model, task) { #' @export #' @keywords internal getOOBPredsLearner = function(.learner, .model) { + UseMethod("getOOBPredsLearner") } #' @export getOOBPredsLearner.BaseWrapper = function(.learner, .model) { + getOOBPredsLearner(.learner$next.learner, .model = .model) } # checks if the model was trained on the corresponding task by comparing # the descriptions checkModelCorrespondsTask = function(model, task) { + compare = c("id", "type", "target", "n.feats", "has.weights", "has.blocking", "is.spatial", "positive") - if (!identical(task$task.desc[compare], model$task.desc[compare])) + if (!identical(task$task.desc[compare], model$task.desc[compare])) { stopf("Description of the model does not correspond to the task") + } } diff --git a/R/getParamSet.R b/R/getParamSet.R index 306f654fae..562a44164f 100644 --- a/R/getParamSet.R +++ b/R/getParamSet.R @@ -11,11 +11,13 @@ NULL #' @export getParamSet.Learner = function(x) { + x$par.set } #' @export getParamSet.character = function(x) { + x = checkLearner(x) getParamSet(x) } diff --git a/R/getResampleExtract.R b/R/getResampleExtract.R index 19c48ec197..c664d07803 100644 --- a/R/getResampleExtract.R +++ b/R/getResampleExtract.R @@ -1,4 +1,4 @@ -#In the following we have the functions +# In the following we have the functions # - getResampleExtract2: Returns a feasible function we can use as extract in resample() for each specific learner class. # All is handled recursively because we have wrapped learners. # getResampleExtract2 returns a list of all applicable functions # - getResampleExtract converts the list of functions obtained by getResampleExtract2 to a single function which returns a list of each function result.
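Completing the truncated roxygen example above getOOBPreds: a sketch assuming classif.randomForest, one of the learners carrying the required "oobpreds" property.

lrn = makeLearner("classif.randomForest", predict.type = "prob")
mod = train(lrn, sonar.task)
oob = getOOBPreds(mod, sonar.task)
performance(oob, measures = list(auc, mmce))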
getResampleExtract2 = function(learner) { + UseMethod("getResampleExtract2") } getResampleExtract2.default = function(learner) { + stopf("Wrapper without underlying Learner.") } getResampleExtract2.Learner = function(learner) { + NULL } getResampleExtract2.BaseWrapper = function(learner) { + getResampleExtract2(learner$next.learner) } getResampleExtract2.TuneWrapper = function(learner) { + c(list(TuneResult = getTuneResult), getResampleExtract2(learner$next.learner)) } getResampleExtract2.FeatSelWrapper = function(learner) { + c(list(FeatSelResult = getFeatSelResult), getResampleExtract2(learner$next.learner)) } getResampleExtract2.FilterWrapper = function(learner) { + c(list(FilteredFeatures = getFilteredFeatures), getResampleExtract2(learner$next.learner)) } -getResampleExtract = function(learner){ +getResampleExtract = function(learner) { + functions = getResampleExtract2(learner) function(x) { + if (length(functions) == 1L) { functions[[1L]](x) } else { diff --git a/R/getResamplingIndices.R b/R/getResamplingIndices.R index 1d409e0bac..33584356da 100644 --- a/R/getResamplingIndices.R +++ b/R/getResamplingIndices.R @@ -18,7 +18,7 @@ #' ps = makeParamSet( #' makeDiscreteParam("cp", values = c(0.05, 0.1)), #' makeDiscreteParam("minsplit", values = c(10, 20)) -#' ) +#' ) #' ctrl = makeTuneControlGrid() #' inner = makeResampleDesc("Holdout") #' outer = makeResampleDesc("CV", iters = 2) @@ -28,9 +28,9 @@ #' r = resample(lrn, task, outer, extract = getTuneResult) #' # get tuning indices #' getResamplingIndices(r, inner = TRUE) -#' #' @export getResamplingIndices = function(object, inner = FALSE) { + assertClass(object, "ResampleResult") assertList(object$extract) if (inner == TRUE) { @@ -48,8 +48,8 @@ getResamplingIndices = function(object, inner = FALSE) { sapply(c("train.inds", "test.inds"), function(u) # map over train/test level sapply(inner_inds[[z]][[u]], function(m) # map over number of inner folds outer_inds[["train.inds"]][[z]][m], # the inner test.inds are a subset of the outer train.inds! 
That's why "train.inds" is hardcoded here - simplify = FALSE), - simplify = FALSE) + simplify = FALSE), + simplify = FALSE) ) return(inner_inds_translated) diff --git a/R/getTaskConstructorForLearner.R b/R/getTaskConstructorForLearner.R index ad99413743..b83aae9682 100644 --- a/R/getTaskConstructorForLearner.R +++ b/R/getTaskConstructorForLearner.R @@ -1,4 +1,5 @@ getTaskConstructorForLearner = function(learner) { + while (inherits(learner, "BaseWrapper")) learner = learner$next.learner cl = class(learner) diff --git a/R/getTuneResult.R b/R/getTuneResult.R index 5789cc2f51..bd7fdb9b00 100644 --- a/R/getTuneResult.R +++ b/R/getTuneResult.R @@ -6,7 +6,7 @@ #' @family tune #' @export getTuneResult = function(object) { + assertClass(object, "TuneModel") object$learner.model$opt.result } - diff --git a/R/getTuneThresholdExtra.R b/R/getTuneThresholdExtra.R index c39178994c..3bf7784fe6 100644 --- a/R/getTuneThresholdExtra.R +++ b/R/getTuneThresholdExtra.R @@ -3,6 +3,7 @@ # control [TuneControl] # res [result from evalOptimizationState] getTuneThresholdExtra = function(control, res) { + if (control$tune.threshold) { # add class names to threshold, if longer than 1 extra = as.list(res$threshold) @@ -11,4 +12,3 @@ getTuneThresholdExtra = function(control, res) { NULL } } - diff --git a/R/hasFunctionalFeatures.R b/R/hasFunctionalFeatures.R index 49cefbb60c..2f3a3c0d6f 100644 --- a/R/hasFunctionalFeatures.R +++ b/R/hasFunctionalFeatures.R @@ -8,18 +8,21 @@ #' @return (`logical(1)`) #' @export hasFunctionalFeatures = function(obj) { + UseMethod("hasFunctionalFeatures") } hasFunctionalFeatures.data.frame = function(obj) { + any(vlapply(obj, is.matrix)) } hasFunctionalFeatures.Task = function(obj) { + hasFunctionalFeatures.TaskDesc(obj$task.desc) } hasFunctionalFeatures.TaskDesc = function(obj) { + obj$n.feat["functionals"] > 0L } - diff --git a/R/helpLearner.R b/R/helpLearner.R index 2993c95e68..f7908b9b29 100644 --- a/R/helpLearner.R +++ b/R/helpLearner.R @@ -7,6 +7,7 @@ #' @family learner #' @family help helpLearner = function(learner) { + learner = checkLearner(learner) callees = learner$callees if (identical(callees, "")) { @@ -62,6 +63,7 @@ helpLearner = function(learner) { #' @family learner #' @family help helpLearnerParam = function(learner, param = NULL) { + learner = checkLearner(learner) if (!inherits(learner, "RLearner")) { current.learner = learner @@ -126,6 +128,7 @@ helpLearnerParam = function(learner, param = NULL) { # remove nesting levels of XML tags simplifyNode = function(node) { + children = XML::xmlChildren(node) lens = nchar(stri_trim(vcapply(children, XML::xmlValue))) if (length(lens) < 1) { @@ -146,32 +149,36 @@ simplifyNode = function(node) { # collect all
<li><code>xxx</code>yyy</li>
in the document # and form a data.frame with two columns corresponding to xxx and yyy. codeListToTable = function(html) { + lis = XML::getNodeSet(html, "//li") lislis = lapply(lis, function(li) { - lichi = simplifyNode(li) - if (length(lichi) < 2 || names(lichi)[1] != "code") { - return(NULL) - } - parname = XML::xmlValue(lichi[[1]]) - pardesc = stri_join_list(lapply(lichi[-1], XML::xmlValue), collapse = " ") - stri_trim(c(parname, pardesc), pattern = c("[a-zA-Z0-9_.]", "\\P{Wspace}")) - }) + + lichi = simplifyNode(li) + if (length(lichi) < 2 || names(lichi)[1] != "code") { + return(NULL) + } + parname = XML::xmlValue(lichi[[1]]) + pardesc = stri_join_list(lapply(lichi[-1], XML::xmlValue), collapse = " ") + stri_trim(c(parname, pardesc), pattern = c("[a-zA-Z0-9_.]", "\\P{Wspace}")) + }) as.data.frame(do.call(rbind, lislis), stringsAsFactors = FALSE) } # Remove superfluous newlines. prepareString = function(string) { + # turn 'a \n \n \n b' into 'a\n\nb' string = stri_replace_all(string, "\n\n", regex = " *\n *(\n *)+") # turn 'a \n b' into 'a b' string = stri_replace_all(string, " ", regex = "(?<!\n)\n(?!\n)") -extract if a param occurs in both. @@ -226,7 +234,7 @@ makeParamHelpList = function(funs, pkgs, par.set) { # helper function to get learner's undocumented functions. listUndocumentedPars = function(learner) { + learner = checkLearner(learner) setdiff(getParamIds(learner$par.set), names(learner$help.list)) } - diff --git a/R/helpers.R b/R/helpers.R index c737ae13ab..534f78decb 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -1,24 +1,29 @@ requireLearnerPackages = function(learner) { + requirePackages(learner$package, why = stri_paste("learner", learner$id, sep = " "), default.method = "load") } cleanupPackageNames = function(pkgs) { + stri_replace_all(pkgs, "", regex = "^[!_]") } # paste together measure and aggregation ids measureAggrName = function(measure) { + stri_paste(measure$id, measure$aggr$id, sep = ".") } # paste together measure and aggregation names measureAggrPrettyName = function(measure) { + stri_paste(measure$name, measure$aggr$name, sep = ": ") } # convert a named numvec of perf values (think 'aggr' from resample) into flat string # ala <name1>=<value1>,...,<namek>=<valuek> perfsToString = function(y, sep = "=", digits = options()$digits) { + stri_paste(stri_paste(names(y), "=", formatC(y, digits = digits, flag = "0", format = "f"), sep = ""), collapse = ",", sep = " ") } @@ -30,14 +35,17 @@ perfsToString = function(y, sep = "=", digits = options()$digits) { # Example output (prefix = "[Resample] iter 1:"): # [Resample] iter 1: 0.0000000 0.0370370 0.9629630 printResampleFormatLine = function(prefix, y, digits = options()$digits) { + # get desired width for each col (if measure ids are short --> digits) # +3L to obtain spaces between cols - if (is.null(names(y))) + if (is.null(names(y))) { names(y) = y + } tab.width = max(stri_width(names(y)), digits) + 3L # if we get perf vals format decimals and add trailing zeros where needed - if (is.numeric(y)) + if (is.numeric(y)) { y = formatC(y, digits = digits, flag = "0", format = "f") + } # Extend width of prefix and y. width = 22 is the ideal size for # the prefix column. Change value here when iter.message was # modified in resample.R @@ -48,26 +56,31 @@ printResampleFormatLine = function(prefix, y, digits = options()$digits) { removeFromDots = function(ns, ...) {
removeFromDots = function(ns, ...) {
+
  args = list(...)
  args[setdiff(names(args), ns)]
}

attachTrainingInfo = function(x, info) {
+
  attr(x, "mlr.train.info") = info
  x
}

getTrainingInfo = function(x) {
+
  attr(x, "mlr.train.info") %??% attr(x$learner.model, "mlr.train.info")
}

getLearnerOptions = function(lrn, opts) {
+
  lrn.opts = getLeafLearner(lrn)$config
  setNames(lapply(opts, function(x) lrn.opts[[x]] %??% getMlrOption(x)), opts)
}

# p = probabilities for levs[2] => matrix with probs for levs[1] and levs[2]
propVectorToMatrix = function(p, levs) {
+
  assertNumeric(p)
  y = matrix(0, ncol = 2L, nrow = length(p))
  colnames(y) = levs
@@ -84,11 +97,13 @@ propVectorToMatrix = function(p, levs) {
#' @return ([character]).
#' @export
listTaskTypes = function() {
+
  c("classif", "regr", "surv", "costsens", "cluster", "multilabel")
}

# Maybe move to BBmisc at some point
measureTime = function(expr, ee = parent.frame()) {
+
  before = proc.time()[[3L]]
  force(expr)
  proc.time()[[3L]] - before
@@ -97,15 +112,17 @@ measureTime = function(expr, ee = parent.frame()) {
# find duplicate measure names or ids and paste together those
# with the associated aggregation ids or names
replaceDupeMeasureNames = function(measures, x = "id") {
+
  assertList(measures, "Measure")
  assertChoice(x, c("id", "name"))
  meas.names = extractSubList(measures, x)
  dupes = table(meas.names)
  dupes = which(meas.names %in% names(dupes[dupes > 1]))
-  if (x == "id")
+  if (x == "id") {
    new.names = sapply(measures[dupes], function(x) measureAggrName(x))
-  else
+  } else {
    new.names = sapply(measures[dupes], function(x) measureAggrPrettyName(x))
+  }
  meas.names[dupes] = new.names
  unlist(meas.names)
}
@@ -113,24 +130,30 @@ replaceDupeMeasureNames = function(measures, x = "id") {
# suppresses a warning iff the warning message contains the
# substring `str`.
suppressWarning = function(expr, str) {
+
  withCallingHandlers(expr, warning = function(w) {
-    if (stri_detect_fixed(stri_flatten(w$message), str))
+
+    if (stri_detect_fixed(stri_flatten(w$message), str)) {
      invokeRestart("muffleWarning")
+    }
  })
}
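# Editor's note, not part of the patch: a minimal sketch of how suppressWarning()
# above behaves - a warning is muffled only if its message contains the substring:
suppressWarning(as.integer("x"), "NAs introduced") # coercion warning is muffled
suppressWarning(as.integer("x"), "unrelated text") # warning propagates as usual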
hasEmptyLevels = function(x) {
+
  !all(levels(x) %chin% as.character(unique(x)))
}

# thin a vector
thin = function(x, skip = 0) {
+
  n = length(x)
  x[seq(1, n, by = skip)]
}

# scale window if < 1
scaleWindows = function(window, scaler) {
+
  if (window < 1) {
    scaled.window = round(window * scaler)
  } else {
@@ -141,23 +164,24 @@
# Create the resampling windows for growing and fixed window cross validation
makeResamplingWindow = function(desc, size, task = NULL, coords, window.type) {
+
  initial.window.abs = scaleWindows(desc$initial.window, size)
  horizon.window = scaleWindows(desc$horizon, initial.window.abs)
  if (size - initial.window.abs < horizon.window) {
    stop(catf("The initial window is %i observations while the data is %i observations. \n There is not enough data left (%i observations) to create a test set for a %i size horizon.",
-    initial.window.abs, size, initial.window.abs - size, horizon.window))
+      initial.window.abs, size, initial.window.abs - size, horizon.window))
  }
  if (window.type == "FixedWindowCV") {
-    stops = (seq(size))[initial.window.abs:(size - horizon.window)]
+    stops = (seq(size))[initial.window.abs:(size - horizon.window)]
    starts = stops - initial.window.abs + 1
    train.inds = mapply(seq, starts, stops, SIMPLIFY = FALSE)
-    test.inds = mapply(seq, stops + 1, stops + horizon.window, SIMPLIFY = FALSE)
+    test.inds = mapply(seq, stops + 1, stops + horizon.window, SIMPLIFY = FALSE)
  } else if (window.type == "GrowingWindowCV") {
-    stops = (seq(from = 1, to = size))[initial.window.abs:(size - horizon.window)]
+    stops = (seq(from = 1, to = size))[initial.window.abs:(size - horizon.window)]
    starts = rep(1, length(stops))
    train.inds = mapply(seq, starts, stops, SIMPLIFY = FALSE)
-    test.inds = mapply(seq, stops + 1, stops + horizon.window, SIMPLIFY = FALSE)
+    test.inds = mapply(seq, stops + 1, stops + horizon.window, SIMPLIFY = FALSE)
  }
  skip = scaleWindows(desc$skip, length(train.inds))
diff --git a/R/helpers_fda.R b/R/helpers_fda.R
index 611fe8d6a0..6a3f962c47 100644
--- a/R/helpers_fda.R
+++ b/R/helpers_fda.R
@@ -6,11 +6,13 @@ fdFeatsToColumnIndex = function(df, fd.features = NULL, exclude.cols = NULL) {
  # If fd.features is NULL, all numerics are a single functional feature
  # Already existing matrices are not converted
-  if (is.null(fd.features))
+  if (is.null(fd.features)) {
    fd.features = list("fd1" = setdiff(which(vlapply(df, is.numeric)), c(exclude.cols, fd.mats)))
+  }
  # Return the column index and check if indices/names refer to columns
  lapply(fd.features, function(fd.feature) {
+
    if (is.character(fd.feature)) {
      assertSubset(fd.feature, colnames(df), empty.ok = FALSE)
      setdiff(which(colnames(df) %in% fd.feature), exclude.cols)
@@ -24,12 +26,10 @@
# Convert a data.frame containing functional features to a data.frame containing
# them as numerics.
functionalToNormalData = function(df) {
+
  if (hasFunctionalFeatures(df)) {
    df = do.call(data.frame, as.list(df))
    message("Functional features have been converted to numerics")
  }
  return(df)
}
-
-
-
diff --git a/R/joinClassLevels.R b/R/joinClassLevels.R
index d665870740..fa5b658530 100644
--- a/R/joinClassLevels.R
+++ b/R/joinClassLevels.R
@@ -9,21 +9,25 @@
#' @examples
#' joinClassLevels(iris.task, new.levels = list(foo = c("setosa", "virginica")))
joinClassLevels = function(task, new.levels) {
+
  UseMethod("joinClassLevels")
}

#' @export
joinClassLevels.ClassifTask = function(task, new.levels) {
+
  assertList(new.levels, types = "character", names = "unique")
  target = getTaskTargetNames(task)
  y = as.character(getTaskTargets(task))
  nls1 = unlist(new.levels)
  nls2 = unique(nls1)
  d = setdiff(nls2, unique(y))
-  if (length(d) > 0L)
+  if (length(d) > 0L) {
    stopf("You can only recode already existing class levels, but you also used: %s", collapse(d))
-  if (length(nls2) != length(nls1))
+  }
+  if (length(nls2) != length(nls1)) {
    stopf("Every existing class level in 'new.levels' can be used at most once!")
+  }
  new.names = names(new.levels)
  for (nn in new.names) {
    levs = new.levels[[nn]]
diff --git a/R/learnerArgsToControl.R b/R/learnerArgsToControl.R
index 525ec8bc99..7093f5201a 100644
--- a/R/learnerArgsToControl.R
+++ b/R/learnerArgsToControl.R
@@ -10,6 +10,7 @@
#' @return Control structure for learner.
#' @export learnerArgsToControl = function(control, ...) { + args = list() dots = match.call(expand.dots = FALSE)$... for (i in seq_along(dots)) { @@ -17,14 +18,14 @@ learnerArgsToControl = function(control, ...) { is.missing = if (is.symbol(arg)) { argname = as.character(arg) eval(substitute(missing(symbol), list(symbol = arg)), - envir = parent.frame()) + envir = parent.frame()) } else { argname = names(dots)[i] FALSE } if (!is.missing) { value = tryCatch(eval(arg, envir = parent.frame()), - error = function(...) NULL) + error = function(...) NULL) if (!is.null(value)) { args[[as.character(argname)]] = value } diff --git a/R/listLearners.R b/R/listLearners.R index 93e2789009..152a3aa9af 100644 --- a/R/listLearners.R +++ b/R/listLearners.R @@ -1,10 +1,12 @@ getLearnerTable = function() { + ids = as.character(methods("makeRLearner")) ids = ids[!stri_detect_fixed(ids, "__mlrmocklearners__")] ids = stri_replace_first_fixed(ids, "makeRLearner.", "") slots = c("cl", "name", "short.name", "package", "properties", "note") ee = asNamespace("mlr") tab = rbindlist(lapply(ids, function(id) { + fun = getS3method("makeRLearner", id) row = lapply(as.list(functionBody(fun)[[2L]])[slots], eval, envir = ee) data.table( @@ -29,13 +31,16 @@ getLearnerTable = function() { } filterLearnerTable = function(tab = getLearnerTable(), types = character(0L), properties = character(0L), check.packages = FALSE) { + contains = function(lhs, rhs) all(lhs %in% rhs) - if (check.packages) + if (check.packages) { tab = tab[tab$installed] + } - if (length(types) > 0L && !isScalarNA(types)) + if (length(types) > 0L && !isScalarNA(types)) { tab = tab[tab$type %in% types] + } if (length(properties) > 0L) { i = vlapply(tab$properties, contains, lhs = properties) @@ -94,7 +99,7 @@ filterLearnerTable = function(tab = getLearnerTable(), types = character(0L), pr #' listLearners(task) #' } #' @export -listLearners = function(obj = NA_character_, properties = character(0L), +listLearners = function(obj = NA_character_, properties = character(0L), quiet = TRUE, warn.missing.packages = TRUE, check.packages = FALSE, create = FALSE) { assertSubset(properties, listLearnerProperties()) @@ -108,7 +113,7 @@ listLearners = function(obj = NA_character_, properties = character(0L), #' @export #' @rdname listLearners -listLearners.default = function(obj = NA_character_, properties = character(0L), +listLearners.default = function(obj = NA_character_, properties = character(0L), quiet = TRUE, warn.missing.packages = TRUE, check.packages = FALSE, create = FALSE) { listLearners.character(obj = NA_character_, properties, quiet, warn.missing.packages, check.packages, create) @@ -116,18 +121,22 @@ listLearners.default = function(obj = NA_character_, properties = character(0L) #' @export #' @rdname listLearners -listLearners.character = function(obj = NA_character_, properties = character(0L), quiet = TRUE, warn.missing.packages = TRUE, check.packages = FALSE, create = FALSE) { - if (!isScalarNA(obj)) +listLearners.character = function(obj = NA_character_, properties = character(0L), quiet = TRUE, warn.missing.packages = TRUE, check.packages = FALSE, create = FALSE) { + + if (!isScalarNA(obj)) { assertSubset(obj, listTaskTypes()) + } tab = getLearnerTable() - if (warn.missing.packages && !all(tab$installed)) + if (warn.missing.packages && !all(tab$installed)) { warningf("The following learners could not be constructed, probably because their packages are not installed:\n%s\nCheck ?learners to see which packages you need or install mlr with all 
suggestions.", collapse(tab[!tab$installed]$id)) + } tab = filterLearnerTable(tab, types = obj, properties = properties, check.packages = check.packages && !create) - if (create) + if (create) { return(lapply(tab$id[tab$installed], makeLearner)) + } tab$package = vcapply(tab$package, collapse) properties = listLearnerProperties() @@ -162,5 +171,6 @@ listLearners.Task = function(obj = NA_character_, properties = character(0L), #' @export print.ListLearners = function(x, ...) { + printHead(as.data.frame(dropNamed(x, drop = "note")), ...) } diff --git a/R/listMeasures.R b/R/listMeasures.R index f1ead56036..1ae2c18291 100644 --- a/R/listMeasures.R +++ b/R/listMeasures.R @@ -16,8 +16,10 @@ #' measures or instantiated objects. #' @export listMeasures = function(obj, properties = character(0L), create = FALSE) { - if (!missing(obj)) + + if (!missing(obj)) { assert(checkCharacter(obj), checkClass(obj, "Task")) + } assertSubset(properties, listMeasureProperties()) assertFlag(create) UseMethod("listMeasures") @@ -26,32 +28,39 @@ listMeasures = function(obj, properties = character(0L), create = FALSE) { #' @rdname listMeasures #' @export listMeasures.default = function(obj, properties = character(0L), create = FALSE) { + listMeasures2(properties, create) } #' @rdname listMeasures #' @export listMeasures.character = function(obj, properties = character(0L), create = FALSE) { + assertChoice(obj, choices = c("classif", "multilabel", "regr", "surv", "costsens", "cluster", NA_character_)) - if (is.na(obj)) + if (is.na(obj)) { obj = character(0L) + } listMeasures2(union(obj, properties), create) } #' @rdname listMeasures #' @export listMeasures.Task = function(obj, properties = character(0L), create = FALSE) { + td = obj$task.desc - if (td$type == "classif" && length(td$class.levels) > 2L) + if (td$type == "classif" && length(td$class.levels) > 2L) { properties = union(properties, "classif.multi") + } listMeasures.character(td$type, properties = properties, create) } listMeasures2 = function(properties = character(0L), create = FALSE) { + ee = as.environment("package:mlr") res = Filter(function(x) inherits(x, "Measure") && all(properties %in% getMeasureProperties(x)), as.list(ee)) - if (create) + if (create) { res - else + } else { names(res) + } } diff --git a/R/logFunOpt.R b/R/logFunOpt.R index dfe5da3472..f516e2b9fc 100644 --- a/R/logFunOpt.R +++ b/R/logFunOpt.R @@ -47,8 +47,9 @@ logFunTune = function(learner, task, resampling, measures, par.set, control, opt x.string = paramValueToString(par.set, x, show.missing.values = !remove.nas) # shorten tuning logging a bit. we remove the sel.learner prefix from params - if (inherits(learner, "ModelMultiplexer")) + if (inherits(learner, "ModelMultiplexer")) { x.string = stri_replace_all(x.string, "", regex = stri_paste(x$selected.learner, "\\.")) + } logFunDefault(learner, task, resampling, measures, par.set, control, opt.path, dob, x.string, y, remove.nas, stage, prev.stage, prefixes = c("Tune-x", "Tune-y") @@ -60,8 +61,9 @@ logFunTuneMemory = function(learner, task, resampling, measures, par.set, contro x.string = paramValueToString(par.set, x, show.missing.values = !remove.nas) # shorten tuning logging a bit. 
we remove the sel.learner prefix from params
-  if (inherits(learner, "ModelMultiplexer"))
+  if (inherits(learner, "ModelMultiplexer")) {
    x.string = stri_replace_all(x.string, "", regex = stri_paste(x$selected.learner, "\\."))
+  }
  logFunMemory(learner, task, resampling, measures, par.set, control, opt.path, dob,
    x.string, y, remove.nas, stage, prev.stage, prefixes = c("Tune-x", "Tune-y")
@@ -87,4 +89,3 @@ logFunFeatSelMemory = function(learner, task, resampling, measures, par.set, con
    x.string, y, remove.nas, stage, prev.stage, prefixes = c("FeatSel-x", "FeatSel-y")
  )
}
-
diff --git a/R/makeFunctionalData.R b/R/makeFunctionalData.R
index be5952101b..752bd7053c 100644
--- a/R/makeFunctionalData.R
+++ b/R/makeFunctionalData.R
@@ -32,6 +32,7 @@
#' # Create a regression task
#' makeRegrTask(data = d2, target = "target")
makeFunctionalData = function(data, fd.features = NULL, exclude.cols = NULL) {
+
  assertDataFrame(data)
  assertList(fd.features, null.ok = TRUE, names = "unique")
  # Assert that exclude.cols refers to valid columns and convert to index
@@ -43,22 +44,26 @@
  }

  # If fd.features is an empty list do nothing
-  if (is.list(fd.features) && length(fd.features) == 0L)
+  if (is.list(fd.features) && length(fd.features) == 0L) {
    return(data)
+  }

  # Convert fd.features to column indices
  fd.features = fdFeatsToColumnIndex(data, fd.features, exclude.cols)

  # All fd.features must refer to numeric columns
-  if (!all(vlapply(data[, unlist(fd.features), drop = FALSE], is.numeric)))
+  if (!all(vlapply(data[, unlist(fd.features), drop = FALSE], is.numeric))) {
    stop("fd.features contains non-integer/numeric columns")
+  }

  # Create a list of functional feature matrices
-  ffeats = lapply(fd.features, function(x) {as.matrix(data[, x, drop = FALSE])})
+  ffeats = lapply(fd.features, function(x) {
+
+    as.matrix(data[, x, drop = FALSE])
+  })

  # Drop original numeric data
-  d = data[, - unlist(fd.features), drop = FALSE]
+  d = data[, -unlist(fd.features), drop = FALSE]

  # Add functional feature matrices
  d[, names(fd.features)] = ffeats
  return(d)
}
-
diff --git a/R/makeLearner.R b/R/makeLearner.R
index 004929c18d..6a2a30e5e5 100644
--- a/R/makeLearner.R
+++ b/R/makeLearner.R
@@ -55,8 +55,9 @@ makeLearner = function(cl, id = cl, predict.type = "response", predict.threshold
  assertString(cl)
  assertFlag(fix.factors.prediction)
  assertList(config, names = "named")
-  if ("show.info" %in% names(config))
+  if ("show.info" %in% names(config)) {
    stop("'show.info' cannot be set in 'makeLearner', please use 'configureMlr' instead.")
+  }
  assertSubset(names(config), choices = names(getMlrOptions()))
  constructor = try(getS3method("makeRLearner", class = cl), silent = TRUE)
  if (inherits(constructor, "try-error")) {
@@ -74,15 +75,17 @@
  # predict.threshold is checked in setter below
  assertList(par.vals, names = "unique")
-  if (stri_isempty(cl))
+  if (stri_isempty(cl)) {
    stop("Cannot create learner from empty string!")
-  if (!inherits(wl, "RLearner"))
+  }
+  if (!inherits(wl, "RLearner")) {
    stop("Learner must be a basic RLearner!")
+  }
  wl = setHyperPars(learner = wl, ..., par.vals = par.vals)
  wl = setPredictType(learner = wl, predict.type = predict.type)
-  if (!is.null(predict.threshold))
+  if (!is.null(predict.threshold)) {
    wl = setPredictThreshold(wl, predict.threshold)
+  }
  wl$fix.factors.prediction = fix.factors.prediction
  return(wl)
}
-
diff --git a/R/makeLearners.R
b/R/makeLearners.R index 7f1d7d2185..d95c17b611 100644 --- a/R/makeLearners.R +++ b/R/makeLearners.R @@ -19,6 +19,7 @@ #' @examples #' makeLearners(c("rpart", "lda"), type = "classif", predict.type = "prob") makeLearners = function(cls, ids = NULL, type = NULL, ...) { + if (!is.null(type)) { assertChoice(type, listTaskTypes()) cls = stri_paste(type, cls, sep = ".") @@ -30,5 +31,3 @@ makeLearners = function(cls, ids = NULL, type = NULL, ...) { lrns = mapply(makeLearner, cl = cls, id = ids, MoreArgs = list(...), SIMPLIFY = FALSE) setNames(lrns, ids) } - - diff --git a/R/measures.R b/R/measures.R index 60a181f5bb..89b811b9df 100644 --- a/R/measures.R +++ b/R/measures.R @@ -46,11 +46,11 @@ NULL featperc = makeMeasure(id = "featperc", minimize = TRUE, best = 0, worst = 1, properties = c("classif", "classif.multi", "multilabel", "regr", "surv", "costsens", "cluster", "req.model", "req.pred"), name = "Percentage of original features used for model", - note = "Useful for feature selection.", + note = "Useful for feature selection.", fun = function(task, model, pred, feats, extra.args) { + length(model$features) / sum(pred$task.desc$n.feat) - } -) + }) #' @export timetrain #' @rdname measures @@ -59,9 +59,9 @@ timetrain = makeMeasure(id = "timetrain", minimize = TRUE, best = 0, worst = Inf properties = c("classif", "classif.multi", "multilabel", "regr", "surv", "costsens", "cluster", "req.model"), name = "Time of fitting the model", fun = function(task, model, pred, feats, extra.args) { + model$time - } -) + }) #' @export timepredict #' @rdname measures @@ -70,9 +70,9 @@ timepredict = makeMeasure(id = "timepredict", minimize = TRUE, best = 0, worst = properties = c("classif", "classif.multi", "multilabel", "regr", "surv", "costsens", "cluster", "req.pred"), name = "Time of predicting test set", fun = function(task, model, pred, feats, extra.args) { + pred$time - } -) + }) #' @export timeboth #' @rdname measures @@ -81,9 +81,9 @@ timeboth = makeMeasure(id = "timeboth", minimize = TRUE, best = 0, worst = Inf, properties = c("classif", "classif.multi", "multilabel", "regr", "surv", "costsens", "cluster", "req.model", "req.pred"), name = "timetrain + timepredict", fun = function(task, model, pred, feats, extra.args) { + model$time + pred$time - } -) + }) ############################################################################### ### regression ### @@ -97,14 +97,15 @@ sse = makeMeasure(id = "sse", minimize = TRUE, best = 0, worst = Inf, name = "Sum of squared errors", note = "Defined as: sum((response - truth)^2)", fun = function(task, model, pred, feats, extra.args) { + measureSSE(pred$data$truth, pred$data$response) - } -) + }) #' @export measureSSE #' @rdname measures #' @format none measureSSE = function(truth, response) { + sum((response - truth)^2) } @@ -116,14 +117,15 @@ mse = makeMeasure(id = "mse", minimize = TRUE, best = 0, worst = Inf, name = "Mean of squared errors", note = "Defined as: mean((response - truth)^2)", fun = function(task, model, pred, feats, extra.args) { + measureMSE(pred$data$truth, pred$data$response) - } -) + }) #' @export measureMSE #' @rdname measures #' @format none measureMSE = function(truth, response) { + mean((response - truth)^2) } @@ -136,6 +138,7 @@ rmse = makeMeasure(id = "rmse", minimize = TRUE, best = 0, worst = Inf, name = "Root mean squared error", note = "The RMSE is aggregated as sqrt(mean(rmse.vals.on.test.sets^2)). 
If you don't want that, you could also use `test.mean`.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureRMSE(pred$data$truth, pred$data$response)
  },
  aggr = test.rmse
@@ -145,6 +148,7 @@
#' @export measureRMSE
#' @rdname measures
#' @format none
measureRMSE = function(truth, response) {
+
  sqrt(measureMSE(truth, response))
}
@@ -156,14 +160,15 @@ medse = makeMeasure(id = "medse", minimize = TRUE, best = 0, worst = Inf,
  name = "Median of squared errors",
  note = "Defined as: median((response - truth)^2).",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMEDSE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureMEDSE
#' @rdname measures
#' @format none
measureMEDSE = function(truth, response) {
+
  median((response - truth)^2)
}
@@ -175,14 +180,15 @@ sae = makeMeasure(id = "sae", minimize = TRUE, best = 0, worst = Inf,
  name = "Sum of absolute errors",
  note = "Defined as: sum(abs(response - truth))",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureSAE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureSAE
#' @rdname measures
#' @format none
measureSAE = function(truth, response) {
+
  sum(abs(response - truth))
}
@@ -194,14 +200,15 @@ mae = makeMeasure(id = "mae", minimize = TRUE, best = 0, worst = Inf,
  name = "Mean of absolute errors",
  note = "Defined as: mean(abs(response - truth))",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMAE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureMAE
#' @rdname measures
#' @format none
measureMAE = function(truth, response) {
+
  mean(abs(response - truth))
}
@@ -213,14 +220,15 @@ medae = makeMeasure(id = "medae", minimize = TRUE, best = 0, worst = Inf,
  name = "Median of absolute errors",
  note = "Defined as: median(abs(response - truth)).",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMEDAE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureMEDAE
#' @rdname measures
#' @format none
measureMEDAE = function(truth, response) {
+
  median(abs(response - truth))
}
@@ -232,17 +240,18 @@ rsq = makeMeasure(id = "rsq", minimize = FALSE, best = 1, worst = -Inf,
  name = "Coefficient of determination",
  note = "Also called R-squared, which is 1 - residual_sum_of_squares / total_sum_of_squares.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureRSQ(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureRSQ
#' @rdname measures
#' @format none
measureRSQ = function(truth, response) {
+
  rss = measureSSE(truth, response)
  ess = sum((truth - mean(truth))^2L)
-  if (ess == 0){
+  if (ess == 0) {
    warning("Measure is undefined if all truth values are equal.")
    return(NA_real_)
  }
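# Editor's note, not part of the patch: a minimal sketch of the aggregation
# behaviour described in the rmse note above - fold-wise RMSEs are aggregated
# as sqrt(mean(rmse^2)) (test.rmse), not as a plain mean (test.mean):
fold.rmse = c(1, 3)
sqrt(mean(fold.rmse^2)) # test.rmse-style aggregation: sqrt(5), about 2.24
mean(fold.rmse)         # test.mean-style aggregation: 2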
@@ -257,17 +266,18 @@ expvar = makeMeasure(id = "expvar", minimize = FALSE, best = 1, worst = 0,
  name = "Explained variance",
  note = "Similar to measure rsq (R-squared).
Defined as explained_sum_of_squares / total_sum_of_squares.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureEXPVAR(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureEXPVAR
#' @rdname measures
#' @format none
measureEXPVAR = function(truth, response) {
+
  regss = sum((response - mean(truth))^2L)
  ess = sum((truth - mean(truth))^2L)
-  if (ess == 0){
+  if (ess == 0) {
    warning("Measure is undefined if all truth values are equal.")
    return(NA_real_)
  }
@@ -282,15 +292,15 @@ arsq = makeMeasure(id = "arsq", minimize = FALSE, best = 1, worst = 0,
  name = "Adjusted coefficient of determination",
  note = "Defined as: 1 - (1 - rsq) * (p / (n - p - 1L)). Adjusted R-squared is only defined for normal linear regression.",
  fun = function(task, model, pred, feats, extra.args) {
+
    n = length(pred$data$truth)
    p = length(model$features)
-    if (n == p + 1){
+    if (n == p + 1) {
      warning("Adjusted R-squared is undefined if the number of observations is equal to the number of independent variables plus one.")
      return(NA_real_)
    }
    1 - (1 - measureRSQ(pred$data$truth, pred$data$response)) * (p / (n - p - 1L))
-  }
-)
+  })

#' @export rrse
#' @rdname measures
@@ -300,16 +310,17 @@ rrse = makeMeasure(id = "rrse", minimize = TRUE, best = 0, worst = Inf,
  name = "Root relative squared error",
  note = "Defined as sqrt(sum_of_squared_errors / total_sum_of_squares). Undefined for single instances and when every truth value is identical. In this case the output will be NA.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureRRSE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureRRSE
#' @rdname measures
#' @format none
-measureRRSE = function(truth, response){
+measureRRSE = function(truth, response) {
+
  tss = sum((truth - mean(truth))^2L)
-  if (tss == 0){
+  if (tss == 0) {
    warning("Measure is undefined if all truth values are equal.")
    return(NA_real_)
  }
@@ -324,16 +335,17 @@ rae = makeMeasure(id = "rae", minimize = TRUE, best = 0, worst = Inf,
  name = "Relative absolute error",
  note = "Defined as sum_of_absolute_errors / mean_absolute_deviation. Undefined for single instances and when every truth value is identical. In this case the output will be NA.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureRAE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureRAE
#' @rdname measures
#' @format none
-measureRAE = function(truth, response){
+measureRAE = function(truth, response) {
+
  meanad = sum(abs(truth - mean(truth)))
-  if (meanad == 0){
+  if (meanad == 0) {
    warning("Measure is undefined if all truth values are equal.")
    return(NA_real_)
  }
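# Editor's note, not part of the patch: a minimal sketch of the degenerate case
# the warnings above guard against - constant truth values make the denominator
# (total sum of squares / mean absolute deviation) zero:
# measureRAE(truth = c(2, 2, 2), response = c(1, 2, 3))
# #> NA, with a warning that the measure is undefined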
@@ -348,15 +360,16 @@ mape = makeMeasure(id = "mape", minimize = TRUE, best = 0, worst = Inf,
  name = "Mean absolute percentage error",
  note = "Defined as: mean(abs((truth_i - response_i) / truth_i)). Won't work if any truth value is equal to zero.
In this case the output will be NA.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMAPE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureMAPE
#' @rdname measures
#' @format none
-measureMAPE = function(truth, response){
-  if (any(truth == 0)){
+measureMAPE = function(truth, response) {
+
+  if (any(truth == 0)) {
    warning("Measure is undefined if any truth value is equal to 0.")
    return(NA_real_)
  }
@@ -373,18 +386,21 @@ msle = makeMeasure(id = "msle", minimize = TRUE, best = 0, worst = Inf,
This measure is mostly used for count data, note that all predicted and actual target values must be greater or equal '-1' to compute the measure.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMSLE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureMSLE
#' @rdname measures
#' @format none
measureMSLE = function(truth, response) {
-  if (any(truth < -1))
+
+  if (any(truth < -1)) {
    stop("All truth values must be greater or equal -1")
-  if (any(response < -1))
+  }
+  if (any(response < -1)) {
    stop("All predicted values must be greater or equal -1")
+  }
  mean((log(response + 1) - log(truth + 1))^2)
}
@@ -400,14 +416,15 @@ rmsle = makeMeasure(id = "rmsle", minimize = TRUE, best = 0, worst = Inf,
This measure is mostly used for count data, note that all predicted and actual target values must be greater or equal '-1' to compute the measure.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureRMSLE(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureRMSLE
#' @rdname measures
#' @format none
measureRMSLE = function(truth, response) {
+
  sqrt(measureMSLE(truth, response))
}
@@ -420,14 +437,15 @@ kendalltau = makeMeasure(id = "kendalltau", minimize = FALSE, best = 1, worst =
  note = "Defined as: Kendall's tau correlation between truth and response. Only looks at the order. See Rosset et al.: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.95.1398&rep=rep1&type=pdf.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureKendallTau(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureKendallTau
#' @rdname measures
#' @format none
measureKendallTau = function(truth, response) {
+
  cor(truth, response, use = "na.or.complete", method = "kendall")
}
@@ -440,14 +458,15 @@ spearmanrho = makeMeasure(id = "spearmanrho", minimize = FALSE, best = 1, worst
  note = "Defined as: Spearman's rho correlation between truth and response. Only looks at the order.
See Rosset et al.: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.95.1398&rep=rep1&type=pdf.", fun = function(task, model, pred, feats, extra.args) { + measureSpearmanRho(pred$data$truth, pred$data$response) - } -) + }) #' @export measureSpearmanRho #' @rdname measures #' @format none measureSpearmanRho = function(truth, response) { + cor(truth, response, use = "na.or.complete", method = "spearman") } @@ -462,14 +481,15 @@ mmce = makeMeasure(id = "mmce", minimize = TRUE, best = 0, worst = 1, name = "Mean misclassification error", note = "Defined as: mean(response != truth)", fun = function(task, model, pred, feats, extra.args) { + measureMMCE(pred$data$truth, pred$data$response) - } -) + }) #' @export measureMMCE #' @rdname measures #' @format none measureMMCE = function(truth, response) { + mean(response != truth) } @@ -481,14 +501,15 @@ acc = makeMeasure(id = "acc", minimize = FALSE, best = 1, worst = 0, name = "Accuracy", note = "Defined as: mean(response == truth)", fun = function(task, model, pred, feats, extra.args) { + measureACC(pred$data$truth, pred$data$response) - } -) + }) #' @export measureACC #' @rdname measures #' @format none measureACC = function(truth, response) { + mean(response == truth) } @@ -500,17 +521,19 @@ ber = makeMeasure(id = "ber", minimize = TRUE, best = 0, worst = 1, name = "Balanced error rate", note = "Mean of misclassification error rates on all individual classes.", fun = function(task, model, pred, feats, extra.args) { + measureBER(pred$data$truth, pred$data$response) - } -) + }) #' @export measureBER #' @rdname measures #' @format none measureBER = function(truth, response) { + # special case for predictions from FailureModel - if (anyMissing(response)) + if (anyMissing(response)) { return(NA_real_) + } mean(diag(1 - (table(truth, response) / table(truth, truth)))) } @@ -522,15 +545,16 @@ multiclass.aunu = makeMeasure(id = "multiclass.aunu", minimize = FALSE, best = 1 name = "Average 1 vs. rest multiclass AUC", note = "Computes the AUC treating a c-dimensional classifier as c two-dimensional classifiers, where classes are assumed to have uniform distribution, in order to have a measure which is independent of class distribution change. See Ferri et al.: https://www.math.ucdavis.edu/~saito/data/roc/ferri-class-perf-metrics.pdf.", fun = function(task, model, pred, feats, extra.args) { + measureAUNU(getPredictionProbabilities(pred, pred$task.desc$class.levels), pred$data$truth) - } -) + }) #' @export measureAUNU #' @rdname measures #' @format none measureAUNU = function(probabilities, truth) { - if (length(unique(truth)) != nlevels(truth)){ + + if (length(unique(truth)) != nlevels(truth)) { warning("Measure is undefined if there isn't at least one sample per class.") return(NA_real_) } @@ -545,15 +569,16 @@ multiclass.aunp = makeMeasure(id = "multiclass.aunp", minimize = FALSE, best = 1 name = "Weighted average 1 vs. rest multiclass AUC", note = "Computes the AUC treating a c-dimensional classifier as c two-dimensional classifiers, taking into account the prior probability of each class. 
See Ferri et al.: https://www.math.ucdavis.edu/~saito/data/roc/ferri-class-perf-metrics.pdf.", fun = function(task, model, pred, feats, extra.args) { + measureAUNP(getPredictionProbabilities(pred, pred$task.desc$class.levels), pred$data$truth) - } -) + }) #' @export measureAUNP #' @rdname measures #' @format none measureAUNP = function(probabilities, truth) { - if (length(unique(truth)) != nlevels(truth)){ + + if (length(unique(truth)) != nlevels(truth)) { warning("Measure is undefined if there isn't at least one sample per class.") return(NA_real_) } @@ -566,16 +591,17 @@ measureAUNP = function(probabilities, truth) { multiclass.au1u = makeMeasure(id = "multiclass.au1u", minimize = FALSE, best = 1, worst = 0.5, properties = c("classif", "classif.multi", "req.pred", "req.truth", "req.prob"), name = "Average 1 vs. 1 multiclass AUC", - note = "Computes AUC of c(c - 1) binary classifiers (all possible pairwise combinations) while considering uniform distribution of the classes. See Ferri et al.: https://www.math.ucdavis.edu/~saito/data/roc/ferri-class-perf-metrics.pdf.", + note = "Computes AUC of c(c - 1) binary classifiers (all possible pairwise combinations) while considering uniform distribution of the classes. See Ferri et al.: https://www.math.ucdavis.edu/~saito/data/roc/ferri-class-perf-metrics.pdf.", fun = function(task, model, pred, feats, extra.args) { + measureAU1U(getPredictionProbabilities(pred, pred$task.desc$class.levels), pred$data$truth) - } -) + }) #' @export measureAU1U #' @rdname measures #' @format none measureAU1U = function(probabilities, truth) { + m = colAUC(probabilities, truth) c = c(combn(1:nlevels(truth), 2)) mean(m[cbind(rep(seq_len(nrow(m)), each = 2), c)]) @@ -589,14 +615,15 @@ multiclass.au1p = makeMeasure(id = "multiclass.au1p", minimize = FALSE, best = 1 name = "Weighted average 1 vs. 1 multiclass AUC", note = "Computes AUC of c(c - 1) binary classifiers while considering the a priori distribution of the classes. See Ferri et al.: https://www.math.ucdavis.edu/~saito/data/roc/ferri-class-perf-metrics.pdf.", fun = function(task, model, pred, feats, extra.args) { + measureAU1P(getPredictionProbabilities(pred, pred$task.desc$class.levels), pred$data$truth) - } -) + }) #' @export measureAU1P #' @rdname measures #' @format none measureAU1P = function(probabilities, truth) { + m = colAUC(probabilities, truth) weights = table(truth) / length(truth) m = m * matrix(rep(weights, each = nrow(m)), ncol = length(weights)) @@ -612,14 +639,15 @@ multiclass.brier = makeMeasure(id = "multiclass.brier", minimize = TRUE, best = name = "Multiclass Brier score", note = "Defined as: (1/n) sum_i sum_j (y_ij - p_ij)^2, where y_ij = 1 if observation i has class j (else 0), and p_ij is the predicted probability of observation i for class j. From http://docs.lib.noaa.gov/rescue/mwr/078/mwr-078-01-0001.pdf.", fun = function(task, model, pred, feats, extra.args) { + measureMulticlassBrier(getPredictionProbabilities(pred, pred$task.desc$class.levels), pred$data$truth) - } -) + }) #' @export measureMulticlassBrier #' @rdname measures #' @format none measureMulticlassBrier = function(probabilities, truth) { + truth = factor(truth, levels = colnames(probabilities)) mat01 = createDummyFeatures(truth) mean(rowSums((probabilities - mat01)^2)) @@ -633,16 +661,17 @@ logloss = makeMeasure(id = "logloss", minimize = TRUE, best = 0, worst = Inf, name = "Logarithmic loss", note = "Defined as: -mean(log(p_i)), where p_i is the predicted probability of the true class of observation i. 
Inspired by https://www.kaggle.com/wiki/MultiClassLogLoss.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureLogloss(getPredictionProbabilities(pred, cl = pred$task.desc$class.levels), pred$data$truth)
-  }
-)
+  })

#' @export measureLogloss
#' @rdname measures
#' @format none
-measureLogloss = function(probabilities, truth){
+measureLogloss = function(probabilities, truth) {
+
  eps = 1e-15
-  #let's confine the predicted probabilities to [eps,1 - eps], so logLoss doesn't reach infinity under any circumstance
+  # let's confine the predicted probabilities to [eps,1 - eps], so logLoss doesn't reach infinity under any circumstance
  probabilities[probabilities > 1 - eps] = 1 - eps
  probabilities[probabilities < eps] = eps
  truth = match(as.character(truth), colnames(probabilities))
@@ -659,14 +688,15 @@ ssr = makeMeasure(id = "ssr", minimize = FALSE, best = 1, worst = 0,
  note = "Defined as: mean(p_i / sqrt(sum_j(p_ij^2))), where p_i is the predicted probability of the true class of observation i and p_ij is the predicted probability of observation i for class j. See: Bickel, J. E. (2007). Some comparisons among quadratic, spherical, and logarithmic scoring rules. Decision Analysis, 4(2), 49-65.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureSSR(getPredictionProbabilities(pred, cl = pred$task.desc$class.levels), pred$data$truth)
-  }
-)
+  })

#' @export measureSSR
#' @rdname measures
#' @format none
-measureSSR = function(probabilities, truth){
+measureSSR = function(probabilities, truth) {
+
  truth = match(as.character(truth), colnames(probabilities))
  p = getRowEls(probabilities, truth)
  mean(p / sqrt(rowSums(probabilities^2)))
@@ -682,15 +712,16 @@ qsr = makeMeasure(id = "qsr", minimize = FALSE, best = 1, worst = -1,
This scoring rule is the same as 1 - multiclass.brier. See: Bickel, J. E. (2007). Some comparisons among quadratic, spherical, and logarithmic scoring rules. Decision Analysis, 4(2), 49-65.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureQSR(getPredictionProbabilities(pred, cl = pred$task.desc$class.levels), pred$data$truth)
-  }
-)
+  })

#' @export measureQSR
#' @rdname measures
#' @format none
-measureQSR = function(probabilities, truth){
-  #We add this line because binary tasks only output one probability column
+measureQSR = function(probabilities, truth) {
+
+  # We add this line because binary tasks only output one probability column
  if (is.null(dim(probabilities))) probabilities = cbind(probabilities, 1 - probabilities)
  truth = factor(truth, levels = colnames(probabilities))
  1 - mean(rowSums((probabilities - createDummyFeatures(truth))^2))
@@ -706,14 +737,15 @@ lsr = makeMeasure(id = "lsr", minimize = FALSE, best = 0, worst = -Inf,
This scoring rule is the same as the negative logloss, self-information or surprisal. See: Bickel, J. E. (2007). Some comparisons among quadratic, spherical, and logarithmic scoring rules. Decision Analysis, 4(2), 49-65.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureLSR(getPredictionProbabilities(pred, cl = pred$task.desc$class.levels), pred$data$truth)
-  }
-)
+  })

#' @export measureLSR
#' @rdname measures
#' @format none
-measureLSR = function(probabilities, truth){
+measureLSR = function(probabilities, truth) {
+
  -1 * measureLogloss(probabilities, truth)
}
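# Editor's note, not part of the patch: a minimal sketch of why measureLogloss()
# above clips probabilities to [eps, 1 - eps] - a probability of exactly 0 for
# the true class would otherwise yield -log(0) = Inf:
p = c(0, 1e-20, 0.5)
eps = 1e-15
-log(pmax(pmin(p, 1 - eps), eps)) # all values finite after clipping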
@@ -726,14 +758,15 @@ kappa = makeMeasure(id = "kappa", minimize = FALSE, best = 1, worst = -1,
  note = "Defined as: 1 - (1 - p0) / (1 - pe).
With: p0 = 'observed frequency of agreement' and pe = 'expected agreement frequency under independence'.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureKAPPA(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureKAPPA
#' @rdname measures
#' @format none
measureKAPPA = function(truth, response) {
+
  # get confusion matrix
  conf.mat = table(truth, response)
  conf.mat = conf.mat / sum(conf.mat)
@@ -759,14 +792,15 @@ wkappa = makeMeasure(id = "wkappa", minimize = FALSE, best = 1, worst = -1,
  note = "Defined as: 1 - sum(weights * conf.mat) / sum(weights * expected.mat), the weight matrix measures seriousness of disagreement with the squared euclidean metric.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureWKAPPA(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureWKAPPA
#' @rdname measures
#' @format none
measureWKAPPA = function(truth, response) {
+
  # get confusion matrix
  conf.mat = table(truth, response)
  conf.mat = conf.mat / sum(conf.mat)
@@ -795,16 +829,18 @@ auc = makeMeasure(id = "auc", minimize = FALSE, best = 1, worst = 0,
  name = "Area under the curve",
  note = "Integral over the graph that results from computing fpr and tpr for many different thresholds.",
  fun = function(task, model, pred, feats, extra.args) {
-    if (anyMissing(pred$data$response) || length(unique(pred$data$truth)) == 1L)
+
+    if (anyMissing(pred$data$response) || length(unique(pred$data$truth)) == 1L) {
      return(NA_real_)
+    }
    measureAUC(getPredictionProbabilities(pred), pred$data$truth, pred$task.desc$negative, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureAUC
#' @rdname measures
#' @format none
measureAUC = function(probabilities, truth, negative, positive) {
+
  if (is.factor(truth)) {
    i = as.integer(truth) == which(levels(truth) == positive)
  } else {
@@ -813,7 +849,7 @@ measureAUC = function(probabilities, truth, negative, positive) {
  if (length(unique(i)) < 2L) {
    stop("truth vector must have at least two classes")
  }
-  #Use fast ranking function from data.table for larger vectors
+  # Use fast ranking function from data.table for larger vectors
  if (length(i) > 5000L) {
    r = frankv(probabilities)
  } else {
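# Editor's note, not part of the patch: measureAUC() above computes AUC from
# ranks (rank()/data.table::frankv()) via the Mann-Whitney identity
# AUC = (sum of positive-class ranks - n.pos * (n.pos + 1) / 2) / (n.pos * n.neg).
# A minimal sketch of that identity:
prob = c(0.9, 0.8, 0.3, 0.2)
is.pos = c(TRUE, FALSE, TRUE, FALSE)
r = rank(prob)
n.pos = sum(is.pos)
n.neg = sum(!is.pos)
(sum(r[is.pos]) - n.pos * (n.pos + 1) / 2) / (n.pos * n.neg) # 0.75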
@@ -834,14 +870,15 @@ brier = makeMeasure(id = "brier", minimize = TRUE, best = 0, worst = 1,
That means we use the numeric representation 1 and 0 for our target classes. It is similar to the mean squared error in regression. multiclass.brier is the sum over all one vs.
all comparisons and for a binary classification 2 * brier.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureBrier(getPredictionProbabilities(pred), pred$data$truth, pred$task.desc$negative, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureBrier
#' @rdname measures
#' @format none
measureBrier = function(probabilities, truth, negative, positive) {
+
  y = as.numeric(truth == positive)
  mean((y - probabilities)^2)
}
@@ -854,14 +891,15 @@ brier.scaled = makeMeasure(id = "brier.scaled", minimize = FALSE, best = 1, wors
  name = "Brier scaled",
  note = "Brier score scaled to [0,1], see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3575184/.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureBrierScaled(getPredictionProbabilities(pred), pred$data$truth, pred$task.desc$negative, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureBrierScaled
#' @rdname measures
#' @format none
measureBrierScaled = function(probabilities, truth, negative, positive) {
+
  y = as.numeric(truth == positive)
  brier = mean((y - probabilities)^2)
  inc = mean(probabilities)
@@ -877,15 +915,16 @@ bac = makeMeasure(id = "bac", minimize = FALSE, best = 1, worst = 0,
  name = "Balanced accuracy",
  note = "For binary tasks, mean of true positive rate and true negative rate.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureBAC(pred$data$truth, pred$data$response)
-  }
-)
+  })

#' @export measureBAC
#' @rdname measures
#' @format none
measureBAC = function(truth, response) {
-  mean(diag(table(truth, response) / table(truth, truth)))
+
+  mean(diag(table(truth, response) / table(truth, truth)))
}
@@ -896,14 +935,15 @@ tp = makeMeasure(id = "tp", minimize = FALSE, best = Inf, worst = 0,
  name = "True positives",
  note = "Sum of all correctly classified observations in the positive class.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureTP(pred$data$truth, pred$data$response, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureTP
#' @rdname measures
#' @format none
measureTP = function(truth, response, positive) {
+
  sum(truth == response & response == positive)
}
@@ -915,14 +955,15 @@ tn = makeMeasure(id = "tn", minimize = FALSE, best = Inf, worst = 0,
  name = "True negatives",
  note = "Sum of correctly classified observations in the negative class. Also called correct rejections.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureTN(pred$data$truth, pred$data$response, pred$task.desc$negative)
-  }
-)
+  })

#' @export measureTN
#' @rdname measures
#' @format none
measureTN = function(truth, response, negative) {
+
  sum(truth == response & response == negative)
}
@@ -934,14 +975,15 @@ fp = makeMeasure(id = "fp", minimize = TRUE, best = 0, worst = Inf,
  name = "False positives",
  note = "Sum of misclassified observations in the positive class. Also called false alarms.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureFP(pred$data$truth, pred$data$response, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureFP
#' @rdname measures
#' @format none
measureFP = function(truth, response, positive) {
+
  sum(truth != response & response == positive)
}
@@ -953,14 +995,15 @@ fn = makeMeasure(id = "fn", minimize = TRUE, best = 0, worst = Inf,
  name = "False negatives",
  note = "Sum of misclassified observations in the negative class.
Also called misses.", fun = function(task, model, pred, feats, extra.args) { + measureFN(pred$data$truth, pred$data$response, pred$task.desc$negative) - } -) + }) #' @export measureFN #' @rdname measures #' @format none measureFN = function(truth, response, negative) { + sum(truth != response & response == negative) } @@ -972,14 +1015,15 @@ tpr = makeMeasure(id = "tpr", minimize = FALSE, best = 1, worst = 0, name = "True positive rate", note = "Percentage of correctly classified observations in the positive class. Also called hit rate or recall or sensitivity.", fun = function(task, model, pred, feats, extra.args) { + measureTPR(pred$data$truth, pred$data$response, pred$task.desc$positive) - } -) + }) #' @export measureTPR #' @rdname measures #' @format none measureTPR = function(truth, response, positive) { + measureTP(truth, response, positive) / sum(truth == positive) } @@ -991,14 +1035,15 @@ tnr = makeMeasure(id = "tnr", minimize = FALSE, best = 1, worst = 0, name = "True negative rate", note = "Percentage of correctly classified observations in the negative class. Also called specificity.", fun = function(task, model, pred, feats, extra.args) { + measureTNR(pred$data$truth, pred$data$response, pred$task.desc$negative) - } -) + }) #' @export measureTNR #' @rdname measures #' @format none measureTNR = function(truth, response, negative) { + measureTN(truth, response, negative) / sum(truth == negative) } @@ -1010,14 +1055,15 @@ fpr = makeMeasure(id = "fpr", minimize = TRUE, best = 0, worst = 1, name = "False positive rate", note = "Percentage of misclassified observations in the positive class. Also called false alarm rate or fall-out.", fun = function(task, model, pred, feats, extra.args) { + measureFPR(pred$data$truth, pred$data$response, pred$task.desc$negative, pred$task.desc$positive) - } -) + }) #' @export measureFPR #' @rdname measures #' @format none measureFPR = function(truth, response, negative, positive) { + measureFP(truth, response, positive) / sum(truth == negative) } @@ -1029,14 +1075,15 @@ fnr = makeMeasure(id = "fnr", minimize = TRUE, best = 0, worst = 1, name = "False negative rate", note = "Percentage of misclassified observations in the negative class.", fun = function(task, model, pred, feats, extra.args) { + measureFNR(pred$data$truth, pred$data$response, pred$task.desc$negative, pred$task.desc$positive) - } -) + }) #' @export measureFNR #' @rdname measures #' @format none measureFNR = function(truth, response, negative, positive) { + measureFN(truth, response, negative) / sum(truth == positive) } @@ -1048,24 +1095,26 @@ ppv = makeMeasure(id = "ppv", minimize = FALSE, best = 1, worst = 0, name = "Positive predictive value", note = "Defined as: tp / (tp + fp). Also called precision. 
If the denominator is 0, PPV is set to be either 1 or 0 depending on whether the highest probability prediction is positive (1) or negative (0).",
  fun = function(task, model, pred, feats, extra.args) {
+
    if (pred$predict.type == "prob") {
      prob = getPredictionProbabilities(pred)
    } else {
      prob = NULL
-    }
+    }
    measurePPV(pred$data$truth, pred$data$response, pred$task.desc$positive, prob)
-  }
-)
+  })

#' @export measurePPV
#' @rdname measures
#' @format none
measurePPV = function(truth, response, positive, probabilities = NULL) {
+
  denominator = sum(response == positive)
  ifelse(denominator == 0, measureEdgeCase(truth, positive, probabilities), measureTP(truth, response, positive) / denominator)
}

measureEdgeCase = function(truth, positive, prob) {
+
  if (!is.null(prob)) {
    rs = sort(prob, index.return = TRUE)
    erst = ifelse(truth[getLast(rs$ix)] == positive, 1, 0)
@@ -1084,14 +1133,15 @@ npv = makeMeasure(id = "npv", minimize = FALSE, best = 1, worst = 0,
  name = "Negative predictive value",
  note = "Defined as: tn / (tn + fn).",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureNPV(pred$data$truth, pred$data$response, pred$task.desc$negative)
-  }
-)
+  })

#' @export measureNPV
#' @rdname measures
#' @format none
measureNPV = function(truth, response, negative) {
+
  measureTN(truth, response, negative) / sum(response == negative)
}
@@ -1103,14 +1153,15 @@ fdr = makeMeasure(id = "fdr", minimize = TRUE, best = 0, worst = 1,
  name = "False discovery rate",
  note = "Defined as: fp / (tp + fp).",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureFDR(pred$data$truth, pred$data$response, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureFDR
#' @rdname measures
#' @format none
measureFDR = function(truth, response, positive) {
+
  measureFP(truth, response, positive) / sum(response == positive)
}
@@ -1122,14 +1173,15 @@ mcc = makeMeasure(id = "mcc", minimize = FALSE,
  name = "Matthews correlation coefficient",
  note = "Defined as (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMCC(pred$data$truth, pred$data$response, pred$task.desc$negative, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureMCC
#' @rdname measures
#' @format none
measureMCC = function(truth, response, negative, positive) {
+
  tn = as.numeric(measureTN(truth, response, negative))
  tp = as.numeric(measureTP(truth, response, positive))
  fn = as.numeric(measureFN(truth, response, negative))
@@ -1150,14 +1202,15 @@ f1 = makeMeasure(id = "f1", minimize = FALSE, best = 1, worst = 0,
  name = "F1 measure",
  note = "Defined as: 2 * tp / (sum(truth == positive) + sum(response == positive))",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureF1(pred$data$truth, pred$data$response, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureF1
#' @rdname measures
#' @format none
measureF1 = function(truth, response, positive) {
+
  2 * measureTP(truth, response, positive) / (sum(truth == positive) + sum(response == positive))
}
@@ -1170,9 +1223,9 @@ gmean = makeMeasure(id = "gmean", minimize = FALSE, best = 1, worst = 0,
  name = "G-mean",
  note = "Geometric mean of recall and specificity.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureGMEAN(pred$data$truth, pred$data$response, pred$task.desc$negative, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureGMEAN
#' @rdname measures
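# Editor's note, not part of the patch: a minimal sketch of the PPV edge case
# described above - if nothing is predicted positive, the denominator is 0 and
# measureEdgeCase() decides based on the highest-probability observation:
# truth = factor(c("neg", "neg", "pos"))
# response = factor(c("neg", "neg", "neg"), levels = c("neg", "pos"))
# measurePPV(truth, response, positive = "pos", probabilities = c(0.2, 0.1, 0.9))
# #> 1, since the highest-probability observation is truly positive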
@@ -1182,6 +1235,7 @@ gmean = makeMeasure(id = "gmean", minimize = FALSE, best = 1, worst = 0,
#' @references
#' He, H. & Garcia, E. A. (2009)
#' *Learning from Imbalanced Data.*
#' IEEE Transactions on Knowledge and Data Engineering, vol. 21, no. 9. pp. 1263-1284.
measureGMEAN = function(truth, response, negative, positive) {
+
  sqrt(measureTPR(truth, response, positive) * measureTNR(truth, response, negative))
}
@@ -1193,14 +1247,15 @@ gpr = makeMeasure(id = "gpr", minimize = FALSE, best = 1, worst = 0,
  name = "Geometric mean of precision and recall.",
  note = "Defined as: sqrt(ppv * tpr)",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureGPR(pred$data$truth, pred$data$response, pred$task.desc$positive)
-  }
-)
+  })

#' @export measureGPR
#' @rdname measures
#' @format none
measureGPR = function(truth, response, positive) {
+
  sqrt(measurePPV(truth, response, positive) * measureTPR(truth, response, positive))
}
@@ -1216,14 +1271,16 @@ multilabel.hamloss = makeMeasure(id = "multilabel.hamloss", minimize = TRUE, bes
  note = "Proportion of labels that are predicted incorrectly, following the definition by Charte and Charte: https://journal.r-project.org/archive/2015-2/charte-charte.pdf.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMultilabelHamloss(getPredictionTruth.PredictionMultilabel(pred),
      getPredictionResponse.PredictionMultilabel(pred))
-})
+  })

#' @export measureMultilabelHamloss
#' @rdname measures
#' @format none
measureMultilabelHamloss = function(truth, response) {
+
  mean(truth != response)
}
@@ -1236,15 +1293,16 @@ multilabel.subset01 = makeMeasure(id = "multilabel.subset01", minimize = TRUE, b
  note = "Proportion of observations where the complete multilabel set (all 0-1-labels) is predicted incorrectly, following the definition by Charte and Charte: https://journal.r-project.org/archive/2015-2/charte-charte.pdf.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMultilabelSubset01(getPredictionTruth.PredictionMultilabel(pred),
-      getPredictionResponse.PredictionMultilabel(pred))
-  }
-)
+      getPredictionResponse.PredictionMultilabel(pred))
+  })

#' @export measureMultilabelSubset01
#' @rdname measures
#' @format none
measureMultilabelSubset01 = function(truth, response) {
+
  mean(!apply(truth == response, 1, all))
}
@@ -1258,15 +1316,16 @@ multilabel.f1 = makeMeasure(id = "multilabel.f1", minimize = FALSE, best = 1, wo
definition by Montanes et al.: http://www.sciencedirect.com/science/article/pii/S0031320313004019. Fractions where the denominator becomes 0 are replaced with 1 before computing the average across all instances.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMultilabelF1(getPredictionTruth.PredictionMultilabel(pred),
-      getPredictionResponse.PredictionMultilabel(pred))
-  }
-)
+      getPredictionResponse.PredictionMultilabel(pred))
+  })

#' @export measureMultilabelF1
#' @rdname measures
#' @format none
measureMultilabelF1 = function(truth, response) {
+
  numerator = 2 * rowSums(truth & response)
  denominator = rowSums(truth + response)
  mean(ifelse(denominator == 0, 1, numerator / denominator))
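# Editor's note, not part of the patch: a minimal sketch of the row-wise F1 in
# measureMultilabelF1() above, including the denominator-0 rule (an observation
# with no true and no predicted labels scores 1); the matrices are hypothetical:
truth = matrix(c(TRUE, FALSE, FALSE, FALSE), nrow = 2)     # 2 obs x 2 labels
response = matrix(c(TRUE, FALSE, TRUE, FALSE), nrow = 2)
numerator = 2 * rowSums(truth & response)
denominator = rowSums(truth + response)
mean(ifelse(denominator == 0, 1, numerator / denominator)) # (2/3 + 1) / 2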
@@ -1282,15 +1341,16 @@ multilabel.acc = makeMeasure(id = "multilabel.acc", minimize = FALSE, best = 1,
following the definition by Charte and Charte: https://journal.r-project.org/archive/2015-2/charte-charte.pdf.
Fractions where the denominator becomes 0 are replaced with 1 before computing the average across all instances.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMultilabelACC(getPredictionTruth.PredictionMultilabel(pred),
-      getPredictionResponse.PredictionMultilabel(pred))
-  }
-)
+      getPredictionResponse.PredictionMultilabel(pred))
+  })

#' @export measureMultilabelACC
#' @rdname measures
#' @format none
measureMultilabelACC = function(truth, response) {
+
  numerator = rowSums(truth & response)
  denominator = rowSums(truth | response)
  mean(ifelse(denominator == 0, 1, numerator / denominator))
@@ -1306,15 +1366,16 @@ multilabel.ppv = makeMeasure(id = "multilabel.ppv", minimize = FALSE, best = 1,
following the definition by Charte and Charte: https://journal.r-project.org/archive/2015-2/charte-charte.pdf. Fractions where the denominator becomes 0 are ignored in the average calculation.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMultilabelPPV(getPredictionTruth.PredictionMultilabel(pred),
-      getPredictionResponse.PredictionMultilabel(pred))
-  }
-)
+      getPredictionResponse.PredictionMultilabel(pred))
+  })

#' @export measureMultilabelPPV
#' @rdname measures
#' @format none
measureMultilabelPPV = function(truth, response) {
+
  numerator = rowSums(truth & response)
  denominator = rowSums(response)
  mean(numerator / denominator, na.rm = TRUE)
@@ -1330,15 +1391,16 @@ multilabel.tpr = makeMeasure(id = "multilabel.tpr", minimize = FALSE, best = 1,
following the definition by Charte and Charte: https://journal.r-project.org/archive/2015-2/charte-charte.pdf. Fractions where the denominator becomes 0 are ignored in the average calculation.",
  fun = function(task, model, pred, feats, extra.args) {
+
    measureMultilabelTPR(getPredictionTruth.PredictionMultilabel(pred),
-      getPredictionResponse.PredictionMultilabel(pred))
-  }
-)
+      getPredictionResponse.PredictionMultilabel(pred))
+  })

#' @export measureMultilabelTPR
#' @rdname measures
#' @format none
measureMultilabelTPR = function(truth, response) {
+
  numerator = rowSums(truth & response)
  denominator = rowSums(truth)
  mean(numerator / denominator, na.rm = TRUE)
@@ -1355,14 +1417,15 @@ cindex = makeMeasure(id = "cindex", minimize = FALSE, best = 1, worst = 0,
  name = "Harrell's Concordance index",
  note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival.",
  fun = function(task, model, pred, feats, extra.args) {
+
    requirePackages("_Hmisc")
    y = getPredictionResponse(pred)
-    if (anyMissing(y))
+    if (anyMissing(y)) {
      return(NA_real_)
+    }
    s = getPredictionTruth(pred)
    Hmisc::rcorr.cens(-1 * y, s)[["C Index"]]
-  }
-)
+  })

#' @export cindex.uno
#' @rdname measures
@@ -1376,10 +1439,12 @@ cindex.uno = makeMeasure(id = "cindex.uno", minimize = FALSE, best = 1, worst =
  name = "Uno's Concordance index",
  note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival. Corrected by weighting with IPCW as suggested by Uno.
@@ -1355,14 +1417,15 @@ cindex = makeMeasure(id = "cindex", minimize = FALSE, best = 1, worst = 0, name = "Harrell's Concordance index", note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival.", fun = function(task, model, pred, feats, extra.args) { + requirePackages("_Hmisc") y = getPredictionResponse(pred) - if (anyMissing(y)) + if (anyMissing(y)) { return(NA_real_) + } s = getPredictionTruth(pred) Hmisc::rcorr.cens(-1 * y, s)[["C Index"]] - } -) + }) #' @export cindex.uno #' @rdname measures @@ -1376,10 +1439,12 @@ cindex.uno = makeMeasure(id = "cindex.uno", minimize = FALSE, best = 1, worst = name = "Uno's Concordance index", note = "Fraction of all pairs of subjects whose predicted survival times are correctly ordered among all subjects that can actually be ordered. In other words, it is the probability of concordance between the predicted and the observed survival. Corrected by weighting with IPCW as suggested by Uno. Implemented in survAUC::UnoC.", fun = function(task, model, pred, feats, extra.args) { + requirePackages("_survAUC") y = getPredictionResponse(pred) - if (anyMissing(y)) + if (anyMissing(y)) { return(NA_real_) + } surv.train = getTaskTargets(task, recode.target = "surv")[model$subset] max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L]) survAUC::UnoC(Surv.rsp = surv.train, Surv.rsp.new = getPredictionTruth(pred), time = max.time, lpnew = y) @@ -1399,13 +1464,15 @@ iauc.uno = makeMeasure(id = "iauc.uno", minimize = FALSE, best = 1, worst = 0, name = "Uno's estimator of cumulative AUC for right censored time-to-event data", note = "To set an upper time limit, set argument max.time (defaults to max time in complete task). Implemented in survAUC::AUC.uno.", fun = function(task, model, pred, feats, extra.args) { + requirePackages("_survAUC") max.time = assertNumber(extra.args$max.time, null.ok = TRUE) %??% max(getTaskTargets(task)[, 1L]) times = seq(from = 0, to = max.time, length.out = extra.args$resolution) surv.train = getTaskTargets(task, recode.target = "surv")[model$subset] y = getPredictionResponse(pred) - if (anyMissing(y)) + if (anyMissing(y)) { return(NA_real_) + } survAUC::AUC.uno(Surv.rsp = surv.train, Surv.rsp.new = getPredictionTruth(pred), times = times, lpnew = y)$iauc }, extra.args = list(max.time = NULL, resolution = 1000L) @@ -1419,6 +1486,7 @@ ibrier = makeMeasure(id = "ibrier", minimize = TRUE, best = 0, worst = 1, name = "Integrated Brier score using Kaplan-Meier estimator for weighting", note = "Only works for methods for which probabilities are provided via pec::predictSurvProb. Currently these are only coxph and randomForestSRC. To set an upper time limit, set argument max.time (defaults to max time in test data). Implemented in pec::pec.", fun = function(task, model, pred, feats, extra.args) { + requirePackages(c("survival", "pec")) targets = getTaskTargets(task) tn = getTaskTargetNames(task)
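Behind all of the concordance-style measures above is the same pairwise-ordering count; a brute-force sketch on uncensored toy data (times invented; the real measures delegate to Hmisc::rcorr.cens and survAUC as shown):

surv.time = c(2, 5, 9) # observed survival times
pred.time = c(3, 4, 8) # predicted survival times
pairs = combn(3, 2)
concordant = apply(pairs, 2, function(p) {
  (surv.time[p[1]] < surv.time[p[2]]) == (pred.time[p[1]] < pred.time[p[2]])
})
mean(concordant) # C index: here 1, all 3 pairs are ordered consistently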
@@ -1448,15 +1516,16 @@ meancosts = makeMeasure(id = "meancosts", minimize = TRUE, best = 0, worst = Inf name = "Mean costs of the predicted choices", note = "Defined as: mean(y), where y is the vector of costs for the predicted classes.", fun = function(task, model, pred, feats, extra.args) { + classes = as.character(pred$data$response) ids = pred$data$id costs = getTaskCosts(task) y = mapply(function(id, cl) { + costs[id, cl] }, ids, classes, SIMPLIFY = TRUE, USE.NAMES = FALSE) mean(y) - } -) + }) #' @export mcp #' @rdname measures @@ -1465,11 +1534,11 @@ mcp = makeMeasure(id = "mcp", minimize = TRUE, best = 0, worst = Inf, name = "Misclassification penalty", note = "Average difference between costs of oracle and model prediction.", fun = function(task, model, pred, feats, extra.args) { + mc = meancosts$fun(task, NULL, pred, NULL, extra.args) oc = mean(apply(getTaskCosts(task), 1L, min)) mc - oc - } -) + }) ############################################################################### ### clustering ### @@ -1482,6 +1551,7 @@ db = makeMeasure(id = "db", minimize = TRUE, best = 0, worst = Inf, name = "Davies-Bouldin cluster separation measure", note = "Ratio of the within-cluster scatter to the between-cluster separation, averaged over the clusters. See `?clusterSim::index.DB`.", fun = function(task, model, pred, feats, extra.args) { + if (length(unique(pred$data$response)) > 1L) { requirePackages("clusterSim", default.method = "load") r = as.integer(as.factor(pred$data$response)) @@ -1489,8 +1559,7 @@ db = makeMeasure(id = "db", minimize = TRUE, best = 0, worst = Inf, } else { NA } - } -) + }) #' @export dunn #' @rdname measures @@ -1500,40 +1569,40 @@ dunn = makeMeasure(id = "dunn", minimize = FALSE, best = Inf, worst = 0, name = "Dunn index", note = "Defined as the ratio of the smallest distance between observations not in the same cluster to the largest intra-cluster distance. See `?clValid::dunn`.", fun = function(task, model, pred, feats, extra.args) { + # produced a confusing note in some cases, see issue #232 suppressMessages(requirePackages("clValid", default.method = "load")) r = as.integer(as.factor(pred$data$response)) clValid::dunn(Data = feats, clusters = r) - } -) + }) #' @export G1 #' @rdname measures #' @format none -G1 = makeMeasure(id = "G1", minimize = FALSE, best = Inf, worst = 0, # nolint +G1 = makeMeasure(id = "G1", minimize = FALSE, best = Inf, worst = 0, # nolint properties = c("cluster", "req.pred", "req.feats"), name = "Calinski-Harabasz pseudo F statistic", note = "Defined as the ratio of between-cluster variance to within-cluster variance. See `?clusterSim::index.G1`.", fun = function(task, model, pred, feats, extra.args) { + requirePackages("clusterSim", default.method = "load") r = as.integer(as.factor(pred$data$response)) clusterSim::index.G1(feats, r) - } -) + }) #' @export G2 #' @rdname measures #' @format none -G2 = makeMeasure(id = "G2", minimize = FALSE, best = 1, worst = 0, # nolint +G2 = makeMeasure(id = "G2", minimize = FALSE, best = 1, worst = 0, # nolint properties = c("cluster", "req.pred", "req.feats"), name = "Baker and Hubert adaptation of Goodman-Kruskal's gamma statistic", note = "Defined as: (number of concordant comparisons - number of discordant comparisons) / (number of concordant comparisons + number of discordant comparisons). See `?clusterSim::index.G2`.", fun = function(task, model, pred, feats, extra.args) { + requirePackages("clusterSim", default.method = "load") r = as.integer(as.factor(pred$data$response)) clusterSim::index.G2(clusterSim::dist.GDM(feats), r) - } -) + }) #' @export silhouette #' @rdname measures @@ -1543,8 +1612,8 @@ silhouette = makeMeasure(id = "silhouette", minimize = FALSE, best = Inf, worst name = "Rousseeuw's silhouette internal cluster quality index", note = "Silhouette value of an observation is a measure of how similar an object is to its own cluster compared to other clusters. The measure is calculated as the average of all silhouette values. See `?clusterSim::index.S`.", fun = function(task, model, pred, feats, extra.args) { + requirePackages("clusterSim", default.method = "load") r = as.integer(as.factor(pred$data$response)) clusterSim::index.S(clusterSim::dist.GDM(feats), r) - } -) + })
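A standalone sketch of how these cluster indices are fed, mirroring the measure bodies above (assumes the clusterSim package is installed; iris and k = 3 merely stand in for feats and a predicted clustering):

feats = iris[, 1:4]
r = as.integer(as.factor(kmeans(feats, centers = 3)$cluster))
clusterSim::index.G1(feats, r)                       # Calinski-Harabasz pseudo F
clusterSim::index.G2(clusterSim::dist.GDM(feats), r) # Goodman-Kruskal gamma
clusterSim::index.S(clusterSim::dist.GDM(feats), r)  # silhouette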
diff --git a/R/mergeBenchmarkResults.R b/R/mergeBenchmarkResults.R index 3367a045ce..6708845277 100644 --- a/R/mergeBenchmarkResults.R +++ b/R/mergeBenchmarkResults.R @@ -14,20 +14,23 @@ #' @noMd #' @export mergeBenchmarkResults = function(bmrs) { + # check all objects have the class BenchmarkResult assertList(bmrs, types = "BenchmarkResult") # check if all task types are equal unique.tt = unique(unlist(lapply(bmrs, function(x) getBMRObjects(x, fun = getTaskType)))) - if (length(unique.tt) != 1) + if (length(unique.tt) != 1) { stopf("Different task types found: %s", collapse(unique.tt)) + } # check if resample descriptions are equal for each task task.rin = peelList(lapply(bmrs, function(bmr) getBMRObjects(bmr, fun = function(x) getRRPredictions(x)$instance$desc))) task.rin = groupNamedListByNames(task.rin) unique.rin = vlapply(task.rin, function(x) length(unique(x)) == 1) - if (any(!unique.rin)) + if (any(!unique.rin)) { stopf("Different resample descriptions found for tasks: %s", collapse(names(unique.rin)[!unique.rin])) + } # get unique learner ids and task ids learner.ids = unique(unlist(lapply(bmrs, getBMRLearnerIds))) @@ -37,6 +40,7 @@ mergeBenchmarkResults = function(bmrs) { all.combos = expand.grid(task.id = task.ids, learner.id = learner.ids) all.combos = stri_paste(all.combos$task.id, all.combos$learner.id, sep = " - ") existing.combos = rbindlist(lapply(bmrs, function(bmr) { + getBMRAggrPerformances(bmr, as.df = TRUE)[, c("task.id", "learner.id")] })) existing.combos = stri_paste(existing.combos$task.id, existing.combos$learner.id, sep = " - ") @@ -50,7 +54,7 @@ mergeBenchmarkResults = function(bmrs) { # get all learners from bmrs and merge lrns.merged = peelList(lapply(bmrs, getBMRLearners)) - lrns.merged = unique(lrns.merged) #lrns.merged[!duplicated(lrns.merged)] + lrns.merged = unique(lrns.merged) # lrns.merged[!duplicated(lrns.merged)] # get ResampleResults from bmrs and merge them by setting the correct structure res.merged = peelList(extractSubList(bmrs, "results", simplify = FALSE)) @@ -73,14 +77,17 @@ mergeBenchmarkResults = function(bmrs) { # simple wrapper for unlist() with recursive set to FALSE peelList = function(x) { + unlist(x, recursive = FALSE) } groupNamedListByNames = function(xs, name = sort(unique(names(xs)))) { + assertList(xs, names = "named") assertCharacter(name) res = lapply(name, function(x) { + ret = xs[names(xs) == x] names(ret) = NULL peelList(ret) diff --git a/R/mergeSmallFactorLevels.R b/R/mergeSmallFactorLevels.R index 3aea34e504..489220db6d 100644 --- a/R/mergeSmallFactorLevels.R +++ b/R/mergeSmallFactorLevels.R @@ -19,6 +19,7 @@ #' @family eda_and_preprocess #' @export mergeSmallFactorLevels = function(task, cols = NULL, min.perc = 0.01, new.level = ".merged") { + assertClass(task, "Task") assertNumber(min.perc, lower = 0, upper = 1) assertString(new.level) @@ -34,8 +35,9 @@ mergeSmallFactorLevels = function(task, cols = NULL, min.perc = 0.01, new.level for (cn in cns) { x = as.factor(data[[cn]]) - if (new.level %in% levels(x)) + if (new.level %in% levels(x)) { stopf("Value of new.level = '%s' is already a level of column '%s'!", new.level, cn) + } j = which(prop.table(table(x)) < min.perc) if (length(j) > 0L) { levels(x)[levels(x) %in% names(j)] = new.level
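A usage sketch for mergeSmallFactorLevels above (toy data and threshold invented): levels whose share falls below min.perc are collapsed into new.level.

library(mlr)
df = data.frame(
  y = rnorm(100),
  f = factor(c(rep("a", 60), rep("b", 38), "c", "d")) # "c" and "d" at 1 % each
)
task = makeRegrTask(data = df, target = "y")
task = mergeSmallFactorLevels(task, cols = "f", min.perc = 0.02)
table(getTaskData(task)$f) # "c" and "d" end up in ".merged"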
diff --git a/R/mutateBits.R b/R/mutateBits.R index 2638b2405f..f376d58c40 100644 --- a/R/mutateBits.R +++ b/R/mutateBits.R @@ -2,6 +2,7 @@ # mutateBits = function(x, rate = 1 / length(x)) { + n = length(x) flip = rbinom(n, 1, rate) (x + flip) %% 2 diff --git a/R/options.R b/R/options.R index ebba57eeff..22d1a3da53 100644 --- a/R/options.R +++ b/R/options.R @@ -7,28 +7,33 @@ #' @export #' @family configure getMlrOptions = function() { + mlr.options = .Options[stri_startswith_fixed(names(.Options), "mlr.")] names(mlr.options) = stri_sub(names(mlr.options), from = 5L) mlr.options[!stri_startswith_fixed(names(mlr.options), "debug.")] } setMlrOption = function(name, val) { + name = sprintf("mlr.%s", name) do.call(options, setNames(list(val), name)) } getMlrOption = function(name, default = NULL) { + getOption(stri_paste("mlr.", name), default) } # FIXME: the mechanism here is not perfect. # we export the options to the slaves, then read and set them exportMlrOptions = function(level) { + .mlr.slave.options = getMlrOptions() parallelExport(".mlr.slave.options", level = level, master = FALSE, show.info = FALSE) } setSlaveOptions = function() { + if (getOption("parallelMap.on.slave", FALSE)) { # for multicore the options are not exported, we also don't need them due to forking.... if (exists(".mlr.slave.options", envir = .GlobalEnv)) { diff --git a/R/performance.R b/R/performance.R index a00b0e59cc..4f8403684f 100644 --- a/R/performance.R +++ b/R/performance.R @@ -20,82 +20,95 @@ #' @examples #' training.set = seq(1, nrow(iris), by = 2) #' test.set = seq(2, nrow(iris), by = 2) -#' +#' #' task = makeClassifTask(data = iris, target = "Species") #' lrn = makeLearner("classif.lda") #' mod = train(lrn, task, subset = training.set) #' pred = predict(mod, newdata = iris[test.set, ]) #' performance(pred, measures = mmce) -#' +#' #' # Compute multiple performance measures at once #' ms = list("mmce" = mmce, "acc" = acc, "timetrain" = timetrain) #' performance(pred, measures = ms, task, mod) performance = function(pred, measures, task = NULL, model = NULL, feats = NULL, simpleaggr = FALSE) { - if (!is.null(pred)) + + if (!is.null(pred)) { assertClass(pred, classes = "Prediction") + } measures = checkMeasures(measures, pred$task.desc) res = vnapply(measures, doPerformanceIteration, pred = pred, task = task, model = model, td = NULL, feats = feats, simpleaggr = simpleaggr) # FIXME: This is really what the names should be, but it breaks all kinds of other stuff - #if (inherits(pred, "ResamplePrediction")) { + # if (inherits(pred, "ResamplePrediction")) { # setNames(res, vcapply(measures, measureAggrName)) - #} else { + # } else { # setNames(res, extractSubList(measures, "id")) - #} + # } setNames(res, extractSubList(measures, "id")) } doPerformanceIteration = function(measure, pred = NULL, task = NULL, model = NULL, td = NULL, feats = NULL, simpleaggr = simpleaggr) { + m = measure props = getMeasureProperties(m) if ("req.pred" %in% props) { - if (is.null(pred)) + if (is.null(pred)) { stopf("You need to pass pred for measure %s!", m$id) + } } if ("req.truth" %in% props) { type = getTaskDesc(pred)$type if (type == "surv") { - if (is.null(pred$data$truth.time) || is.null(pred$data$truth.event)) + if (is.null(pred$data$truth.time) || is.null(pred$data$truth.event)) { stopf("You need to have 'truth.time' and 'truth.event' columns in your pred object for measure %s!", m$id) + } } else if (type == "multilabel") { - if (!(any(stri_detect_regex(colnames(pred$data), "^truth\\.")))) + if (!(any(stri_detect_regex(colnames(pred$data), "^truth\\."))))
{ stopf("You need to have 'truth.*' columns in your pred object for measure %s!", m$id) + } } else { - if (is.null(pred$data$truth)) + if (is.null(pred$data$truth)) { stopf("You need to have a 'truth' column in your pred object for measure %s!", m$id) + } } } if ("req.model" %in% props) { - if (is.null(model)) + if (is.null(model)) { stopf("You need to pass model for measure %s!", m$id) + } assertClass(model, classes = "WrappedModel") } if ("req.task" %in% props) { - if (is.null(task)) + if (is.null(task)) { stopf("You need to pass task for measure %s!", m$id) + } assertClass(task, classes = "Task") } if ("req.feats" %in% props) { - if (is.null(task) && is.null(feats)) + if (is.null(task) && is.null(feats)) { stopf("You need to pass either task or features for measure %s!", m$id) - else if (is.null(feats)) + } else if (is.null(feats)) { feats = task$env$data[pred$data$id, , drop = FALSE] - else + } else { assertClass(feats, "data.frame") + } } # we need to find desc somewhere - td = if (!is.null(pred)) + td = if (!is.null(pred)) { pred$task.desc - else if (!is.null(model)) + } else if (!is.null(model)) { model$task.desc - else if (!is.null(task)) + } else if (!is.null(task)) { getTaskDesc(task) + } # null only happens in custom resampled measure when we do no individual measurements if (!is.null(td)) { - if (td$type %nin% props) + if (td$type %nin% props) { stopf("Measure %s does not support task type %s!", m$id, td$type) - if (td$type == "classif" && length(td$class.levels) > 2L && "classif.multi" %nin% props) + } + if (td$type == "classif" && length(td$class.levels) > 2L && "classif.multi" %nin% props) { stopf("Multiclass problems cannot be used for measure %s!", m$id) + } # if we have multiple req.pred.types, check if we have one of them (currently we only need prob) req.pred.types = if ("req.prob" %in% props) "prob" else character(0L) @@ -119,6 +132,7 @@ doPerformanceIteration = function(measure, pred = NULL, task = NULL, model = NUL if (is.null(pred$data$iter)) pred$data$iter = 1L if (is.null(pred$data$set)) pred$data$set = "test" fun = function(ss) { + is.train = ss$set == "train" if (any(is.train)) { pred$data = as.data.frame(ss[is.train, ]) diff --git a/R/plotBMRBoxplots.R b/R/plotBMRBoxplots.R index ed7e7f1cf2..14e3026eaf 100644 --- a/R/plotBMRBoxplots.R +++ b/R/plotBMRBoxplots.R @@ -47,14 +47,16 @@ plotBMRBoxplots = function(bmr, measure = NULL, style = "box", order.lrns = NULL p = ggplot(df, aes_string("learner.id", measure$id)) p = p + theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = -45, hjust = 0)) - p = p + facet_wrap(~ task.id, nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) + p = p + facet_wrap(~task.id, nrow = facet.wrap.nrow, ncol = facet.wrap.ncol) - if (pretty.names) + if (pretty.names) { p = p + ylab(measure$name) + } - if (style == "box") + if (style == "box") { p = p + geom_boxplot() - else + } else { p = p + geom_violin() + stat_summary(fun.ymin = median, fun.ymax = median, fun.y = median, geom = "crossbar") + } return(p) } diff --git a/R/plotBMRRanksAsBarChart.R b/R/plotBMRRanksAsBarChart.R index ed6453f43e..54c29512d6 100644 --- a/R/plotBMRRanksAsBarChart.R +++ b/R/plotBMRRanksAsBarChart.R @@ -31,6 +31,7 @@ #' # see benchmark plotBMRRanksAsBarChart = function(bmr, measure = NULL, ties.method = "average", aggregation = "default", pos = "stack", order.lrns = NULL, order.tsks = NULL, pretty.names = TRUE) { + assertClass(bmr, "BenchmarkResult") measure = checkBMRMeasure(measure, bmr) assertChoice(pos, c("tile", "stack", "dodge")) diff --git 
a/R/plotBMRSummary.R b/R/plotBMRSummary.R index 76082813d9..c6d4fd13e4 100644 --- a/R/plotBMRSummary.R +++ b/R/plotBMRSummary.R @@ -29,6 +29,7 @@ #' # see benchmark plotBMRSummary = function(bmr, measure = NULL, trafo = "none", order.tsks = NULL, pointsize = 4L, jitter = 0.05, pretty.names = TRUE) { + assertClass(bmr, "BenchmarkResult") measure = checkBMRMeasure(measure, bmr) assertChoice(trafo, c("none", "rank")) @@ -42,7 +43,7 @@ plotBMRSummary = function(bmr, measure = NULL, trafo = "none", order.tsks = NULL # trafo to ranks manually here if (trafo == "rank") { setDT(df) - df[, get("meas.name") := rank(.SD[[meas.name]], ties.method = "average"), by = "task.id"] # nolint FIXME: find out what `:=` looks like in the AST and adjust the linter + df[, get("meas.name") := rank(.SD[[meas.name]], ties.method = "average"), by = "task.id"] # nolint FIXME: find out what `:=` looks like in the AST and adjust the linter setDF(df) xlab.string = stri_paste("rank of", xlab.string, sep = " ") } @@ -66,4 +67,3 @@ plotBMRSummary = function(bmr, measure = NULL, trafo = "none", order.tsks = NULL return(p) } -
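The rank trafo in plotBMRSummary above is just a per-task rank of the aggregated measure; the same transformation in base R for comparison (column names invented to resemble the aggregated BMR data frame):

df = data.frame(
  task.id = rep(c("t1", "t2"), each = 2),
  learner.id = rep(c("lda", "rpart"), 2),
  mmce.test.mean = c(0.10, 0.20, 0.30, 0.25)
)
df$rank = ave(df$mmce.test.mean, df$task.id,
  FUN = function(x) rank(x, ties.method = "average"))
df # lda ranks first on t1, second on t2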
diff --git a/R/plotCritDifferences.R b/R/plotCritDifferences.R index eb256905e8..ee2bd208e7 100644 --- a/R/plotCritDifferences.R +++ b/R/plotCritDifferences.R @@ -57,7 +57,8 @@ #' @md #' @export generateCritDifferencesData = function(bmr, measure = NULL, p.value = 0.05, - baseline = NULL, test = "bd") { + baseline = NULL, test = "bd") { + assertClass(bmr, "BenchmarkResult") assertChoice(test, c("nemenyi", "bd")) assertNumeric(p.value, lower = 0, upper = 1, len = 1) @@ -105,7 +106,7 @@ generateCritDifferencesData = function(bmr, measure = NULL, p.value = 0.05, FUN = function(x) ifelse(x > 0 && x < cd.info$cd, x, 0)) # Get start and end point of all possible bars xstart = round(apply(mat + sub, 1, min), 3) - xend = round(apply(mat + sub, 1, max), 3) + xend = round(apply(mat + sub, 1, max), 3) nem.df = data.table(xstart, xend, "diff" = xend - xstart) # For each unique endpoint of a bar keep only the longest bar nem.df = nem.df[, .SD[which.max(.SD$diff)], by = "xend"] @@ -152,6 +153,7 @@ generateCritDifferencesData = function(bmr, measure = NULL, p.value = 0.05, #' @examples #' # see benchmark plotCritDifferences = function(obj, baseline = NULL, pretty.names = TRUE) { + assertClass(obj, "CritDifferencesData") # Plot descriptive lines and learner names @@ -160,31 +162,31 @@ plotCritDifferences = function(obj, baseline = NULL, pretty.names = TRUE) { p = p + geom_point(aes_string("mean.rank", 0, colour = "learner.id"), size = 3) # Horizontal descriptive bar p = p + geom_segment(aes_string("mean.rank", 0, xend = "mean.rank", yend = "yend", - color = "learner.id"), size = 1) + color = "learner.id"), size = 1) # Vertical descriptive bar p = p + geom_segment(aes_string("mean.rank", "yend", xend = "xend", - yend = "yend", color = "learner.id"), size = 1) + yend = "yend", color = "learner.id"), size = 1) # Plot Learner name if (pretty.names) { p = p + geom_text(aes_string("xend", "yend", label = "short.name", color = "learner.id", - hjust = "right"), vjust = -1) + hjust = "right"), vjust = -1) } else { p = p + geom_text(aes_string("xend", "yend", label = "learner.id", color = "learner.id", - hjust = "right"), vjust = -1) + hjust = "right"), vjust = -1) } p = p + xlab("Average Rank") # Change appearance p = p + scale_x_continuous(breaks = c(0:max(obj$data$xend))) p = p + theme(axis.text.y = element_blank(), - axis.ticks.y = element_blank(), - axis.title.y = element_blank(), - legend.position = "none", - panel.background = element_blank(), - panel.border = element_blank(), - axis.line = element_line(size = 1), - axis.line.y = element_blank(), - panel.grid.major = element_blank(), - plot.background = element_blank()) + axis.ticks.y = element_blank(), + axis.title.y = element_blank(), + legend.position = "none", + panel.background = element_blank(), + panel.border = element_blank(), + axis.line = element_line(size = 1), + axis.line.y = element_blank(), + panel.grid.major = element_blank(), + plot.background = element_blank()) # Write some values into shorter names as they are used numerous times. cd.x = obj$cd.info$x @@ -199,34 +201,34 @@ plotCritDifferences = function(obj, baseline = NULL, pretty.names = TRUE) { } # Add horizontal bar around baseline p = p + annotate("segment", x = cd.x + cd, xend = cd.x - cd, y = cd.y, yend = cd.y, - alpha = 0.5, color = "darkgrey", size = 2) + alpha = 0.5, color = "darkgrey", size = 2) # Add interval limiting bars p = p + annotate("segment", x = cd.x + cd, xend = cd.x + cd, y = cd.y - 0.05, - yend = cd.y + 0.05, color = "darkgrey", size = 1) + yend = cd.y + 0.05, color = "darkgrey", size = 1) p = p + annotate("segment", x = cd.x - cd, xend = cd.x - cd, y = cd.y - 0.05, - yend = cd.y + 0.05, color = "darkgrey", size = 1) + yend = cd.y + 0.05, color = "darkgrey", size = 1) # Add point at learner p = p + annotate("point", x = cd.x, y = cd.y, alpha = 0.5) # Add critical difference text p = p + annotate("text", label = stri_paste("Critical Difference =", round(cd, 2), sep = " "), - x = cd.x, y = cd.y + 0.05) + x = cd.x, y = cd.y + 0.05) } else { nemenyi.data = obj$cd.info$nemenyi.data if (!(nrow(nemenyi.data) == 0L)) { # Add connecting bars p = p + geom_segment(aes_string("xstart", "y", xend = "xend", yend = "y"), - data = nemenyi.data, size = 2, color = "dimgrey", alpha = 0.9) + data = nemenyi.data, size = 2, color = "dimgrey", alpha = 0.9) # Add text (descriptive) p = p + annotate("text", - label = stri_paste("Critical Difference =", round(cd, 2), sep = " "), - y = max(obj$data$yend) + .1, x = mean(obj$data$mean.rank)) + label = stri_paste("Critical Difference =", round(cd, 2), sep = " "), + y = max(obj$data$yend) + .1, x = mean(obj$data$mean.rank)) # Add bar (descriptive) p = p + annotate("segment", - x = mean(obj$data$mean.rank) - 0.5 * cd, - xend = mean(obj$data$mean.rank) + 0.5 * cd, - y = max(obj$data$yend) + .2, - yend = max(obj$data$yend) + .2, - size = 2L) + x = mean(obj$data$mean.rank) - 0.5 * cd, + xend = mean(obj$data$mean.rank) + 0.5 * cd, + y = max(obj$data$yend) + .2, + yend = max(obj$data$yend) + .2, + size = 2L) } else { message("No connecting bars to plot!") } diff --git a/R/plotLearnerPrediction.R b/R/plotLearnerPrediction.R index 11d119a095..334b2bfe99 100644 --- a/R/plotLearnerPrediction.R +++ b/R/plotLearnerPrediction.R @@ -60,7 +60,7 @@ #' @template arg_prettynames #' @return The ggplot2 object.
#' @export -plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = 10L, ..., +plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = 10L, ..., gridsize, pointsize = 2, prob.alpha = TRUE, se.band = TRUE, err.mark = "train", @@ -86,10 +86,12 @@ plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = assertSubset(features, choices = fns) } taskdim = length(features) - if (td$type %in% c("classif", "cluster") && taskdim != 2L) + if (td$type %in% c("classif", "cluster") && taskdim != 2L) { stopf("Classification and clustering: currently only 2D plots supported, not: %i", taskdim) - if (td$type == "regr" && taskdim %nin% 1:2) + } + if (td$type == "regr" && taskdim %nin% 1:2) { stopf("Regression: currently only 1D and 2D plots supported, not: %i", taskdim) + } measures = checkMeasures(measures, task) cv = asCount(cv) @@ -107,8 +109,9 @@ plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = assertNumber(err.size, lower = 0) assertLogical(greyscale) - if (td$type == "classif" && err.mark == "cv" && cv == 0L) + if (td$type == "classif" && err.mark == "cv" && cv == 0L) { stopf("Classification: CV must be switched on, with 'cv' > 0, for err.type = 'cv'!") + } # subset to features, set hyperpars task = subsetTask(task, features = features) @@ -117,17 +120,20 @@ plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = # some shortcut names target = td$target data = getTaskData(task) - if (td$type != "cluster") + if (td$type != "cluster") { y = getTaskTargets(task) + } x1n = features[1L] x1 = data[, x1n] # predictions # if learner supports prob or se, enable it - if (td$type == "regr" && taskdim == 1L && hasLearnerProperties(learner, "se")) + if (td$type == "regr" && taskdim == 1L && hasLearnerProperties(learner, "se")) { learner = setPredictType(learner, "se") - if (td$type == "classif" && hasLearnerProperties(learner, "prob")) + } + if (td$type == "classif" && hasLearnerProperties(learner, "prob")) { learner = setPredictType(learner, "prob") + } mod = train(learner, task) pred.train = predict(mod, task) yhat = pred.train$data$response @@ -161,12 +167,13 @@ plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = grid[, target] = pred.grid$data$response if (td$type == "classif") { - data$.err = if (err.mark == "train") + data$.err = if (err.mark == "train") { y != yhat - else if (err.mark == "cv") + } else if (err.mark == "cv") { y != pred.cv$data[order(pred.cv$data$id), "response"] - else + } else { TRUE + } if (taskdim == 2L) { p = ggplot(grid, aes_string(x = x1n, y = x2n)) if (hasLearnerProperties(learner, "prob") && prob.alpha) { @@ -194,7 +201,7 @@ plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = # print error points p = p + geom_point(data = subset(data, data$.err), mapping = aes_string(x = x1n, y = x2n, shape = target), size = err.size, show.legend = FALSE) - p = p + guides(alpha = FALSE) + p = p + guides(alpha = FALSE) } } else if (td$type == "cluster") { if (taskdim == 2L) { @@ -216,7 +223,7 @@ plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = p = p + geom_ribbon(data = grid, mapping = aes_string(ymin = ".ymin", ymax = ".ymax"), alpha = 0.2) } } else if (taskdim == 2L) { - #FIXME: color are not scaled correctly? can be improved? + # FIXME: color are not scaled correctly? can be improved? 
# plot background from model / grid p = ggplot(mapping = aes_string(x = x1n, y = x2n)) p = p + geom_raster(data = grid, mapping = aes_string(fill = target)) @@ -228,7 +235,7 @@ plotLearnerPrediction = function(learner, task, features = NULL, measures, cv = size = pointsize, colour = "black", shape = 1) # plot point, with circle and interior color for y p = p + scale_colour_gradient2(low = bg.cols[1L], mid = bg.cols[2L], high = bg.cols[3L], space = "Lab") - p = p + guides(colour = FALSE) + p = p + guides(colour = FALSE) } } diff --git a/R/plotResiduals.R b/R/plotResiduals.R index 55b931105b..c12ac3d162 100644 --- a/R/plotResiduals.R +++ b/R/plotResiduals.R @@ -38,8 +38,9 @@ plotResiduals.Prediction = function(obj, type = "scatterplot", loess.smooth = TR rug = TRUE, pretty.names = TRUE) { task.type = obj$task.desc$type - if (task.type %nin% c("regr", "classif")) + if (task.type %nin% c("regr", "classif")) { stopf("Task type must be 'regr' or 'classif'. But has type '%s'.", task.type) + } df = as.data.frame(obj) @@ -52,13 +53,15 @@ plotResiduals.Prediction = function(obj, type = "scatterplot", loess.smooth = TR plotResiduals.BenchmarkResult = function(obj, type = "scatterplot", loess.smooth = TRUE, rug = TRUE, pretty.names = TRUE) { - task.type = getBMRObjects(obj, as.df = TRUE, fun = function(X){ + task.type = getBMRObjects(obj, as.df = TRUE, fun = function(X) { + getRRTaskDesc(X)$type }) task.type = unique(task.type$p) - if (task.type %nin% c("regr", "classif")) + if (task.type %nin% c("regr", "classif")) { stopf("Task type must be 'regr' or 'classif'. But has type '%s'.", task.type) + } df = getBMRPredictions(obj, as.df = TRUE) @@ -85,10 +88,12 @@ makeResidualPlot = function(df, type = "scatterplot", loess.smooth = TRUE, } else { p = p + geom_point() - if (loess.smooth) + if (loess.smooth) { p = p + geom_smooth(se = FALSE) - if (rug) + } + if (rug) { p = p + geom_rug(color = "red") + } } p = p + ggtitle("True value vs. 
fitted value") } else { diff --git a/R/plotTuneMultiCritResult.R b/R/plotTuneMultiCritResult.R index e7cd2355ff..a9328c89fa 100644 --- a/R/plotTuneMultiCritResult.R +++ b/R/plotTuneMultiCritResult.R @@ -26,14 +26,17 @@ #' @examples #' # see tuneParamsMultiCrit plotTuneMultiCritResult = function(res, path = TRUE, col = NULL, shape = NULL, pointsize = 2, pretty.names = TRUE) { + assertClass(res, "TuneMultiCritResult") assertFlag(path) op1 = res$opt.path op2 = as.data.frame(op1) - if (!is.null(col)) + if (!is.null(col)) { assertChoice(col, choices = colnames(op2)) - if (!is.null(shape)) + } + if (!is.null(shape)) { assertChoice(shape, colnames(op2)) + } names.y = colnames(res$y)[1:2] @@ -49,8 +52,9 @@ plotTuneMultiCritResult = function(res, path = TRUE, col = NULL, shape = NULL, p p = ggplot(data, mapping = map) p = p + geom_point(size = pointsize) - if (path) + if (path) { p = p + geom_point(data = front, size = pointsize * 1.5) + } if (pretty.names) { names.y = sapply(res$measures, function(x) x$id) p = p + labs(x = names.y[1L], y = names.y[2L]) diff --git a/R/predict.R b/R/predict.R index 3e2a25ebc3..b634d2cfe6 100644 --- a/R/predict.R +++ b/R/predict.R @@ -31,7 +31,7 @@ #' p = predict(model, newdata = iris, subset = test.set) #' print(p) #' predict(model, task = iris.task, subset = test.set) -#' +#' #' # predict now probabiliies instead of class labels #' lrn = makeLearner("classif.lda", predict.type = "prob") #' model = train(lrn, iris.task, subset = train.set) @@ -39,8 +39,10 @@ #' print(p) #' getPredictionProbabilities(p) predict.WrappedModel = function(object, task, newdata, subset = NULL, ...) { - if (!xor(missing(task), missing(newdata))) + + if (!xor(missing(task), missing(newdata))) { stop("Pass either a task object or a newdata data.frame to predict, but not both!") + } assertClass(object, classes = "WrappedModel") model = object learner = model$learner @@ -53,7 +55,7 @@ predict.WrappedModel = function(object, task, newdata, subset = NULL, ...) { } else { assertDataFrame(newdata, min.rows = 1L) if (class(newdata)[1] != "data.frame") { - warningf("Provided data for prediction is not a pure data.frame but from class %s, hence it will be converted.", class(newdata)[1]) + warningf("Provided data for prediction is not a pure data.frame but from class %s, hence it will be converted.", class(newdata)[1]) newdata = as.data.frame(newdata) } size = nrow(newdata) @@ -78,11 +80,13 @@ predict.WrappedModel = function(object, task, newdata, subset = NULL, ...) { # get truth and drop target col, if target in newdata if (!all(is.na(t.col))) { - if (length(t.col) > 1L && anyMissing(t.col)) + if (length(t.col) > 1L && anyMissing(t.col)) { stop("Some but not all target columns found in data") + } truth = newdata[, t.col, drop = TRUE] - if (is.list(truth)) + if (is.list(truth)) { truth = data.frame(truth) + } newdata = newdata[, -t.col, drop = FALSE] } else { truth = NULL @@ -97,7 +101,7 @@ predict.WrappedModel = function(object, task, newdata, subset = NULL, ...) { time.predict = NA_real_ dump = getFailureModelDump(model) } else { - #FIXME: this copies newdata + # FIXME: this copies newdata pars = list( .learner = learner, .model = model, @@ -105,26 +109,35 @@ predict.WrappedModel = function(object, task, newdata, subset = NULL, ...) 
{ ) pars = c(pars, getHyperPars(learner, c("predict", "both"))) debug.seed = getMlrOption("debug.seed", NULL) - if (!is.null(debug.seed)) + if (!is.null(debug.seed)) { set.seed(debug.seed) + } opts = getLearnerOptions(learner, c("show.learner.output", "on.learner.error", "on.learner.warning", "on.error.dump")) fun1 = if (opts$show.learner.output) identity else capture.output fun2 = if (opts$on.learner.error == "stop") identity else function(x) try(x, silent = TRUE) - fun3 = if (opts$on.learner.error == "stop" || !opts$on.error.dump) identity else function(x) { + fun3 = if (opts$on.learner.error == "stop" || !opts$on.error.dump) { + identity + } else { + function(x) { + withCallingHandlers(x, error = function(c) utils::dump.frames()) } + } if (opts$on.learner.warning == "quiet") { old.warn.opt = getOption("warn") on.exit(options(warn = old.warn.opt)) options(warn = -1L) } - time.predict = measureTime(fun1({p = fun2(fun3(do.call(predictLearner2, pars)))})) + time.predict = measureTime(fun1({ + p = fun2(fun3(do.call(predictLearner2, pars))) + })) # was there an error during prediction? if (is.error(p)) { - if (opts$on.learner.error == "warn") + if (opts$on.learner.error == "warn") { warningf("Could not predict with learner %s: %s", learner$id, as.character(p)) + } error = as.character(p) p = predictFailureModel(model, newdata) time.predict = NA_real_ @@ -133,10 +146,11 @@ predict.WrappedModel = function(object, task, newdata, subset = NULL, ...) { } } } - if (missing(task)) + if (missing(task)) { ids = NULL - else + } else { ids = subset + } makePrediction(task.desc = td, row.names = rownames(newdata), id = ids, truth = truth, predict.type = learner$predict.type, predict.threshold = learner$predict.threshold, y = p, time = time.predict, error = error, dump = dump) }
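The xor check in predict.WrappedModel above enforces exactly one data source per call; a usage sketch of the two accepted forms (subsets invented, mirroring the roxygen example further up):

library(mlr)
mod = train(makeLearner("classif.lda"), iris.task)
p1 = predict(mod, task = iris.task, subset = seq(1, 150, by = 2)) # via task
p2 = predict(mod, newdata = iris[seq(2, 150, by = 2), ])          # via newdata
# predict(mod, task = iris.task, newdata = iris) # error: both passed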
diff --git a/R/predictLearner.R b/R/predictLearner.R index 424a5c7a92..35d5063073 100644 --- a/R/predictLearner.R +++ b/R/predictLearner.R @@ -37,6 +37,7 @@ #' } #' @export predictLearner = function(.learner, .model, .newdata, ...) { + lmod = getLearnerModel(.model) if (inherits(lmod, "NoFeaturesModel")) { predictNofeatures(.model, .newdata) @@ -47,6 +48,7 @@ predictLearner = function(.learner, .model, .newdata, ...) { } predictLearner2 = function(.learner, .model, .newdata, ...) { + # if we have that option enabled, set factor levels to complete levels from task if (.learner$fix.factors.prediction) { fls = .model$factor.levels @@ -54,9 +56,10 @@ predictLearner2 = function(.learner, .model, .newdata, ...) { # only keep factor columns that are present in .newdata ns = intersect(colnames(.newdata), ns) fls = fls[ns] - if (length(ns) > 0L) + if (length(ns) > 0L) { .newdata[ns] = mapply(factor, x = .newdata[ns], - levels = fls, SIMPLIFY = FALSE) + levels = fls, SIMPLIFY = FALSE) + } } p = predictLearner(.learner, .model, .newdata, ...) p = checkPredictLearnerOutput(.learner, .model, p) @@ -81,58 +84,73 @@ predictLearner2 = function(.learner, .model, .newdata, ...) { #' @keywords internal #' @export checkPredictLearnerOutput = function(learner, model, p) { + cl = class(p)[1L] if (learner$type == "classif") { levs = model$task.desc$class.levels if (learner$predict.type == "response") { # the levels of the predicted classes might not be complete.... # be sure to add the levels at the end, otherwise data gets changed!!! - if (!is.factor(p)) + if (!is.factor(p)) { stopf("predictLearner for %s has returned a class %s instead of a factor!", learner$id, cl) + } levs2 = levels(p) - if (length(levs2) != length(levs) || any(levs != levs2)) + if (length(levs2) != length(levs) || any(levs != levs2)) { p = factor(p, levels = levs) + } } else if (learner$predict.type == "prob") { - if (!is.matrix(p)) + if (!is.matrix(p)) { stopf("predictLearner for %s has returned a class %s instead of a matrix!", learner$id, cl) + } cns = colnames(p) - if (is.null(cns) || length(cns) == 0L) + if (is.null(cns) || length(cns) == 0L) { stopf("predictLearner for %s has not returned the class levels as column names, but no column names at all!", learner$id) - if (!setequal(cns, levs)) + } + if (!setequal(cns, levs)) { stopf("predictLearner for %s has not returned the class levels as column names: %s", learner$id, collapse(colnames(p))) + } } } else if (learner$type == "regr") { if (learner$predict.type == "response") { - if (cl != "numeric") + if (cl != "numeric") { stopf("predictLearner for %s has returned a class %s instead of a numeric!", learner$id, cl) - } else if (learner$predict.type == "se") { - if (!is.matrix(p)) + } + } else if (learner$predict.type == "se") { + if (!is.matrix(p)) { stopf("predictLearner for %s has returned a class %s instead of a matrix!", learner$id, cl) - if (ncol(p) != 2L) + } + if (ncol(p) != 2L) { stopf("predictLearner for %s has not returned a numeric matrix with 2 columns!", learner$id) + } } } else if (learner$type == "surv") { - if (learner$predict.type == "prob") + if (learner$predict.type == "prob") { stop("Survival does not support prediction of probabilities yet.") - if (!is.numeric(p)) + } + if (!is.numeric(p)) { stopf("predictLearner for %s has returned a class %s instead of a numeric!", learner$id, cl) - } else if (learner$type == "cluster") { + } + } else if (learner$type == "cluster") { if (learner$predict.type == "response") { - if (cl != "integer") + if (cl != "integer") { stopf("predictLearner for %s has returned a class %s instead of an integer!", learner$id, cl) + } } else if (learner$predict.type == "prob") { - if (!is.matrix(p)) + if (!is.matrix(p)) { stopf("predictLearner for %s has returned a class %s instead of a matrix!", learner$id, cl) + } } - } else if (learner$type == "multilabel") { + } else if (learner$type == "multilabel") { if (learner$predict.type == "response") { - if (!(is.matrix(p) && typeof(p) == "logical")) + if (!(is.matrix(p) && typeof(p) == "logical")) { stopf("predictLearner for %s has returned a class %s instead of a logical matrix!", learner$id, cl) - } else if (learner$predict.type == "prob") { - if (!(is.matrix(p) && typeof(p) == "double")) + } + } else if (learner$predict.type == "prob") { + if (!(is.matrix(p) && typeof(p) == "double")) { stopf("predictLearner for %s has returned a class %s instead of a numerical matrix!", learner$id, cl) + } } } return(p) diff --git a/R/relativeOverfitting.R b/R/relativeOverfitting.R index e40c20a42b..11fcf54e86 100644 --- a/R/relativeOverfitting.R +++ b/R/relativeOverfitting.R @@ -28,12 +28,14 @@ #' @name estimateRelativeOverfitting #' @rdname estimateRelativeOverfitting estimateRelativeOverfitting = function(predish, measures, task, learner = NULL, pred.train = NULL, iter = 1) { + assertClass(task, classes = "Task") UseMethod("estimateRelativeOverfitting") } #' @export estimateRelativeOverfitting.ResampleDesc = function(predish, measures, task, learner, ...)
{ + assertClass(learner, classes = "Learner") measures = checkMeasures(measures, task) @@ -45,11 +47,13 @@ estimateRelativeOverfitting.ResampleDesc = function(predish, measures, task, lea #' @export estimateRelativeOverfitting.ResamplePrediction = function(predish, measures, task, ...) { + measures = checkMeasures(measures, task) mids = vcapply(measures, function(m) m$id) iterations = unique(predish$data$iter) rbindlist(lapply(iterations, function(i) { + data = predish$data[predish$data$iter == i & predish$data$set == "test", ] pred.test = makePrediction(task$task.desc, row.names(data), data$id, data$truth, predish$predict.type, predish$predict.threshold, data$response, predish$time[i]) @@ -62,6 +66,7 @@ estimateRelativeOverfitting.ResamplePrediction = function(predish, measures, tas #' @export estimateRelativeOverfitting.Prediction = function(predish, measures, task, learner, pred.train, iter = 1) { + assertClass(pred.train, classes = "Prediction") measures = checkMeasures(measures, task) mids = vcapply(measures, function(m) m$id) diff --git a/R/removeConstantFeatures.R b/R/removeConstantFeatures.R index a66c80cf23..3298bed8fe 100644 --- a/R/removeConstantFeatures.R +++ b/R/removeConstantFeatures.R @@ -28,11 +28,13 @@ #' @export #' @family eda_and_preprocess removeConstantFeatures = function(obj, perc = 0, dont.rm = character(0L), na.ignore = FALSE, tol = .Machine$double.eps^.5, show.info = getMlrOption("show.info")) { + UseMethod("removeConstantFeatures") } #' @export removeConstantFeatures.Task = function(obj, perc = 0, dont.rm = character(0L), na.ignore = FALSE, tol = .Machine$double.eps^.5, show.info = getMlrOption("show.info")) { + assertCharacter(dont.rm) dont.rm = union(dont.rm, getTaskTargetNames(obj)) data = removeConstantFeatures(getTaskData(obj), perc = perc, dont.rm = dont.rm, na.ignore = na.ignore, tol = tol, show.info = show.info) @@ -41,26 +43,32 @@ removeConstantFeatures.Task = function(obj, perc = 0, dont.rm = character(0L), n #' @export removeConstantFeatures.data.frame = function(obj, perc = 0, dont.rm = character(0L), na.ignore = FALSE, tol = .Machine$double.eps^.5, show.info = getMlrOption("show.info")) { + assertNumber(perc, lower = 0, upper = 1) assertSubset(dont.rm, choices = names(obj)) assertFlag(na.ignore) assertNumber(tol, lower = 0) assertFlag(show.info) - if (any(!dim(obj))) + if (any(!dim(obj))) { return(obj) + } isEqual = function(x, y) { + res = (x == y) | (is.na(x) & is.na(y)) replace(res, is.na(res), FALSE) } digits = ceiling(log10(1 / tol)) cns = setdiff(colnames(obj), dont.rm) ratio = vnapply(obj[cns], function(x) { - if (allMissing(x)) + + if (allMissing(x)) { return(0) - if (is.double(x)) + } + if (is.double(x)) { x = round(x, digits = digits) + } m = computeMode(x, na.rm = na.ignore, ties.method = "first") if (na.ignore) { mean(m != x, na.rm = TRUE) @@ -70,8 +78,8 @@ removeConstantFeatures.data.frame = function(obj, perc = 0, dont.rm = character( }, use.names = FALSE) dropcols = cns[ratio <= perc] - if (show.info && length(dropcols)) + if (show.info && length(dropcols)) { messagef("Removing %i columns: %s", length(dropcols), collapse(dropcols)) + } dropNamed(obj, dropcols) } - diff --git a/R/removeHyperPars.R b/R/removeHyperPars.R index 8caa7dd753..14ec30a503 100644 --- a/R/removeHyperPars.R +++ b/R/removeHyperPars.R @@ -12,18 +12,19 @@ #' @export #' @family learner removeHyperPars = function(learner, ids = character(0L)) { + assertClass(learner, classes = "Learner") assertCharacter(ids, any.missing = FALSE) d = setdiff(ids, 
names(getHyperPars(learner))) - if (length(d) > 0L) + if (length(d) > 0L) { stopf("Trying to remove param settings which were not set before: %s", collapse(d)) + } UseMethod("removeHyperPars") } #' @export removeHyperPars.Learner = function(learner, ids = character(0L)) { + learner$par.vals[ids] = NULL return(learner) } - - diff --git a/R/resample.R b/R/resample.R index 5c39a45149..9d2a526e8f 100644 --- a/R/resample.R +++ b/R/resample.R @@ -66,7 +66,7 @@ #' print(r$aggr) #' print(r$measures.test) #' print(r$pred) -#' +#' #' # include the training set performance as well #' rdesc = makeResampleDesc("CV", iters = 2, predict = "both") #' r = resample(makeLearner("classif.qda"), task, rdesc, @@ -80,23 +80,28 @@ resample = function(learner, task, resampling, measures, weights = NULL, models assertClass(task, classes = "Task") n = getTaskSize(task) # instantiate resampling - if (inherits(resampling, "ResampleDesc")) + if (inherits(resampling, "ResampleDesc")) { resampling = makeResampleInstance(resampling, task = task) + } assertClass(resampling, classes = "ResampleInstance") measures = checkMeasures(measures, task) if (!is.null(weights)) { assertNumeric(weights, len = n, any.missing = FALSE, lower = 0) } assertFlag(models) - if (missing(extract)) - extract = function(model) {} - else + if (missing(extract)) { + extract = function(model) { + + } + } else { assertFunction(extract) + } assertFlag(show.info) r = resampling$size - if (n != r) + if (n != r) { stop(stri_paste("Size of data set:", n, "and resampling instance:", r, "differ!", sep = " ")) + } checkLearnerBeforeTrain(task, learner, weights) checkAggrsBeforeResample(measures, resampling$desc) @@ -140,6 +145,7 @@ resample = function(learner, task, resampling, measures, weights = NULL, models # this wraps around calculateResampleIterationResult and contains the subsetting for a specific fold i doResampleIteration = function(learner, task, rin, i, measures, weights, model, extract, show.info) { + setSlaveOptions() train.i = rin$train.inds[[i]] test.i = rin$test.inds[[i]] @@ -148,7 +154,7 @@ doResampleIteration = function(learner, task, rin, i, measures, weights, model, } -#Evaluate one train/test split of the resample function and get one or more performance values +# Evaluate one train/test split of the resample function and get one or more performance values calculateResampleIterationResult = function(learner, task, i, train.i, test.i, measures, weights, rdesc, model, extract, show.info) { @@ -243,8 +249,9 @@ calculateResampleIterationResult = function(learner, task, i, train.i, test.i, m } -#Merge a list of train/test splits created by calculateResampleIterationResult to one resample result +# Merge a list of train/test splits created by calculateResampleIterationResult to one resample result mergeResampleResult = function(learner.id, task, iter.results, measures, rin, models, extract, keep.pred, show.info, runtime) { + iters = length(iter.results) mids = vcapply(measures, function(m) m$id) @@ -259,6 +266,7 @@ mergeResampleResult = function(learner.id, task, iter.results, measures, rin, mo # aggr = vnapply(measures, function(m) m$aggr$fun(task, ms.test[, m$id], ms.train[, m$id], m, rin$group, pred)) aggr = vnapply(seq_along(measures), function(i) { + m = measures[[i]] m$aggr$fun(task, ms.test[, i], ms.train[, i], m, rin$group, pred) }) @@ -290,8 +298,9 @@ mergeResampleResult = function(learner.id, task, iter.results, measures, rin, mo message("\n") } - if (!keep.pred) + if (!keep.pred) { pred = NULL + } list( learner.id = learner.id, diff
--git a/R/resample_convenience.R b/R/resample_convenience.R index 14bd43bdb8..2d2c4c0669 100644 --- a/R/resample_convenience.R +++ b/R/resample_convenience.R @@ -3,6 +3,7 @@ #' @rdname resample #' @export crossval = function(learner, task, iters = 10L, stratify = FALSE, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner) learner = setHyperPars(learner, ...) rdesc = makeResampleDesc("CV", iters = iters, stratify = stratify) @@ -12,6 +13,7 @@ crossval = function(learner, task, iters = 10L, stratify = FALSE, measures, mode #' @rdname resample #' @export repcv = function(learner, task, folds = 10L, reps = 10L, stratify = FALSE, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner) learner = setHyperPars(learner, ...) rdesc = makeResampleDesc("RepCV", folds = folds, reps = reps, stratify = stratify) @@ -21,6 +23,7 @@ repcv = function(learner, task, folds = 10L, reps = 10L, stratify = FALSE, measu #' @rdname resample #' @export holdout = function(learner, task, split = 2 / 3, stratify = FALSE, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner) learner = setHyperPars(learner, ...) rdesc = makeResampleDesc("Holdout", split = split, stratify = stratify) @@ -30,6 +33,7 @@ holdout = function(learner, task, split = 2 / 3, stratify = FALSE, measures, mod #' @rdname resample #' @export subsample = function(learner, task, iters = 30, split = 2 / 3, stratify = FALSE, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner) learner = setHyperPars(learner, ...) rdesc = makeResampleDesc("Subsample", iters = iters, split = split, stratify = stratify) @@ -39,6 +43,7 @@ subsample = function(learner, task, iters = 30, split = 2 / 3, stratify = FALSE, #' @rdname resample #' @export bootstrapOOB = function(learner, task, iters = 30, stratify = FALSE, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner) learner = setHyperPars(learner, ...) rdesc = makeResampleDesc("Bootstrap", iters = iters, stratify = stratify) @@ -48,6 +53,7 @@ bootstrapOOB = function(learner, task, iters = 30, stratify = FALSE, measures, m #' @rdname resample #' @export bootstrapB632 = function(learner, task, iters = 30, stratify = FALSE, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner) learner = setHyperPars(learner, ...) assertClass(task, classes = "Task") @@ -59,6 +65,7 @@ bootstrapB632 = function(learner, task, iters = 30, stratify = FALSE, measures, #' @rdname resample #' @export bootstrapB632plus = function(learner, task, iters = 30, stratify = FALSE, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner) learner = setHyperPars(learner, ...) assertClass(task, classes = "Task") @@ -70,6 +77,7 @@ bootstrapB632plus = function(learner, task, iters = 30, stratify = FALSE, measur #' @rdname resample #' @export growingcv = function(learner, task, horizon = 1, initial.window = .5, skip = 0, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner, ...) 
rdesc = makeResampleDesc("GrowingCV", horizon = horizon, initial.window = initial.window, skip = skip) measures = checkMeasures(measures, task, aggr = b632plus) @@ -79,6 +87,7 @@ growingcv = function(learner, task, horizon = 1, initial.window = .5, skip = 0, #' @rdname resample #' @export fixedcv = function(learner, task, horizon = 1L, initial.window = .5, skip = 0, measures, models = FALSE, keep.pred = TRUE, ..., show.info = getMlrOption("show.info")) { + learner = checkLearner(learner, ...) rdesc = makeResampleDesc("FixedCV", horizon = horizon, initial.window = initial.window, skip = skip) measures = checkMeasures(measures, task, aggr = b632plus) diff --git a/R/selectFeatures.R b/R/selectFeatures.R index 74506dd653..60f8c42e1a 100644 --- a/R/selectFeatures.R +++ b/R/selectFeatures.R @@ -46,10 +46,12 @@ selectFeatures = function(learner, task, resampling, measures, learner = checkLearner(learner) assertClass(task, classes = "SupervisedTask") - if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) + if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) { stop("Argument resampling must be of class ResampleDesc or ResampleInstance!") - if (inherits(resampling, "ResampleDesc") && control$same.resampling.instance) + } + if (inherits(resampling, "ResampleDesc") && control$same.resampling.instance) { resampling = makeResampleInstance(resampling, task = task) + } measures = checkMeasures(measures, learner) if (missing(bit.names)) { bit.names = getTaskFeatureNames(task) @@ -65,6 +67,7 @@ selectFeatures = function(learner, task, resampling, measures, assertFunction(bits.to.features, args = c("x", "task")) # wrap the function to prevent wrong user input and give meaningful errors bits.to.features2 = function(x, task) { + force(bits.to.features) res = bits.to.features(x, task) if (!testCharacter(res)) { @@ -82,7 +85,7 @@ selectFeatures = function(learner, task, resampling, measures, par.set = lapply(bit.names, function(bn) makeIntegerParam(bn)) par.set = do.call(makeParamSet, par.set) - #checkVarselParset(learner, par.set, bit.names, control) + # checkVarselParset(learner, par.set, bit.names, control) need.extra = control$tune.threshold || getMlrOption("on.error.dump") opt.path = makeOptPathDFFromMeasures(par.set, measures, include.extra = need.extra) control = setDefaultImputeVal(control, measures) @@ -102,8 +105,9 @@ selectFeatures = function(learner, task, resampling, measures, or = sel.func(learner, task, resampling, measures, bit.names, bits.to.features2, control, opt.path, show.info) - if (show.info) + if (show.info) { messagef("[FeatSel] Result: %s (%i bits)", clipString(collapse(or$x.bit.names), 30L), length(or$x.bit.names), perfsToString(or$y)) + } return(or) } diff --git a/R/selectFeaturesExhaustive.R b/R/selectFeaturesExhaustive.R index 4b3bb00751..5a59a154b1 100644 --- a/R/selectFeaturesExhaustive.R +++ b/R/selectFeaturesExhaustive.R @@ -1,12 +1,14 @@ selectFeaturesExhaustive = function(learner, task, resampling, measures, bit.names, bits.to.features, control, opt.path, show.info) { + p = length(bit.names) states = list(rep(0, p)) for (i in seq_len(min(control$max.features, p, na.rm = TRUE))) { x = combn(seq_len(p), i) s = lapply(seq_col(x), function(j) { - b = rep(0, p) - b[x[, j]] = 1 - b + + b = rep(0, p) + b[x[, j]] = 1 + b }) states = c(states, s) } diff --git a/R/selectFeaturesGA.R b/R/selectFeaturesGA.R index 9daffe3848..c45d57eea2 100644 --- a/R/selectFeaturesGA.R +++ b/R/selectFeaturesGA.R @@ -1,4 +1,5 @@ 
selectFeaturesGA = function(learner, task, resampling, measures, bit.names, bits.to.features, control, opt.path, show.info) { + # generate mu feature sets (of correct size) states = list() mu = control$extra.args$mu @@ -8,8 +9,9 @@ selectFeaturesGA = function(learner, task, resampling, measures, bit.names, bits for (i in seq_len(mu)) { while (TRUE) { states[[i]] = rbinom(length(bit.names), 1, 0.5) - if (is.na(control$max.features) || sum(states[[i]] <= control$max.features)) + if (is.na(control$max.features) || sum(states[[i]]) <= control$max.features) { break + } } } evalOptimizationStatesFeatSel(learner, task, resampling, measures, @@ -49,12 +51,14 @@ selectFeaturesGA = function(learner, task, resampling, measures, bit.names, bits # sample 2 random parents, CX, mutate --> 1 kid # (repeat in a loop if max.features not satisfied) generateKid = function(featmat, control) { + parents = sample(seq_row(featmat), 2L, replace = TRUE) while (TRUE) { kid = crossover(featmat[parents[1L], ], featmat[parents[2L], ], control$extra.args$crossover.rate) kid = mutateBits(kid, control$extra.args$mutation.rate) - if (is.na(control$max.features) || sum(kid) <= control$max.features) + if (is.na(control$max.features) || sum(kid) <= control$max.features) { break + } } return(kid) } diff --git a/R/selectFeaturesRandom.R b/R/selectFeaturesRandom.R index 71bb811ee1..bdcde2e1ce 100644 --- a/R/selectFeaturesRandom.R +++ b/R/selectFeaturesRandom.R @@ -2,7 +2,7 @@ selectFeaturesRandom = function(learner, task, resampling, measures, bit.names, control, opt.path, show.info) { states = lapply(seq_len(control$maxit), function(i) createStates(n = length(bit.names), - max.features = control$max.features, prob = control$extra.args$prob)) + max.features = control$max.features, prob = control$extra.args$prob)) evalOptimizationStatesFeatSel(learner, task, resampling, measures, bits.to.features, control, opt.path, show.info, states, 1L, NA_integer_) makeFeatSelResultFromOptPath(learner, measures, resampling, control, opt.path, task = task, bits.to.features = bits.to.features) @@ -10,14 +10,17 @@ selectFeaturesRandom = function(learner, task, resampling, measures, bit.names, # help function in order to respect max.features -createStates = function(n, max.features, prob){ - if (is.na(max.features)) +createStates = function(n, max.features, prob) { + + if (is.na(max.features)) { return(rbinom(n, 1, prob)) + } run.loop = TRUE while (run.loop) { x = rbinom(n, 1, prob) - if (sum(x) <= max.features) + if (sum(x) <= max.features) { run.loop = FALSE + } } return(x) } diff --git a/R/selectFeaturesSequential.R b/R/selectFeaturesSequential.R index cf3a54b8e5..55bd17f457 100644 --- a/R/selectFeaturesSequential.R +++ b/R/selectFeaturesSequential.R @@ -1,12 +1,16 @@ # FIXME: compare relative selectFeaturesSequential = function(learner, task, resampling, measures, bit.names, bits.to.features, control, opt.path, show.info) { + seq.step = function(forward, state, gen.new.states, compare) { + # we have too many vars already and cannot move forward - if (forward && !is.na(control$max.features) && control$max.features <= sum(unlist(state$x))) + if (forward && !is.na(control$max.features) && control$max.features <= sum(unlist(state$x))) { return(NULL) + } xs = gen.new.states(state$x) - if (length(xs) == 0) + if (length(xs) == 0) { return(NULL) + } dob = max(opt.path$env$dob) + 1L # die at once evalOptimizationStatesFeatSel(learner, task, resampling, measures, bits.to.features, control, opt.path, show.info, xs, dob, dob) @@ -26,6 +30,7 @@
selectFeaturesSequential = function(learner, task, resampling, measures, bit.nam } gen.new.states.sfs = function(x) { + xs = list() for (i in seq_along(x)) if (x[i] == 0) { @@ -37,6 +42,7 @@ selectFeaturesSequential = function(learner, task, resampling, measures, bit.nam } gen.new.states.sbs = function(x) { + xs = list() for (i in seq_along(x)) if (x[i] == 1) { @@ -74,7 +80,7 @@ selectFeaturesSequential = function(learner, task, resampling, measures, bit.nam forward = (method %in% c("sfs", "sffs")) fail = 0 - while ((method %in% c("sfs", "sbs") && fail == 0) || (method %in% c("sffs", "sfbs") && fail < 2)) { + while ((method %in% c("sfs", "sbs") && fail == 0) || (method %in% c("sffs", "sfbs") && fail < 2)) { state2 = seq.step(forward, state, gen.new.states, compare) # we could not move to state2 in normal step, stay where we are if (!is.null(state2)) { @@ -85,11 +91,11 @@ selectFeaturesSequential = function(learner, task, resampling, measures, bit.nam fail = fail + 1 } if (method %in% c("sffs", "sfbs")) { - #cat("forward:", !forward, "\n") + # cat("forward:", !forward, "\n") gns = switch(method, sffs = gen.new.states.sbs, sfbs = gen.new.states.sfs - ) + ) state2 = seq.step(!forward, state, gns, compare) if (!is.null(state2)) { state = state2 @@ -104,7 +110,8 @@ selectFeaturesSequential = function(learner, task, resampling, measures, bit.nam # if last generation contains no better element, go to second to last last = max(opt.path$env$dob) - if (all(opt.path$env$eol[opt.path$env$dob == last] == last)) + if (all(opt.path$env$eol[opt.path$env$dob == last] == last)) { last = last - 1 + } makeFeatSelResultFromOptPath(learner, measures, resampling, control, opt.path, dob = last, ties = "first", task = task, bits.to.features = bits.to.features) } diff --git a/R/setHyperPars.R b/R/setHyperPars.R index 983a1ad14c..2d0eb3f029 100644 --- a/R/setHyperPars.R +++ b/R/setHyperPars.R @@ -20,6 +20,7 @@ #' # note the now set and altered hyperparameters: #' print(cl2) setHyperPars = function(learner, ..., par.vals = list()) { + args = list(...) assertList(args, names = "unique", .var.name = "parameter settings") assertList(par.vals, names = "unique", .var.name = "parameter settings") @@ -33,13 +34,16 @@ setHyperPars = function(learner, ..., par.vals = list()) { #' List of named (hyper)parameter settings. 
#' @export setHyperPars2 = function(learner, par.vals) { + UseMethod("setHyperPars2") } #' @export setHyperPars2.Learner = function(learner, par.vals) { - if (length(par.vals) == 0L) + + if (length(par.vals) == 0L) { return(learner) + } ns = names(par.vals) pars = learner$par.set$pars @@ -82,7 +86,7 @@ setHyperPars2.Learner = function(learner, par.vals) { } ## if valname of discrete par was used, transform it to real value - #if (pd$type == "discrete" && is.character(p) && length(p) == 1 && p %in% names(pd$values)) + # if (pd$type == "discrete" && is.character(p) && length(p) == 1 && p %in% names(pd$values)) # p = pd$values[[p]] learner$par.vals[[n]] = p } diff --git a/R/setId.R b/R/setId.R index aed7848797..1d56926b93 100644 --- a/R/setId.R +++ b/R/setId.R @@ -10,12 +10,10 @@ #' @export #' @family learner setId = function(learner, id) { + .Deprecated("setLearnerId") learner = checkLearner(learner) assertString(id) learner$id = id return(learner) } - - - diff --git a/R/setPredictThreshold.R b/R/setPredictThreshold.R index 69d2875e20..3fb263186c 100644 --- a/R/setPredictThreshold.R +++ b/R/setPredictThreshold.R @@ -12,12 +12,12 @@ #' @family learner #' @export setPredictThreshold = function(learner, predict.threshold) { + learner = checkLearner(learner, type = "classif") - if (learner$predict.type != "prob") + if (learner$predict.type != "prob") { stopf("predict.type = 'prob' must hold to set a predict.threshold!") + } assertNumeric(predict.threshold, any.missing = FALSE) learner$predict.threshold = predict.threshold return(learner) } - - diff --git a/R/setPredictType.R b/R/setPredictType.R index 2275128fc1..20c89f722f 100644 --- a/R/setPredictType.R +++ b/R/setPredictType.R @@ -21,12 +21,14 @@ #' @family learner #' @export setPredictType = function(learner, predict.type) { + assertClass(learner, classes = "Learner") UseMethod("setPredictType") } #' @export setPredictType.Learner = function(learner, predict.type) { + # checks should be done down here i guess, because of recursive calls in wrappers assertChoice(predict.type, choices = switch(learner$type, classif = c("response", "prob"), @@ -36,10 +38,12 @@ setPredictType.Learner = function(learner, predict.type) { costsens = "response", cluster = c("response", "prob") )) - if (predict.type == "prob" && !hasLearnerProperties(learner, "prob")) + if (predict.type == "prob" && !hasLearnerProperties(learner, "prob")) { stopf("Trying to predict probs, but %s does not support that!", learner$id) - if (predict.type == "se" && !hasLearnerProperties(learner, "se")) + } + if (predict.type == "se" && !hasLearnerProperties(learner, "se")) { stopf("Trying to predict standard errors, but %s does not support that!", learner$id) + } learner$predict.type = predict.type return(learner) } diff --git a/R/setThreshold.R b/R/setThreshold.R index 89976ddf02..93ad7fe1db 100644 --- a/R/setThreshold.R +++ b/R/setThreshold.R @@ -21,18 +21,19 @@ #' task = makeClassifTask(data = iris, target = "Species") #' lrn = makeLearner("classif.lda", predict.type = "prob") #' mod = train(lrn, task) -#' +#' #' # predict probabilities and compute performance #' pred = predict(mod, newdata = iris) #' performance(pred, measures = mmce) #' head(as.data.frame(pred)) -#' +#' #' # adjust threshold and predict probabilities again #' threshold = c(setosa = 0.4, versicolor = 0.3, virginica = 0.3) #' pred = setThreshold(pred, threshold = threshold) #' performance(pred, measures = mmce) #' head(as.data.frame(pred)) setThreshold = function(pred, threshold) { + # dont check for NAs in response, 
this will get overwritten anyway. # and object might not be constructed in full when we call this in Prediction checkPrediction(pred, task.type = c("classif", "multilabel"), predict.type = "prob", no.na = FALSE) @@ -44,8 +45,9 @@ setThreshold = function(pred, threshold) { threshold = c(threshold, 1 - threshold) names(threshold) = c(td$positive, td$negative) } - if (length(threshold) > 1L && !setequal(levs, names(threshold))) + if (length(threshold) > 1L && !setequal(levs, names(threshold))) { stop("Threshold names must correspond to classes!") + } p = getPredictionProbabilities(pred, cl = levs) # resort so we have same order in threshold and p threshold = threshold[levs] @@ -67,4 +69,3 @@ setThreshold = function(pred, threshold) { pred$threshold = threshold return(pred) } - diff --git a/R/simplifyMeasureNames.R b/R/simplifyMeasureNames.R index baa0f09e66..449bd6c1c3 100644 --- a/R/simplifyMeasureNames.R +++ b/R/simplifyMeasureNames.R @@ -10,6 +10,7 @@ #' @return ([character]). #' @export simplifyMeasureNames = function(xs) { + assertCharacter(xs, any.missing = FALSE) # get all measure names all.measure.names = listMeasures() diff --git a/R/smote.R b/R/smote.R index 9af5e3dbd5..408a4634df 100644 --- a/R/smote.R +++ b/R/smote.R @@ -41,14 +41,16 @@ #' @export #' @useDynLib mlr c_smote smote = function(task, rate, nn = 5L, standardize = TRUE, alt.logic = FALSE) { + checkTask(task, binary = TRUE) assertNumber(rate, lower = 1) nn = asInt(nn, lower = 1L) requirePackages("cluster", why = "smote", default.method = "load") # check for changeData later - if (!is.null(getTaskWeights(task))) + if (!is.null(getTaskWeights(task))) { stopf("SMOTE cannot be used with weights in task!") + } # shortcuts data = getTaskData(task) @@ -56,13 +58,15 @@ smote = function(task, rate, nn = 5L, standardize = TRUE, alt.logic = FALSE) { y = data[, target] x = dropNamed(data, target) z = getMinMaxClass(y) - if (z$min.size < nn) + if (z$min.size < nn) { stopf("You cannot set nn = %i, when the minimal class has size %i!", nn, z$min.size) + } x.min = x[z$min.inds, , drop = FALSE] n.min = nrow(x.min) # number of NEW cases n.new = ifelse(alt.logic, as.integer(rate - 1) * n.min, round((rate - 1) * n.min)) - if (n.new <= 0L) + if (n.new <= 0L) { return(task) + } res = matrix(0, n.new, ncol(x)) is.num = vlapply(x, is.numeric) @@ -71,8 +75,9 @@ smote = function(task, rate, nn = 5L, standardize = TRUE, alt.logic = FALSE) { x.min.matrix = x.min if (any(!is.num)) { for (i in seq_col(x.min.matrix)) { - if (!is.num[i]) + if (!is.num[i]) { x.min.matrix[, i] = as.numeric(as.integer(x.min.matrix[, i])) + } } } x.min.matrix = as.matrix(x.min.matrix) @@ -91,8 +96,9 @@ smote = function(task, rate, nn = 5L, standardize = TRUE, alt.logic = FALSE) { x.scaled = scale(x.min.matrix, x.min.matrix[i, ], ranges) if (any(!is.num)) { for (j in seq_col(x.scaled)) { - if (!is.num[j]) + if (!is.num[j]) { x.scaled[, j] = (x.scaled[, j] != 0) + } } } dist = drop(x.scaled^2 %*% rep(1, ncol(x.scaled))) @@ -110,9 +116,10 @@ smote = function(task, rate, nn = 5L, standardize = TRUE, alt.logic = FALSE) { res[(i - 1) * n.new.obs + n, ] = x.min.matrix[i, ] + runif(1) * diffs if (any(!is.num)) { for (j in seq_col(x.min.matrix)) { - if (!is.num[j]) + if (!is.num[j]) { res[(i - 1) * n.new.obs + n, j] = c(x.min.matrix[knns[neigh], j], x.min.matrix[i, j])[1 + round(runif(1), 0)] + } } } } @@ -135,7 +142,7 @@ smote = function(task, rate, nn = 5L, standardize = TRUE, alt.logic = FALSE) { for (i in seq_len(ncol(res))) { if (!is.num[i]) { res[, i] = factor(levels(x[, 
i])[as.integer(res[, i])], - levels = levels(x[, i])) + levels = levels(x[, i])) } } } diff --git a/R/summarizeColumns.R b/R/summarizeColumns.R index 75ad604549..a50b893171 100644 --- a/R/summarizeColumns.R +++ b/R/summarizeColumns.R @@ -26,22 +26,27 @@ #' @examples #' summarizeColumns(iris) summarizeColumns = function(obj) { + UseMethod("summarizeColumns") } #' @export summarizeColumns.Task = function(obj) { + summarizeColumns.data.frame(obj$env$data) } #' @export summarizeColumns.data.frame = function(obj) { + iqv = function(x, ...) { + 1 - mean(x == computeMode(x)) } # to be read as: if obj is numeric, return x, else call y(x) ifn = function(obj, x, y, ...) { + if (is.numeric(obj)) y = x if (is.function(y)) y(obj, ...) else y } diff --git a/R/summarizeLevels.R b/R/summarizeLevels.R index 25361cc0aa..4404516258 100644 --- a/R/summarizeLevels.R +++ b/R/summarizeLevels.R @@ -12,16 +12,19 @@ #' @family eda_and_preprocess #' summarizeLevels(iris) summarizeLevels = function(obj, cols = NULL) { + UseMethod("summarizeLevels") } #' @export summarizeLevels.Task = function(obj, cols = NULL) { + summarizeLevels.data.frame(obj$env$data, cols = cols) } #' @export summarizeLevels.data.frame = function(obj, cols = NULL) { + pred = function(x) is.factor(x) || is.logical(x) || is.character(x) cns = colnames(obj)[vlapply(obj, pred)] if (!is.null(cols)) { diff --git a/R/train.R b/R/train.R index 277be2bcae..b600ef8569 100644 --- a/R/train.R +++ b/R/train.R @@ -16,31 +16,35 @@ #' @seealso [predict.WrappedModel] #' @examples #' training.set = sample(seq_len(nrow(iris)), nrow(iris) / 2) -#' +#' #' ## use linear discriminant analysis to classify iris data #' task = makeClassifTask(data = iris, target = "Species") #' learner = makeLearner("classif.lda", method = "mle") #' mod = train(learner, task, subset = training.set) #' print(mod) -#' +#' #' ## use a classification tree (rpart) to classify iris data #' task = makeClassifTask(data = iris, target = "Species") #' learner = makeLearner("classif.rpart", minsplit = 7, predict.type = "prob") #' mod = train(learner, task, subset = training.set) #' print(mod) train = function(learner, task, subset = NULL, weights = NULL) { + learner = checkLearner(learner) assertClass(task, classes = "Task") - if (is.logical(subset)) - subset = which(subset) # I believe this is a bug, see #2098 + if (is.logical(subset)) { + subset = which(subset) + } # I believe this is a bug, see #2098 task = subsetTask(task, subset) if (is.null(subset)) { subset = seq_len(getTaskSize(task)) } else { - if (is.logical(subset)) - subset = which(subset) # I believe this is a bug, see #2098 - else + if (is.logical(subset)) { + subset = which(subset) + } # I believe this is a bug, see #2098 + else { subset = asInteger(subset) + } } if (learner$fix.factors.prediction) { tdat = getTaskData(task) @@ -79,24 +83,33 @@ train = function(learner, task, subset = NULL, weights = NULL) { opts = getLearnerOptions(learner, c("show.learner.output", "on.learner.error", "on.learner.warning", "on.error.dump")) # set the seed debug.seed = getMlrOption("debug.seed", NULL) - if (!is.null(debug.seed)) + if (!is.null(debug.seed)) { set.seed(debug.seed) + } # for optwrappers we want to see the tuning / varsel logging # FIXME: is case really ok for optwrapper? can we suppress then too?
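# Illustrative note (comment only; behavior unchanged): the three wrappers
# defined below compose as fun1(fun2(fun3(do.call(trainLearner, pars)))):
#   fun1 silences learner output via capture.output(suppressMessages(...)),
#   fun2 converts hard errors into try-errors when on.learner.error != "stop",
#   fun3 records a debug dump via utils::dump.frames() when on.error.dump is TRUE.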
fun1 = if (opts$show.learner.output || inherits(learner, "OptWrapper")) identity else function(x) capture.output(suppressMessages(x)) fun2 = if (opts$on.learner.error == "stop") identity else function(x) try(x, silent = TRUE) - fun3 = if (opts$on.learner.error == "stop" || !opts$on.error.dump) identity else function(x) { + fun3 = if (opts$on.learner.error == "stop" || !opts$on.error.dump) { + identity + } else { + function(x) { + withCallingHandlers(x, error = function(c) utils::dump.frames()) } + } if (opts$on.learner.warning == "quiet") { old.warn.opt = getOption("warn") on.exit(options(warn = old.warn.opt)) options(warn = -1L) } - time.train = measureTime(fun1({learner.model = fun2(fun3(do.call(trainLearner, pars)))})) + time.train = measureTime(fun1({ + learner.model = fun2(fun3(do.call(trainLearner, pars))) + })) # was there an error during training? maybe warn then - if (is.error(learner.model) && opts$on.learner.error == "warn") + if (is.error(learner.model) && opts$on.learner.error == "warn") { warningf("Could not train learner %s: %s", learner$id, as.character(learner.model)) + } } factor.levels = getTaskFactorLevels(task) makeWrappedModel(learner, learner.model, getTaskDesc(task), subset, vars, factor.levels, time.train) diff --git a/R/trainLearner.R b/R/trainLearner.R index 92e33a6fd7..64c497125e 100644 --- a/R/trainLearner.R +++ b/R/trainLearner.R @@ -21,6 +21,6 @@ #' @return (any). Model of the underlying learner. #' @export trainLearner = function(.learner, .task, .subset, .weights = NULL, ...) { + UseMethod("trainLearner") } - diff --git a/R/tuneCMAES.R b/R/tuneCMAES.R index 9e4f36bc52..f702deb1bd 100644 --- a/R/tuneCMAES.R +++ b/R/tuneCMAES.R @@ -1,4 +1,5 @@ tuneCMAES = function(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) { + requirePackages("cmaes", why = "tune_cmaes", default.method = "load") low = getLower(par.set) @@ -19,21 +20,25 @@ tuneCMAES = function(learner, task, resampling, measures, par.set, control, opt. budget = control$budget # either use user choice or lambda default, now lambda is set - if (is.null(ctrl.cmaes$lambda)) + if (is.null(ctrl.cmaes$lambda)) { ctrl.cmaes$lambda = 4 + floor(3 * log(N)) + } # if we have budget, calc maxit, otherwise use CMAES default, now maxit is set - maxit = if (is.null(budget)) + maxit = if (is.null(budget)) { ifelse(is.null(ctrl.cmaes$maxit), 100 * N^2, ctrl.cmaes$maxit) - else + } else { floor(budget / ctrl.cmaes$lambda) + } - if (!is.null(budget) && budget < ctrl.cmaes$lambda) + if (!is.null(budget) && budget < ctrl.cmaes$lambda) { stopf("Budget = %i cannot be less than lambda = %i!", budget, ctrl.cmaes$lambda) + } - if (!is.null(ctrl.cmaes$maxit) && ctrl.cmaes$maxit != maxit) + if (!is.null(ctrl.cmaes$maxit) && ctrl.cmaes$maxit != maxit) { stopf("Provided setting of maxit = %i does not work with provided budget = %s, lambda = %i", ctrl.cmaes$maxit, ifelse(is.null(budget), "NULL", budget), ctrl.cmaes$lambda) + } ctrl.cmaes$maxit = maxit cmaes::cma_es(par = start, fn = tunerFitnFunVectorized, lower = low, upper = upp, control = ctrl.cmaes, diff --git a/R/tuneDesign.R b/R/tuneDesign.R index f98282948a..be2712b491 100644 --- a/R/tuneDesign.R +++ b/R/tuneDesign.R @@ -1,5 +1,6 @@ # tunes with a given data.frame containing the design.
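# Hypothetical usage sketch (comment only; ParamSet and values assumed): each
# row of the design data.frame, whose columns match the ParamSet ids, becomes
# one evaluated state.
#   ps = makeParamSet(makeNumericParam("C", lower = 0.25, upper = 4))
#   des = data.frame(C = 2^(-2:2))
#   ctrl = makeTuneControlDesign(design = des)
#   tuneParams("classif.ksvm", iris.task, cv3, par.set = ps, control = ctrl)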
tuneDesign = function(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) { + xs = dfRowsToList(control$extra.args$design, par.set) evalOptimizationStatesTune(learner, task, resampling, measures, par.set, control, opt.path, show.info, xs, dobs = seq_along(xs), eols = NA_integer_, remove.nas = TRUE, diff --git a/R/tuneGenSA.R b/R/tuneGenSA.R index b4d8ec692f..f7b166716a 100644 --- a/R/tuneGenSA.R +++ b/R/tuneGenSA.R @@ -1,4 +1,5 @@ tuneGenSA = function(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) { + requirePackages("GenSA", why = "tuneGenSA", default.method = "load") low = getLower(par.set) @@ -15,9 +16,10 @@ tuneGenSA = function(learner, task, resampling, measures, par.set, control, opt. # FIXME: the following condition can be removed, once we are able to fix the # budget in GenSA - if (!is.null(control$budget) && res$counts > control$budget) + if (!is.null(control$budget) && res$counts > control$budget) { warningf("GenSA used %i function calls, exceeding the given budget of %i evaluations.", res$counts, control$budget) + } makeTuneResultFromOptPath(learner, par.set, measures, resampling, control, opt.path) } diff --git a/R/tuneGrid.R b/R/tuneGrid.R index d74d67e991..3e4849928a 100644 --- a/R/tuneGrid.R +++ b/R/tuneGrid.R @@ -1,8 +1,10 @@ # tunes with grid search, all params are supported as we use generateGridDesign tuneGrid = function(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) { + des = generateGridDesign(par.set, resolution = control$extra.args$resolution, trafo = FALSE) - if (!is.null(control$budget) && (nrow(des) != control$budget)) + if (!is.null(control$budget) && (nrow(des) != control$budget)) { stopf("The given budget (%i) does not fit to the size of the grid (%i).", control$budget, nrow(des)) + } xs = dfRowsToList(des, par.set) evalOptimizationStatesTune(learner, task, resampling, measures, par.set, control, opt.path, show.info, xs, dobs = seq_along(xs), eols = NA_integer_, remove.nas = TRUE, resample.fun = resample.fun) diff --git a/R/tuneIrace.R b/R/tuneIrace.R index 823899ead3..904fb006ad 100644 --- a/R/tuneIrace.R +++ b/R/tuneIrace.R @@ -1,7 +1,9 @@ tuneIrace = function(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) { + requirePackages("irace", why = "tuneIrace", default.method = "load") targetRunnerParallel = function(experiment, exec.target.runner, scenario, target.runner) { + # get our param settings that irace should try cands = extractSubList(experiment, "configuration", simplify = FALSE) # some conversion code @@ -36,10 +38,13 @@ tuneIrace = function(learner, task, resampling, measures, par.set, control, opt. tuner.config = c(list(targetRunnerParallel = targetRunnerParallel, instances = instances, logFile = log.file), control$extra.args) g = if (show.irace.output) identity else capture.output - g({or = irace::irace(scenario = tuner.config, parameters = parameters)}) + g({ + or = irace::irace(scenario = tuner.config, parameters = parameters) + }) unlink(log.file) - if (nrow(or) == 0L) + if (nrow(or) == 0L) { stop("irace produced no result, possibly the budget was set too low?") + } # get best configuration x1 = as.list(irace::removeConfigurationsMetaData(or[1L, ])) # we need chars, not factors / logicals, so we can match 'x'
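# e.g. (hypothetical values): an irace configuration row may hold the factor
# level "rbfdot" or the logical TRUE; both must become plain characters so the
# all.equal() matching against opt.path rows below can succeed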
par.names = names(x1) # get all lines in opt.path which correspond to x and average their perf values j = vlapply(seq_row(d), function(i) isTRUE(all.equal(removeMissingValues(as.list(d[i, par.names, drop = FALSE])), - removeMissingValues(x1)))) + removeMissingValues(x1)))) - if (!any(j)) + if (!any(j)) { stop("No matching rows for final elite configuration found in opt.path! This cannot be!") + } y = colMeans(d[j, opt.path$y.names, drop = FALSE]) # take first index of mating lines to get recommended x e = getOptPathEl(opt.path, which.first(j)) x = trafoValue(par.set, e$x) x = removeMissingValues(x) - if (control$tune.threshold) + if (control$tune.threshold) { # now get thresholds and average them threshold = getThresholdFromOptPath(opt.path, which(j)) - else + } else { threshold = NULL + } makeTuneResult(learner, control, x, y, resampling, threshold, opt.path) } diff --git a/R/tuneMultiCritGrid.R b/R/tuneMultiCritGrid.R index 9ad859197c..65d1012ba2 100644 --- a/R/tuneMultiCritGrid.R +++ b/R/tuneMultiCritGrid.R @@ -1,12 +1,12 @@ # tunes with grid search, all params are supported as we use generateGridDesign tuneMultiCritGrid = function(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) { + des = generateGridDesign(par.set, resolution = control$extra.args$resolution, trafo = FALSE) - if (!is.null(control$budget) && (nrow(des) != control$budget)) + if (!is.null(control$budget) && (nrow(des) != control$budget)) { stopf("The given budget (%i) does not fit to the size of the grid (%i).", control$budget, nrow(des)) + } xs = dfRowsToList(des, par.set) evalOptimizationStatesTune(learner, task, resampling, measures, par.set, control, opt.path, show.info, xs, dobs = seq_along(xs), eols = NA_integer_, remove.nas = TRUE, resample.fun = resample.fun) makeTuneMultiCritResultFromOptPath(learner, par.set, measures, resampling, control, opt.path) } - - diff --git a/R/tuneMultiCritNSGA2.R b/R/tuneMultiCritNSGA2.R index 28f1fb2759..0dd552107e 100644 --- a/R/tuneMultiCritNSGA2.R +++ b/R/tuneMultiCritNSGA2.R @@ -16,4 +16,3 @@ tuneMultiCritNSGA2 = function(learner, task, resampling, measures, par.set, cont makeTuneMultiCritResultFromOptPath(learner, par.set, measures, resampling, control, opt.path) } - diff --git a/R/tuneMultiCritRandom.R b/R/tuneMultiCritRandom.R index e9c1321c58..56d83e6138 100644 --- a/R/tuneMultiCritRandom.R +++ b/R/tuneMultiCritRandom.R @@ -1,10 +1,8 @@ tuneMultiCritRandom = function(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) { + vals = sampleValues(n = control$extra.args$maxit, par = par.set, trafo = FALSE) evalOptimizationStatesTune(learner, task, resampling, measures, par.set, control, opt.path, show.info, vals, dobs = seq_along(vals), eols = NA_integer_, remove.nas = TRUE, resample.fun = resample.fun) makeTuneMultiCritResultFromOptPath(learner, par.set, measures, resampling, control, opt.path) } - - - diff --git a/R/tuneParams.R b/R/tuneParams.R index e0ceb233d7..db59a9c60b 100644 --- a/R/tuneParams.R +++ b/R/tuneParams.R @@ -1,4 +1,4 @@ -#FIXME: check whether optimization can be paralleized if req. by user +# FIXME: check whether optimization can be parallelized if req. by user #' @title Hyperparameter tuning. #' @@ -63,7 +63,6 @@ #' df3 = generateHyperParsEffectData(res, trafo = TRUE) #' print(head(df2$data[, -ncol(df2$data)])) #' print(head(df3$data[, -ncol(df3$data)])) -#' #' \dontrun{ #' # we optimize the SVM over 3 kernels simultaneously #' # note how we use dependent params (requires = ...)
and iterated F-racing here @@ -82,7 +81,7 @@ #' print(res) #' df = as.data.frame(res$opt.path) #' print(head(df[, -ncol(df)])) -#' +#' #' # include the training set performance as well #' rdesc = makeResampleDesc("Holdout", predict = "both") #' res = tuneParams("classif.ksvm", iris.task, rdesc, par.set = ps, @@ -101,10 +100,12 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, assertClass(par.set, classes = "ParamSet") assertClass(control, classes = "TuneControl") assertFunction(resample.fun) - if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) + if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) { stop("Argument resampling must be of class ResampleDesc or ResampleInstance!") - if (inherits(resampling, "ResampleDesc") && control$same.resampling.instance) + } + if (inherits(resampling, "ResampleDesc") && control$same.resampling.instance) { resampling = makeResampleInstance(resampling, task = task) + } assertFlag(show.info) checkTunerParset(learner, par.set, measures, control) control = setDefaultImputeVal(control, measures) @@ -125,15 +126,16 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, opt.path = makeOptPathDFFromMeasures(par.set, measures, include.extra = need.extra) if (show.info) { messagef("[Tune] Started tuning learner %s for parameter set:", learner$id) - message(printToChar(par.set)) # using message() since this can go over the char limit of messagef(), see issue #1528 + message(printToChar(par.set)) # using message() since this can go over the char limit of messagef(), see issue #1528 messagef("With control class: %s", cl) messagef("Imputation value: %g", control$impute.val) } or = sel.func(learner, task, resampling, measures, par.set, control, - opt.path, show.info, resample.fun) - if (show.info) + opt.path, show.info, resample.fun) + if (show.info) { messagef("[Tune] Result: %s : %s", paramValueToString(par.set, or$x), perfsToString(or$y)) + } return(or) } @@ -150,6 +152,7 @@ tuneParams = function(learner, task, resampling, measures, par.set, control, #' @return ([ParamHelpers::OptPath]) or ([data.frame]). 
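#' @examples
#' # Hypothetical sketch (not part of this patch); `res` is assumed to be a
#' # tuneParams() result as in the examples above.
#' \dontrun{
#' getTuneResultOptPath(res) # data.frame (the default)
#' getTuneResultOptPath(res, as.df = FALSE) # ParamHelpers::OptPath
#' }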
#' @export getTuneResultOptPath = function(tune.result, as.df = TRUE) { + if (as.df == TRUE) { return(as.data.frame(tune.result$opt.path)) } else { diff --git a/R/tuneParamsMultiCrit.R b/R/tuneParamsMultiCrit.R index f8aafd28fc..99d13f8cad 100644 --- a/R/tuneParamsMultiCrit.R +++ b/R/tuneParamsMultiCrit.R @@ -33,7 +33,7 @@ #' @examples #' \donttest{ #' # multi-criteria optimization of (tpr, fpr) with NSGA-II -#' lrn = makeLearner("classif.ksvm") +#' lrn = makeLearner("classif.ksvm") #' rdesc = makeResampleDesc("Holdout") #' ps = makeParamSet( #' makeNumericParam("C", lower = -12, upper = 12, trafo = function(x) 2^x), @@ -45,15 +45,18 @@ #' plotTuneMultiCritResult(res, path = TRUE) #' } tuneParamsMultiCrit = function(learner, task, resampling, measures, par.set, control, show.info = getMlrOption("show.info"), resample.fun = resample) { + learner = checkLearner(learner) assertClass(task, classes = "Task") assertList(measures, types = "Measure", min.len = 2L) assertClass(par.set, classes = "ParamSet") assertClass(control, classes = "TuneMultiCritControl") - if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) + if (!inherits(resampling, "ResampleDesc") && !inherits(resampling, "ResampleInstance")) { stop("Argument resampling must be of class ResampleDesc or ResampleInstance!") - if (inherits(resampling, "ResampleDesc") && control$same.resampling.instance) + } + if (inherits(resampling, "ResampleDesc") && control$same.resampling.instance) { resampling = makeResampleInstance(resampling, task = task) + } assertFlag(show.info) control = setDefaultImputeVal(control, measures) checkTunerParset(learner, par.set, measures, control) @@ -75,10 +78,8 @@ tuneParamsMultiCrit = function(learner, task, resampling, measures, par.set, con messagef("Imputation value: %g", control$impute.val) } or = sel.func(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) - if (show.info) + if (show.info) { messagef("[Tune] Result: Points on front : %i", length(or$x)) + } return(or) } - - - diff --git a/R/tuneRandom.R b/R/tuneRandom.R index ca1595e7e3..050d71d2b5 100644 --- a/R/tuneRandom.R +++ b/R/tuneRandom.R @@ -1,4 +1,5 @@ tuneRandom = function(learner, task, resampling, measures, par.set, control, opt.path, show.info, resample.fun) { + vals = sampleValues(n = control$extra.args$maxit, par = par.set, trafo = FALSE) evalOptimizationStatesTune(learner, task, resampling, measures, par.set, control, opt.path, show.info, vals, dobs = seq_along(vals), eols = NA_integer_, remove.nas = TRUE, resample.fun) diff --git a/R/tuneThreshold.R b/R/tuneThreshold.R index 14e199d323..61b69386c5 100644 --- a/R/tuneThreshold.R +++ b/R/tuneThreshold.R @@ -27,14 +27,17 @@ #' @family tune #' @export tuneThreshold = function(pred, measure, task, model, nsub = 20L, control = list()) { + checkPrediction(pred, task.type = c("classif", "multilabel"), predict.type = "prob") td = pred$task.desc ttype = td$type measure = checkMeasures(measure, td)[[1L]] - if (!missing(task)) + if (!missing(task)) { assertClass(task, classes = "SupervisedTask") - if (!missing(model)) + } + if (!missing(model)) { assertClass(model, classes = "WrappedModel") + } assertList(control) probs = getPredictionProbabilities(pred) @@ -47,8 +50,10 @@ tuneThreshold = function(pred, measure, task, model, nsub = 20L, control = list( cls = pred$task.desc$class.levels k = length(cls) fitn = function(x) { - if (ttype == "multilabel" || k > 2) + + if (ttype == "multilabel" || k > 2) { names(x) = cls + }
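# illustrative (values assumed): for a 3-class task, fitn(c(0.4, 0.3, 0.3))
# names the candidate thresholds by class level and returns the measure's
# performance at that threshold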
performance(setThreshold(pred, x), measure, task, model, simpleaggr = TRUE) } @@ -63,7 +68,7 @@ tuneThreshold = function(pred, measure, task, model, nsub = 20L, control = list( th = or$par / sum(or$par) names(th) = cls perf = or$value - } else {# classif with k = 2 + } else { # classif with k = 2 or = optimizeSubInts(f = fitn, lower = 0, upper = 1, maximum = !measure$minimize, nsub = nsub) th = or[[1]] perf = or$objective diff --git a/R/tunerFitnFun.R b/R/tunerFitnFun.R index 04309a8e35..f1d042425e 100644 --- a/R/tunerFitnFun.R +++ b/R/tunerFitnFun.R @@ -39,6 +39,7 @@ tunerSmoofFun = function(learner, task, resampling, measures, par.set, ctrl, opt fn = function(x) { + # tell smoof the optimization direction, don't transform y later tunerFitnFun(x, learner, task, resampling, measures, par.set, ctrl, opt.path, show.info, convertx, remove.nas, resample.fun, always.minimize = FALSE) } @@ -68,19 +69,22 @@ tunerFitnFunVectorized = function(xs, learner, task, resampling, measures, par.s # short helper that imputes illegal values and also negates for maximization problems convertYForTuner = function(y, measures, ctrl, always.minimize = TRUE) { + is.multicrit = inherits(ctrl, "TuneMultiCritControl") k = ifelse(is.multicrit, length(y), 1L) for (j in seq_len(k)) { z = y[[j]] # if there was any problem we return the imputed value that the user selected - if (is.na(z) || is.nan(z) || is.infinite(z)) + if (is.na(z) || is.nan(z) || is.infinite(z)) { z = ctrl$impute.val[[j]] + } # we now negate values for maximization y[[j]] = if (always.minimize && !measures[[j]]$minimize) -1 * z else z } # for multicrit, return vector (without names), otherwise just scalar y - if (inherits(ctrl, "TuneMultiCritControl")) + if (inherits(ctrl, "TuneMultiCritControl")) { return(as.numeric(y)) - else + } else { return(y[[1L]]) + } } diff --git a/R/utils.R b/R/utils.R index cade6fc8a5..344b1f84d5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,21 +1,26 @@ # get one el from each row of a matrix, given indices or col names (factors for colnames are converted to characters) getRowEls = function(mat, inds) { - if (is.factor(inds)) + + if (is.factor(inds)) { inds = as.character(inds) - if (is.character(inds)) + } + if (is.character(inds)) { inds = match(inds, colnames(mat)) + } inds = cbind(seq_row(mat), inds) mat[inds] } # get one el from each col of a matrix, given indices or row names getColEls = function(mat, inds) { + getRowEls(t(mat), inds) } # Do fuzzy string matching between input and a set of valid inputs # and return the most similar valid inputs. getNameProposals = function(input, possible.inputs, nproposals = 3L) { + assertString(input) assertCharacter(possible.inputs) assertInt(nproposals, lower = 1L) @@ -30,6 +35,7 @@ getNameProposals = function(input, possible.inputs, nproposals = 3L) { # shorter way of printing debug dumps #' @export print.mlr.dump = function(x, ...) { + cat("\n") invisible(NULL) } @@ -37,23 +43,26 @@ print.mlr.dump = function(x, ...) { # applies the appropriate getPrediction* helper function getPrediction = function(object, newdata, ...)
{ + pred = do.call("predict", c(list("object" = object, "newdata" = newdata), list(...))) point = switch(object$task.desc$type, "regr" = getPredictionResponse(pred), "surv" = getPredictionResponse(pred), - "classif" = if (object$learner$predict.type == "response") - getPredictionResponse(pred) else getPredictionProbabilities(pred)) + "classif" = if (object$learner$predict.type == "response") { + getPredictionResponse(pred) + } else { + getPredictionProbabilities(pred) + }) - if (object$learner$predict.type == "se") + if (object$learner$predict.type == "se") { cbind("preds" = point, "se" = getPredictionSE(pred)) - else + } else { point + } } # replacement for purrr::imap() imap = function(.x, .f) { + Map(.f, .x = .x, .y = seq_along(.x)) } - - - diff --git a/R/utils_imbalancy.R b/R/utils_imbalancy.R index b91b648395..ccb3e4a1da 100644 --- a/R/utils_imbalancy.R +++ b/R/utils_imbalancy.R @@ -1,6 +1,7 @@ # sort classes wrt size getMinMaxClass = function(y) { + tab = table(y) j.small = getMinIndex(tab) ns = names(tab) @@ -26,20 +27,22 @@ getMinMaxClass = function(y) { # a) class cl is either oversampled or downsampled, depending on rate # b) the other binary class is either copied or bootstrapped (for variance) sampleBinaryClass = function(y, rate, cl, resample.other.class) { + inds1 = which(y == cl) # indices for class cl inds2 = setdiff(seq_along(y), inds1) # indices for other class newsize = round(length(inds1) * rate) # undersampling (rate < 1): reduce class1 by selecting newsize elements from it if (rate < 1) { newinds1 = sample(inds1, newsize, replace = FALSE) - # oversampling (rate > 1): take existing inds and sample add. inds with repl. + # oversampling (rate > 1): take existing inds and sample add. inds with repl. } else { newinds1 = c(inds1, sample(inds1, newsize - length(inds1), replace = TRUE)) } # now either copy or bootstrap other class - if (resample.other.class) + if (resample.other.class) { newinds2 = sample(inds2, length(inds2), replace = TRUE) - else + } else { newinds2 = inds2 + } c(newinds1, newinds2) } diff --git a/R/utils_opt.R b/R/utils_opt.R index 1cf01d5867..4342198003 100644 --- a/R/utils_opt.R +++ b/R/utils_opt.R @@ -1,13 +1,17 @@ # set default value for y-imputation in optimization setDefaultImputeVal = function(control, measures) { + getDefVal = function(mm) { - if (identical(mm$aggr, test.mean) && is.finite(mm$worst)) + + if (identical(mm$aggr, test.mean) && is.finite(mm$worst)) { ifelse(mm$minimize, 1, -1) * mm$worst - else + } else { Inf + } } - if (is.null(control$impute.val)) + if (is.null(control$impute.val)) { control$impute.val = vnapply(measures, getDefVal) + } return(control) } @@ -15,7 +19,9 @@ setDefaultImputeVal = function(control, measures) { # if we have multiple rows we average the result # subset to those elements, which begin with "threshold." and also remove that prefix getThresholdFromOptPath = function(opt.path, inds) { + ths = asMatrixCols(lapply(inds, function(i) { + ex = getOptPathEl(opt.path, i)$extra ns = names(ex) ex = ex[stri_detect_regex(ns, "^threshold")] @@ -26,12 +32,15 @@ getThresholdFromOptPath = function(opt.path, inds) { ##### tuning ##### makeOptPathDFFromMeasures = function(par.set, measures, ...)
{ + ns = vcapply(measures, measureAggrName) - if (anyDuplicated(ns)) + if (anyDuplicated(ns)) { stop("Cannot create OptPath, measures do not have unique ids!") + } if (length(intersect(ns, names(par.set$pars))) > 0L || - length(intersect(ns, getParamIds(par.set, repeated = TRUE, with.nr = TRUE))) > 0L) + length(intersect(ns, getParamIds(par.set, repeated = TRUE, with.nr = TRUE))) > 0L) { stop("Cannot create OptPath, measures ids and dimension names of input space overlap!") + } minimize = vlapply(measures, function(m) m$minimize) names(minimize) = ns makeOptPathDF(par.set, ns, minimize, add.transformed.x = FALSE, @@ -41,6 +50,7 @@ makeOptPathDFFromMeasures = function(par.set, measures, ...) { ##### featsel ##### featuresToLogical = function(vars, all.vars) { + if (is.list(vars)) { # FIXME: use asMatrixCols / asMatrixRows y = t(sapply(vars, function(x) all.vars %in% x)) @@ -53,17 +63,19 @@ featuresToLogical = function(vars, all.vars) { } featuresToBinary = function(vars, all.vars) { + y = featuresToLogical(vars, all.vars) mode(y) = "integer" y } binaryToFeatures = function(x, all.vars) { + assertIntegerish(x, lower = 0, upper = 1, len = length(all.vars)) # We want to avoid vector recycling here all.vars[as.logical(x)] } compare.diff = function(state1, state2, control, measure, threshold) { + ifelse(measure$minimize, 1, -1) * (state1$y[1] - state2$y[1]) > threshold } - diff --git a/R/utils_plot.R b/R/utils_plot.R index 390c78fe13..c85e0cf768 100644 --- a/R/utils_plot.R +++ b/R/utils_plot.R @@ -2,6 +2,7 @@ # with error message containing the learner name that appeared more # than once checkDuplicatedLearnerNames = function(learner.names) { + dupl = duplicated(learner.names) if (any(dupl)) { dupl.learners = unique(learner.names[dupl]) diff --git a/R/zzz.R b/R/zzz.R index 1b45df96bf..d720c6598c 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -13,10 +13,12 @@ "_PACKAGE" .onLoad = function(libname, pkgname) { + backports::import(pkgname) } .onAttach = function(libname, pkgname) { + configureMlr() parallelRegisterLevels(package = "mlr", levels = c("benchmark", "resample", "selectFeatures", "tuneParams", "ensemble")) } @@ -25,14 +27,14 @@ mlr = new.env(parent = emptyenv()) ### Learner properties mlr$learner.properties = list( - classif = c("numerics", "factors", "ordered", "missings", "weights", "prob", "oneclass", "twoclass", "multiclass", "class.weights", "featimp", "oobpreds", "functionals", "single.functional"), + classif = c("numerics", "factors", "ordered", "missings", "weights", "prob", "oneclass", "twoclass", "multiclass", "class.weights", "featimp", "oobpreds", "functionals", "single.functional"), multilabel = c("numerics", "factors", "ordered", "missings", "weights", "prob", "oneclass", "twoclass", "multiclass", "functionals", "single.functional"), - regr = c("numerics", "factors", "ordered", "missings", "weights", "se", "featimp", "oobpreds", + regr = c("numerics", "factors", "ordered", "missings", "weights", "se", "featimp", "oobpreds", "functionals", "single.functional"), - cluster = c("numerics", "factors", "ordered", "missings", "weights", "prob", "functionals", + cluster = c("numerics", "factors", "ordered", "missings", "weights", "prob", "functionals", "single.functional"), - surv = c("numerics", "factors", "ordered", "missings", "weights", "prob", "lcens", "rcens", "icens", "featimp", "oobpreds", "functionals", "single.functional"), - costsens = c("numerics", "factors", "ordered", "missings", "weights", "prob", "twoclass", "multiclass", "functionals", "single.functional") + surv = 
c("numerics", "factors", "ordered", "missings", "weights", "prob", "lcens", "rcens", "icens", "featimp", "oobpreds", "functionals", "single.functional"), + costsens = c("numerics", "factors", "ordered", "missings", "weights", "prob", "twoclass", "multiclass", "functionals", "single.functional") ) mlr$learner.properties$any = unique(unlist(mlr$learner.properties)) diff --git a/man/ClassifTask.Rd b/man/ClassifTask.Rd new file mode 100644 index 0000000000..ae932c9e5d --- /dev/null +++ b/man/ClassifTask.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ClassifTask.R +\name{makeClassifTask} +\alias{makeClassifTask} +\title{Create a classification task.} +\usage{ +makeClassifTask(id = deparse(substitute(data)), data, target, + weights = NULL, blocking = NULL, coordinates = NULL, + positive = NA_character_, fixup.data = "warn", check.data = TRUE) +} +\arguments{ +\item{id}{(\code{character(1)})\cr +Id string for object. +Default is the name of the R variable passed to \code{data}.} + +\item{data}{(\link{data.frame})\cr +A data frame containing the features and target variable(s).} + +\item{target}{(\code{character(1)} | \code{character(2)} | \code{character(n.classes)})\cr +Name(s) of the target variable(s). +For survival analysis these are the names of the survival time and event columns, +so it has length 2. For multilabel classification it contains the names of the logical +columns that encode whether a label is present or not and its length corresponds to the +number of classes.} + +\item{weights}{(\link{numeric})\cr +Optional, non-negative case weight vector to be used during fitting. +Cannot be set for cost-sensitive learning. +Default is \code{NULL} which means no (= equal) weights.} + +\item{blocking}{(\link{factor})\cr +An optional factor of the same length as the number of observations. +Observations with the same blocking level \dQuote{belong together}. +Specifically, they are either put all in the training or the test set +during a resampling iteration. +Default is \code{NULL} which means no blocking.} + +\item{coordinates}{(\link{data.frame})\cr +Coordinates of a spatial data set that will be used for spatial partitioning of the data in a spatial cross-validation resampling setting. +Coordinates have to be numeric values. +Provided \link{data.frame} needs to have the same number of rows as data and consist of at least two dimensions.} + +\item{positive}{(\code{character(1)})\cr +Positive class for binary classification (otherwise ignored and set to NA). +Default is the first factor level of the target attribute.} + +\item{fixup.data}{(\code{character(1)})\cr +Should some basic cleaning up of data be performed? +Currently this means removing empty factor levels for the columns. +Possible choices are: +\dQuote{no} = Don't do it. +\dQuote{warn} = Do it but warn about it. +\dQuote{quiet} = Do it but keep silent. +Default is \dQuote{warn}.} + +\item{check.data}{(\code{logical(1)})\cr +Should sanity of data be checked initially at task creation? +You should have good reasons to turn this off (one might be speed). +Default is \code{TRUE}.} +} +\description{ +Create a classification task. 
+} +\seealso{ +\link{Task} \link{CostSensTask} \link{ClusterTask} \link{MultilabelTask} \link{RegrTask} \link{SurvTask} +} diff --git a/man/CostSensTask.Rd b/man/CostSensTask.Rd new file mode 100644 index 0000000000..03ff121b1e --- /dev/null +++ b/man/CostSensTask.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CostSensTask.R +\name{makeCostSensTask} +\alias{makeCostSensTask} +\title{Create a cost-sensitive classification task.} +\usage{ +makeCostSensTask(id = deparse(substitute(data)), data, costs, + blocking = NULL, coordinates = NULL, fixup.data = "warn", + check.data = TRUE) +} +\arguments{ +\item{id}{(\code{character(1)})\cr +Id string for object. +Default is the name of the R variable passed to \code{data}.} + +\item{data}{(\link{data.frame})\cr +A data frame containing the features and target variable(s).} + +\item{costs}{(\link{data.frame})\cr +A numeric matrix or data frame containing the costs of misclassification. +We assume the general case of observation specific costs. +This means we have n rows, corresponding to the observations, in the same order as \code{data}. +The columns correspond to classes and their names are the class labels +(if unnamed we use y1 to yk as labels). +Each entry (i,j) of the matrix specifies the cost of predicting class j +for observation i.} + +\item{blocking}{(\link{factor})\cr +An optional factor of the same length as the number of observations. +Observations with the same blocking level \dQuote{belong together}. +Specifically, they are either put all in the training or the test set +during a resampling iteration. +Default is \code{NULL} which means no blocking.} + +\item{coordinates}{(\link{data.frame})\cr +Coordinates of a spatial data set that will be used for spatial partitioning of the data in a spatial cross-validation resampling setting. +Coordinates have to be numeric values. +Provided \link{data.frame} needs to have the same number of rows as data and consist of at least two dimensions.} + +\item{fixup.data}{(\code{character(1)})\cr +Should some basic cleaning up of data be performed? +Currently this means removing empty factor levels for the columns. +Possible choices are: +\dQuote{no} = Don't do it. +\dQuote{warn} = Do it but warn about it. +\dQuote{quiet} = Do it but keep silent. +Default is \dQuote{warn}.} + +\item{check.data}{(\code{logical(1)})\cr +Should sanity of data be checked initially at task creation? +You should have good reasons to turn this off (one might be speed). +Default is \code{TRUE}.} +} +\description{ +Create a cost-sensitive classification task. +} +\seealso{ +\link{Task} \link{ClassifTask} \link{ClusterTask} \link{MultilabelTask} \link{RegrTask} \link{SurvTask} + +Other costsens: \code{\link{makeCostSensClassifWrapper}}, + \code{\link{makeCostSensRegrWrapper}}, + \code{\link{makeCostSensWeightedPairsWrapper}} +} +\concept{costsens} diff --git a/man/MultilabelTask.Rd b/man/MultilabelTask.Rd new file mode 100644 index 0000000000..63c84b095c --- /dev/null +++ b/man/MultilabelTask.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/MultilabelTask.R +\name{makeMultilabelTask} +\alias{makeMultilabelTask} +\title{Create a multilabel task.} +\usage{ +makeMultilabelTask(id = deparse(substitute(data)), data, target, + weights = NULL, blocking = NULL, coordinates = NULL, + fixup.data = "warn", check.data = TRUE) +} +\arguments{ +\item{id}{(\code{character(1)})\cr +Id string for object. 
+Default is the name of the R variable passed to \code{data}.} + +\item{data}{(\link{data.frame})\cr +A data frame containing the features and target variable(s).} + +\item{target}{(\code{character(1)} | \code{character(2)} | \code{character(n.classes)})\cr +Name(s) of the target variable(s). +For survival analysis these are the names of the survival time and event columns, +so it has length 2. For multilabel classification it contains the names of the logical +columns that encode whether a label is present or not and its length corresponds to the +number of classes.} + +\item{weights}{(\link{numeric})\cr +Optional, non-negative case weight vector to be used during fitting. +Cannot be set for cost-sensitive learning. +Default is \code{NULL} which means no (= equal) weights.} + +\item{blocking}{(\link{factor})\cr +An optional factor of the same length as the number of observations. +Observations with the same blocking level \dQuote{belong together}. +Specifically, they are either put all in the training or the test set +during a resampling iteration. +Default is \code{NULL} which means no blocking.} + +\item{coordinates}{(\link{data.frame})\cr +Coordinates of a spatial data set that will be used for spatial partitioning of the data in a spatial cross-validation resampling setting. +Coordinates have to be numeric values. +Provided \link{data.frame} needs to have the same number of rows as data and consist of at least two dimensions.} + +\item{fixup.data}{(\code{character(1)})\cr +Should some basic cleaning up of data be performed? +Currently this means removing empty factor levels for the columns. +Possible choices are: +\dQuote{no} = Don't do it. +\dQuote{warn} = Do it but warn about it. +\dQuote{quiet} = Do it but keep silent. +Default is \dQuote{warn}.} + +\item{check.data}{(\code{logical(1)})\cr +Should sanity of data be checked initially at task creation? +You should have good reasons to turn this off (one might be speed). +Default is \code{TRUE}.} +} +\description{ +Create a multilabel task. +} +\details{ +For multilabel classification we assume that the presence of labels is encoded via logical +columns in \code{data}. The name of the column specifies the name of the label. \code{target} +is then a char vector that points to these columns. +} +\section{Note}{ + +For multilabel classification we assume that the presence of labels is encoded via logical +columns in \code{data}. The name of the column specifies the name of the label. \code{target} +is then a char vector that points to these columns. +} + +\seealso{ +\link{Task} \link{ClassifTask} \link{CostSensTask} \link{ClusterTask} \link{RegrTask} \link{SurvTask} +} diff --git a/man/RegrTask.Rd b/man/RegrTask.Rd new file mode 100644 index 0000000000..75dad8f513 --- /dev/null +++ b/man/RegrTask.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RegrTask.R +\name{makeRegrTask} +\alias{makeRegrTask} +\title{Create a regression task.} +\usage{ +makeRegrTask(id = deparse(substitute(data)), data, target, + weights = NULL, blocking = NULL, coordinates = NULL, + fixup.data = "warn", check.data = TRUE) +} +\arguments{ +\item{id}{(\code{character(1)})\cr +Id string for object. +Default is the name of the R variable passed to \code{data}.} + +\item{data}{(\link{data.frame})\cr +A data frame containing the features and target variable(s).} + +\item{target}{(\code{character(1)} | \code{character(2)} | \code{character(n.classes)})\cr +Name(s) of the target variable(s). 
+For survival analysis these are the names of the survival time and event columns, +so it has length 2. For multilabel classification it contains the names of the logical +columns that encode whether a label is present or not and its length corresponds to the +number of classes.} + +\item{weights}{(\link{numeric})\cr +Optional, non-negative case weight vector to be used during fitting. +Cannot be set for cost-sensitive learning. +Default is \code{NULL} which means no (= equal) weights.} + +\item{blocking}{(\link{factor})\cr +An optional factor of the same length as the number of observations. +Observations with the same blocking level \dQuote{belong together}. +Specifically, they are either put all in the training or the test set +during a resampling iteration. +Default is \code{NULL} which means no blocking.} + +\item{coordinates}{(\link{data.frame})\cr +Coordinates of a spatial data set that will be used for spatial partitioning of the data in a spatial cross-validation resampling setting. +Coordinates have to be numeric values. +Provided \link{data.frame} needs to have the same number of rows as data and consist of at least two dimensions.} + +\item{fixup.data}{(\code{character(1)})\cr +Should some basic cleaning up of data be performed? +Currently this means removing empty factor levels for the columns. +Possible choices are: +\dQuote{no} = Don't do it. +\dQuote{warn} = Do it but warn about it. +\dQuote{quiet} = Do it but keep silent. +Default is \dQuote{warn}.} + +\item{check.data}{(\code{logical(1)})\cr +Should sanity of data be checked initially at task creation? +You should have good reasons to turn this off (one might be speed). +Default is \code{TRUE}.} +} +\description{ +Create a regression task. +} +\seealso{ +\link{Task} \link{ClassifTask} \link{CostSensTask} \link{ClusterTask} \link{MultilabelTask} \link{SurvTask} +} diff --git a/man/SurvTask.Rd b/man/SurvTask.Rd new file mode 100644 index 0000000000..4113ee620e --- /dev/null +++ b/man/SurvTask.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/SurvTask.R +\name{makeSurvTask} +\alias{makeSurvTask} +\title{Create a survival task.} +\usage{ +makeSurvTask(id = deparse(substitute(data)), data, target, + weights = NULL, blocking = NULL, coordinates = NULL, + fixup.data = "warn", check.data = TRUE) +} +\arguments{ +\item{id}{(\code{character(1)})\cr +Id string for object. +Default is the name of the R variable passed to \code{data}.} + +\item{data}{(\link{data.frame})\cr +A data frame containing the features and target variable(s).} + +\item{target}{(\code{character(1)} | \code{character(2)} | \code{character(n.classes)})\cr +Name(s) of the target variable(s). +For survival analysis these are the names of the survival time and event columns, +so it has length 2. For multilabel classification it contains the names of the logical +columns that encode whether a label is present or not and its length corresponds to the +number of classes.} + +\item{weights}{(\link{numeric})\cr +Optional, non-negative case weight vector to be used during fitting. +Cannot be set for cost-sensitive learning. +Default is \code{NULL} which means no (= equal) weights.} + +\item{blocking}{(\link{factor})\cr +An optional factor of the same length as the number of observations. +Observations with the same blocking level \dQuote{belong together}. +Specifically, they are either put all in the training or the test set +during a resampling iteration. 
+Default is \code{NULL} which means no blocking.} + +\item{coordinates}{(\link{data.frame})\cr +Coordinates of a spatial data set that will be used for spatial partitioning of the data in a spatial cross-validation resampling setting. +Coordinates have to be numeric values. +Provided \link{data.frame} needs to have the same number of rows as data and consist of at least two dimensions.} + +\item{fixup.data}{(\code{character(1)})\cr +Should some basic cleaning up of data be performed? +Currently this means removing empty factor levels for the columns. +Possible choices are: +\dQuote{no} = Don't do it. +\dQuote{warn} = Do it but warn about it. +\dQuote{quiet} = Do it but keep silent. +Default is \dQuote{warn}.} + +\item{check.data}{(\code{logical(1)})\cr +Should sanity of data be checked initially at task creation? +You should have good reasons to turn this off (one might be speed). +Default is \code{TRUE}.} +} +\description{ +Create a survival task. +} +\seealso{ +\link{Task} \link{ClassifTask} \link{CostSensTask} \link{ClusterTask} \link{MultilabelTask} \link{RegrTask} +} diff --git a/man/makeClusterTask.Rd b/man/makeClusterTask.Rd new file mode 100644 index 0000000000..7905c10ba4 --- /dev/null +++ b/man/makeClusterTask.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ClusterTask.R +\name{makeClusterTask} +\alias{makeClusterTask} +\title{Create a cluster task.} +\usage{ +makeClusterTask(id = deparse(substitute(data)), data, weights = NULL, + blocking = NULL, coordinates = NULL, fixup.data = "warn", + check.data = TRUE) +} +\arguments{ +\item{id}{(\code{character(1)})\cr +Id string for object. +Default is the name of the R variable passed to \code{data}.} + +\item{data}{(\link{data.frame})\cr +A data frame containing the features and target variable(s).} + +\item{weights}{(\link{numeric})\cr +Optional, non-negative case weight vector to be used during fitting. +Cannot be set for cost-sensitive learning. +Default is \code{NULL} which means no (= equal) weights.} + +\item{blocking}{(\link{factor})\cr +An optional factor of the same length as the number of observations. +Observations with the same blocking level \dQuote{belong together}. +Specifically, they are either put all in the training or the test set +during a resampling iteration. +Default is \code{NULL} which means no blocking.} + +\item{coordinates}{(\link{data.frame})\cr +Coordinates of a spatial data set that will be used for spatial partitioning of the data in a spatial cross-validation resampling setting. +Coordinates have to be numeric values. +Provided \link{data.frame} needs to have the same number of rows as data and consist of at least two dimensions.} + +\item{fixup.data}{(\code{character(1)})\cr +Should some basic cleaning up of data be performed? +Currently this means removing empty factor levels for the columns. +Possible choices are: +\dQuote{no} = Don't do it. +\dQuote{warn} = Do it but warn about it. +\dQuote{quiet} = Do it but keep silent. +Default is \dQuote{warn}.} + +\item{check.data}{(\code{logical(1)})\cr +Should sanity of data be checked initially at task creation? +You should have good reasons to turn this off (one might be speed). +Default is \code{TRUE}.} +} +\description{ +Create a cluster task. 
+} +\seealso{ +\link{Task} \link{ClassifTask} \link{CostSensTask} \link{MultilabelTask} \link{RegrTask} \link{SurvTask} +} diff --git a/tests/run-classif1.R b/tests/run-classif1.R index c1779b6e88..289e9aa598 100644 --- a/tests/run-classif1.R +++ b/tests/run-classif1.R @@ -2,4 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_classif_[a-l].*") } - diff --git a/tests/run-classif2.R b/tests/run-classif2.R index 62793605a8..bbe0d30bbb 100644 --- a/tests/run-classif2.R +++ b/tests/run-classif2.R @@ -2,4 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_classif_[k-z].*") } - diff --git a/tests/run-learners-classif.R b/tests/run-learners-classif.R index fb3bf7dcee..f24426d206 100644 --- a/tests/run-learners-classif.R +++ b/tests/run-learners-classif.R @@ -2,4 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_learners_all_classif") } - diff --git a/tests/run-learners-classiflabelswitch.R b/tests/run-learners-classiflabelswitch.R index 36b17f7fa3..acee67976b 100644 --- a/tests/run-learners-classiflabelswitch.R +++ b/tests/run-learners-classiflabelswitch.R @@ -2,4 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_learners_classiflabelswitch") } - diff --git a/tests/run-learners-cluster.R b/tests/run-learners-cluster.R index 5f8fcde19f..0683d7cc55 100644 --- a/tests/run-learners-cluster.R +++ b/tests/run-learners-cluster.R @@ -2,4 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_learners_all_cluster") } - diff --git a/tests/run-learners-general.R b/tests/run-learners-general.R index f0b01af3bd..e7e5e4c77c 100644 --- a/tests/run-learners-general.R +++ b/tests/run-learners-general.R @@ -2,4 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_learners_all_general") } - diff --git a/tests/run-learners-multilabel.R b/tests/run-learners-multilabel.R index 94fdad29d8..a3bdf204c1 100644 --- a/tests/run-learners-multilabel.R +++ b/tests/run-learners-multilabel.R @@ -2,6 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_learners_all_multilabel") } - - - diff --git a/tests/run-learners-regr.R b/tests/run-learners-regr.R index 8748495195..0a39106aba 100644 --- a/tests/run-learners-regr.R +++ b/tests/run-learners-regr.R @@ -2,5 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || 
identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_learners_all_regr") } - - diff --git a/tests/run-learners-surv.R b/tests/run-learners-surv.R index fc2435892f..21a0ba490f 100644 --- a/tests/run-learners-surv.R +++ b/tests/run-learners-surv.R @@ -2,6 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_learners_all_surv") } - - - diff --git a/tests/run-lint.R b/tests/run-lint.R index aab67689c5..79ee61232f 100644 --- a/tests/run-lint.R +++ b/tests/run-lint.R @@ -1,3 +1,2 @@ library(testthat) test_check("mlr", filter = "lint") - diff --git a/tests/run-multilabel.R b/tests/run-multilabel.R index 9a7a6a4d85..4ce7ba25d5 100644 --- a/tests/run-multilabel.R +++ b/tests/run-multilabel.R @@ -2,4 +2,3 @@ library(testthat) if (identical(Sys.getenv("TRAVIS"), "True") || identical(Sys.getenv("APPVEYOR"), "True") || identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true") || identical(Sys.getenv("NOT_CRAN"), "true")) { test_check("mlr", "_multilabel_") } - diff --git a/tests/run-parallel.R b/tests/run-parallel.R index 6cefc32ece..0a6620a960 100644 --- a/tests/run-parallel.R +++ b/tests/run-parallel.R @@ -4,4 +4,3 @@ library(testthat) if (identical(Sys.getenv("R_EXPENSIVE_TEST_OK"), "true")) { test_check("mlr", "_parallel_") } - diff --git a/tests/testthat/helper_helpers.R b/tests/testthat/helper_helpers.R index 53e348381b..0124f0869b 100644 --- a/tests/testthat/helper_helpers.R +++ b/tests/testthat/helper_helpers.R @@ -1,11 +1,14 @@ requirePackagesOrSkip = function(packs, default.method = "attach") { + ok = requirePackages(packs, why = "unit test", stop = FALSE, suppress.warnings = TRUE, default.method = default.method) - if (any(!ok)) + if (any(!ok)) { skip(sprintf("Required packages not installed: %s", collapse(names(ok)[!ok]))) + } invisible(TRUE) } e1071CVToMlrCV = function(e1071.tune.result) { + tr = e1071.tune.result inds = tr$train.ind size = max(unlist(inds)) @@ -23,6 +26,7 @@ e1071CVToMlrCV = function(e1071.tune.result) { e1071BootstrapToMlrBootstrap = function(e1071.tune.result) { + tr = e1071.tune.result inds = tr$train.ind @@ -41,24 +45,26 @@ e1071BootstrapToMlrBootstrap = function(e1071.tune.result) { testSimple = function(t.name, df, target, train.inds, old.predicts, parset = list()) { + inds = train.inds train = df[inds, ] test = df[-inds, ] lrn = do.call("makeLearner", c(list(t.name), parset)) # FIXME this heuristic will backfire eventually - if (length(target) == 0) + if (length(target) == 0) { task = makeClusterTask(data = df) - else if (is.numeric(df[, target])) + } else if (is.numeric(df[, target])) { task = makeRegrTask(data = df, target = target) - else if (is.factor(df[, target])) + } else if (is.factor(df[, target])) { task = makeClassifTask(data = df, target = target) - else if (is.data.frame(df[, target]) && is.numeric(df[, target[1L]]) && is.logical(df[, target[2L]])) + } else if (is.data.frame(df[, target]) && is.numeric(df[, target[1L]]) && is.logical(df[, target[2L]])) { task = makeSurvTask(data = df, target = target) - else if (is.data.frame(df[, target]) && is.logical(df[, target[1L]])) + } else if (is.data.frame(df[, target]) && is.logical(df[, target[1L]])) { task = makeMultilabelTask(data = df, target = target) - else + } else { stop("Should not happen!") + } m = try(train(lrn, task, subset = inds)) if (inherits(m, 
"FailureModel")) { @@ -70,21 +76,23 @@ testSimple = function(t.name, df, target, train.inds, old.predicts, parset = lis rownames(cp$data) = NULL expect_equal(unname(cp$data[, substr(colnames(cp$data), 1, 8) == "response"]), unname(old.predicts)) } else { - # to avoid issues with dropped levels in the class factor we only check the elements as chars - if (is.numeric(cp$data$response) && is.numeric(old.predicts)) - if (lrn$predict.type == "se") { - expect_equal(unname(cbind(cp$data$response, cp$data$se)), unname(old.predicts), tol = 1e-5) + # to avoid issues with dropped levels in the class factor we only check the elements as chars + if (is.numeric(cp$data$response) && is.numeric(old.predicts)) { + if (lrn$predict.type == "se") { + expect_equal(unname(cbind(cp$data$response, cp$data$se)), unname(old.predicts), tol = 1e-5) + } else { + expect_equal(unname(cp$data$response), unname(old.predicts), tol = 1e-5) + } } else { - expect_equal(unname(cp$data$response), unname(old.predicts), tol = 1e-5) + expect_equal(as.character(cp$data$response), as.character(old.predicts)) } - else - expect_equal(as.character(cp$data$response), as.character(old.predicts)) } } } testSimpleParsets = function(t.name, df, target, train.inds, old.predicts.list, parset.list) { + inds = train.inds train = df[inds, ] test = df[-inds, ] @@ -98,6 +106,7 @@ testSimpleParsets = function(t.name, df, target, train.inds, old.predicts.list, testProb = function(t.name, df, target, train.inds, old.probs, parset = list()) { + inds = train.inds train = df[inds, ] test = df[-inds, ] @@ -112,17 +121,19 @@ testProb = function(t.name, df, target, train.inds, old.probs, parset = list()) if (inherits(m, "FailureModel")) { expect_is(old.predicts, "try-error") - } else{ + } else { cp = predict(m, newdata = test) # dont need names for num vector, 2 classes - if (is.numeric(old.probs)) + if (is.numeric(old.probs)) { names(old.probs) = NULL - else + } else { old.probs = as.matrix(old.probs) + } p = getPredictionProbabilities(cp) - if (is.data.frame(p)) + if (is.data.frame(p)) { p = as.matrix(p) + } # we change names a bit so dont check them colnames(p) = colnames(old.probs) = NULL rownames(p) = rownames(old.probs) = NULL @@ -134,6 +145,7 @@ testProb = function(t.name, df, target, train.inds, old.probs, parset = list()) testProbWithTol = function(t.name, df, target, train.inds, old.probs, parset = list(), tol = 1e-04) { + inds = train.inds train = df[inds, ] test = df[-inds, ] @@ -148,17 +160,19 @@ testProbWithTol = function(t.name, df, target, train.inds, old.probs, parset = l if (inherits(m, "FailureModel")) { expect_is(old.predicts, "try-error") - } else{ + } else { cp = predict(m, newdata = test) # dont need names for num vector, 2 classes - if (is.numeric(old.probs)) + if (is.numeric(old.probs)) { names(old.probs) = NULL - else + } else { old.probs = as.matrix(old.probs) + } p = getPredictionProbabilities(cp) - if (is.data.frame(p)) + if (is.data.frame(p)) { p = as.matrix(p) + } # we change names a bit so dont check them colnames(p) = colnames(old.probs) = NULL rownames(p) = rownames(old.probs) = NULL @@ -169,6 +183,7 @@ testProbWithTol = function(t.name, df, target, train.inds, old.probs, parset = l testProbParsets = function(t.name, df, target, train.inds, old.probs.list, parset.list) { + inds = train.inds train = df[inds, ] test = df[-inds, ] @@ -183,6 +198,7 @@ testProbParsets = function(t.name, df, target, train.inds, old.probs.list, parse testProbParsetsWithTol = function(t.name, df, target, train.inds, old.probs.list, parset.list, 
tol = 1e-04) { + inds = train.inds train = df[inds, ] test = df[-inds, ] @@ -196,11 +212,13 @@ testProbParsetsWithTol = function(t.name, df, target, train.inds, old.probs.list testCV = function(t.name, df, target, folds = 2, parset = list(), tune.train, tune.predict = predict) { + requirePackages("e1071", default.method = "load") data = df formula = formula(paste(target, "~.")) tt = function(formula, data, subset = seq_len(nrow(data)), ...) { + pars = list(formula = formula, data = data[subset, ]) pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) @@ -211,6 +229,7 @@ testCV = function(t.name, df, target, folds = 2, parset = list(), tune.train, tu } tp = function(model, newdata) { + set.seed(getOption("mlr.debug.seed")) p = tune.predict(model, newdata) return(p) @@ -220,10 +239,11 @@ testCV = function(t.name, df, target, folds = 2, parset = list(), tune.train, tu cv.instance = e1071CVToMlrCV(tr) lrn = do.call("makeLearner", c(t.name, parset)) - if (is.numeric(df[, target])) + if (is.numeric(df[, target])) { task = makeRegrTask(data = df, target = target) - else if (is.factor(df[, target])) + } else if (is.factor(df[, target])) { task = makeClassifTask(data = df, target = target) + } ms = resample(lrn, task, cv.instance)$measures.test if (inherits(task, "ClassifTask")) { expect_equal(mean(ms[, "mmce"]), tr$performances[1, 2], check.names = FALSE) @@ -246,6 +266,7 @@ testCVParsets = function(t.name, df, target, folds = 2, tune.train, tune.predict testBootstrap = function(t.name, df, target, iters = 3, parset = list(), tune.train, tune.predict = predict) { + requirePackages("e1071", default.method = "load") data = df formula = formula(paste(target, "~.")) @@ -255,10 +276,11 @@ testBootstrap = function(t.name, df, target, iters = 3, parset = list(), tune.tr bs.instance = e1071BootstrapToMlrBootstrap(tr) lrn = do.call("makeLearner", c(t.name, parset)) - if (is.numeric(df[, target])) + if (is.numeric(df[, target])) { task = makeRegrTask(data = df, target = target) - else if (is.factor(df[, target])) + } else if (is.factor(df[, target])) { task = makeClassifTask(data = df, target = target) + } ms = resample(lrn, task, bs.instance)$measures.test if (inherits(task, "ClassifTask")) { expect_equal(mean(ms[, "mmce"]), tr$performances[1, 2], check.names = FALSE) @@ -271,6 +293,7 @@ testBootstrap = function(t.name, df, target, iters = 3, parset = list(), tune.tr mylist = function(..., create = FALSE) { + lrns = listLearners(..., create = create) if (create) { ids = BBmisc::extractSubList(lrns, "id") @@ -282,11 +305,13 @@ mylist = function(..., create = FALSE) { } testFacetting = function(obj, nrow = NULL, ncol = NULL) { + expect_equal(obj$facet$params$nrow, nrow) expect_equal(obj$facet$params$ncol, ncol) } testDocForStrings = function(doc, x, grid.size = 1L, ordered = FALSE) { + text.paths = paste("/svg:svg//svg:text[text()[contains(., '", x, "')]]", sep = "") nodes = XML::getNodeSet(doc, text.paths, ns.svg) @@ -298,6 +323,7 @@ testDocForStrings = function(doc, x, grid.size = 1L, ordered = FALSE) { } constant05Resample = function(...) { + res = resample(...) res$aggr = rep(0.5, length(res$aggr)) res @@ -305,6 +331,9 @@ constant05Resample = function(...) { # evaluate expr without giving its output. 
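The helpers above (testSimple, testProb, testCV, testBootstrap and their parset variants) all share one comparison pattern: fit the underlying package directly under mlr's debug seed, then assert that the corresponding mlr learner reproduces the same predictions or resampled performance. A minimal sketch of that pattern outside the test suite (a hypothetical rpart example, not part of this patch):

library(mlr)
library(rpart)

# fit the upstream package directly ...
set.seed(getOption("mlr.debug.seed", 1L))
old = predict(rpart(Species ~ ., data = iris[1:100, ]),
  newdata = iris[101:150, ], type = "class")

# ... then check that the wrapped mlr learner agrees
task = makeClassifTask(data = iris, target = "Species")
set.seed(getOption("mlr.debug.seed", 1L))
mod = train(makeLearner("classif.rpart"), task, subset = 1:100)
new = predict(mod, task = task, subset = 101:150)
all.equal(as.character(old), as.character(new$data$response))

The quiet() helper that follows simply wraps capture.output() so such comparisons do not clutter the test log.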
quiet = function(expr) { - capture.output({ret = expr}) + + capture.output({ + ret = expr + }) ret } diff --git a/tests/testthat/helper_learners_all.R b/tests/testthat/helper_learners_all.R index d9dc37466a..49ba020c91 100644 --- a/tests/testthat/helper_learners_all.R +++ b/tests/testthat/helper_learners_all.R @@ -17,8 +17,9 @@ testThatLearnerRespectsWeights = function(lrn, task, train.inds, test.inds, weig lrn = setPredictType(lrn, pred.type) - if (lrn$id %in% names(hyperpars)) + if (lrn$id %in% names(hyperpars)) { lrn = setHyperPars(lrn, par.vals = hyperpars[[lrn$id]]) + } rin = makeResampleInstance("Holdout", task = task) m1 = train(lrn, task, subset = train.inds) @@ -55,10 +56,12 @@ testThatLearnerRespectsWeights = function(lrn, task, train.inds, test.inds, weig # predict standard errors.) testBasicLearnerProperties = function(lrn, task, hyperpars, pred.type = "response") { + # handling special par.vals and predict type info = lrn$id - if (lrn$id %in% names(hyperpars)) + if (lrn$id %in% names(hyperpars)) { lrn = setHyperPars(lrn, par.vals = hyperpars[[lrn$id]]) + } lrn = setPredictType(lrn, pred.type) @@ -159,6 +162,7 @@ testThatLearnerHandlesMissings = function(lrn, task, hyperpars) { # this works correctly testThatGetOOBPredsWorks = function(lrn, task) { + type = lrn$type mod = train(lrn, task) oob = getOOBPreds(mod, task) @@ -181,24 +185,29 @@ testThatGetOOBPredsWorks = function(lrn, task) { testThatLearnerCanCalculateImportance = function(lrn, task, hyperpars) { - - if (lrn$id %in% names(hyperpars)) + if (lrn$id %in% names(hyperpars)) { lrn = setHyperPars(lrn, par.vals = hyperpars[[lrn$id]]) + } # some learners need special param settings to compute variable importance # add them here if you implement a measure that requires that. # you may also want to change the params for the learner if training takes # a long time - if (lrn$short.name == "ranger") + if (lrn$short.name == "ranger") { lrn = setHyperPars(lrn, importance = "permutation") - if (lrn$short.name == "adabag") + } + if (lrn$short.name == "adabag") { lrn = setHyperPars(lrn, mfinal = 5L) - if (lrn$short.name == "cforest") + } + if (lrn$short.name == "cforest") { lrn = setHyperPars(lrn, ntree = 5L) - if (lrn$short.name == "rfsrc") + } + if (lrn$short.name == "rfsrc") { lrn = setHyperPars(lrn, ntree = 5L) - if (lrn$short.name == "xgboost") + } + if (lrn$short.name == "xgboost") { lrn = setHyperPars(lrn, nrounds = 10L) + } mod = train(lrn, task) feat.imp = getFeatureImportance(mod)$res @@ -210,12 +219,14 @@ testThatLearnerCanCalculateImportance = function(lrn, task, hyperpars) { testThatLearnerParamDefaultsAreInParamSet = function(lrn) { + pars = lrn$par.set$pars pv = lrn$par.vals expect_true(isSubset(names(pv), names(pars))) } testThatLearnerPredictsFeasibleSEValues = function(lrn, task) { + lrn = setPredictType(lrn, "se") res = resample(lrn, task, makeResampleDesc("LOO")) ses = getPredictionSE(res$pred) diff --git a/tests/testthat/helper_lint.R b/tests/testthat/helper_lint.R index d8075d0292..f23915fc48 100644 --- a/tests/testthat/helper_lint.R +++ b/tests/testthat/helper_lint.R @@ -2,6 +2,7 @@ # check if lintr version is sufficient # if `error.if.not` is TRUE an error is thrown with a meaningful message. 
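The custom linters defined in the block below encode mlr's house style: = instead of <- or -> for assignment, spaces around infix operators (with ^ deliberately exempted, which is why hunks elsewhere in this patch rewrite 2 ^ (-2:2) as 2^(-2:2)), no space before the parenthesis of a function call, and dotted.case object names. A rough sketch of what they accept and flag (hypothetical lines, not from the package):

x <- 1          # left.assign.linter: use =, not <-, for assignment
1 -> y          # right.assign.linter: use =, not ->, for assignment
a=b + 1         # infix.spaces.linter: put spaces around infix operators
my_var = 2^3    # object.naming.linter: objects use dotted.case, not snake_case
my.var = 2^3    # passes all of the custom linters (^ needs no surrounding spaces)

isLintrVersionOk() below guards this setup, since the linters rely on lintr internals that only exist in sufficiently recent versions.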
isLintrVersionOk = function(error.if.not = FALSE) { + lintr.ver = try(packageVersion("lintr"), silent = TRUE) lintr.required = "1.0.0.9001" if (inherits(lintr.ver, "try-error")) { @@ -14,7 +15,7 @@ isLintrVersionOk = function(error.if.not = FALSE) { } if (error.if.not) { stopf(paste("%s\nInstalling the github version of lintr will probably solve this issue. For that, please run", - "> devtools::install_github(\"jimhester/lintr\")", sep = "\n"), msg) + "> devtools::install_github(\"jimhester/lintr\")", sep = "\n"), msg) } return(FALSE) } @@ -54,24 +55,28 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui # prohibit <- left.assign.linter = function(source_file) { + lapply(lintr:::ids_with_token(source_file, "LEFT_ASSIGN"), function(id) { - parsed = lintr:::with_id(source_file, id) - if (parsed$text == ":=") return(NULL) # ':=' is also a LEFT_ASSIGN token for some reason - Lint(filename = source_file$filename, line_number = parsed$line1, - column_number = parsed$col1, type = "style", message = "Use =, not <-, for assignment.", - line = source_file$lines[as.character(parsed$line1)], - linter = "assignment_linter") + + parsed = lintr:::with_id(source_file, id) + if (parsed$text == ":=") return(NULL) # ':=' is also a LEFT_ASSIGN token for some reason + Lint(filename = source_file$filename, line_number = parsed$line1, + column_number = parsed$col1, type = "style", message = "Use =, not <-, for assignment.", + line = source_file$lines[as.character(parsed$line1)], + linter = "assignment_linter") }) } # prohibit -> right.assign.linter = function(source_file) { + lapply(lintr:::ids_with_token(source_file, "RIGHT_ASSIGN"), function(id) { - parsed = lintr:::with_id(source_file, id) - Lint(filename = source_file$filename, line_number = parsed$line1, - column_number = parsed$col1, type = "style", message = "Use =, not ->, for assignment.", - line = source_file$lines[as.character(parsed$line1)], - linter = "assignment_linter") + + parsed = lintr:::with_id(source_file, id) + Lint(filename = source_file$filename, line_number = parsed$line1, + column_number = parsed$col1, type = "style", message = "Use =, not ->, for assignment.", + line = source_file$lines[as.character(parsed$line1)], + linter = "assignment_linter") }) } @@ -79,40 +84,43 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui `%==%` = lintr:::`%==%` spaces.left.parentheses.linter = function(source_file) { - lapply(lintr:::ids_with_token(source_file, "'('"), function(id) { - parsed = source_file$parsed_content[id, ] - terminal.tokens.before = source_file$parsed_content$token[source_file$parsed_content$line1 == - parsed$line1 & source_file$parsed_content$col1 < - parsed$col1 & source_file$parsed_content$terminal] - last.type = tail(terminal.tokens.before, n = 1) - is.function = length(last.type) %!=% 0L && (last.type %in% - c("SYMBOL_FUNCTION_CALL", "FUNCTION", "'}'", "')'", - "']'")) - if (!is.function) { - line = source_file$lines[as.character(parsed$line1)] - before.operator = substr(line, parsed$col1 - 1L, - parsed$col1 - 1L) - non.space.before = re_matches(before.operator, rex(non_space)) - not.exception = !(before.operator %in% c("!", ":", - "[", "(")) - if (non.space.before && not.exception) { - Lint(filename = source_file$filename, line_number = parsed$line1, - column_number = parsed$col1, type = "style", - message = "Place a space before left parenthesis, except in a function call.", - line = line, linter = "spaces.left.parentheses.linter") - } - } - }) + + 
lapply(lintr:::ids_with_token(source_file, "'('"), function(id) { + + parsed = source_file$parsed_content[id, ] + terminal.tokens.before = source_file$parsed_content$token[source_file$parsed_content$line1 == + parsed$line1 & source_file$parsed_content$col1 < + parsed$col1 & source_file$parsed_content$terminal] + last.type = tail(terminal.tokens.before, n = 1) + is.function = length(last.type) %!=% 0L && (last.type %in% + c("SYMBOL_FUNCTION_CALL", "FUNCTION", "'}'", "')'", + "']'")) + if (!is.function) { + line = source_file$lines[as.character(parsed$line1)] + before.operator = substr(line, parsed$col1 - 1L, + parsed$col1 - 1L) + non.space.before = re_matches(before.operator, rex(non_space)) + not.exception = !(before.operator %in% c("!", ":", + "[", "(")) + if (non.space.before && not.exception) { + Lint(filename = source_file$filename, line_number = parsed$line1, + column_number = parsed$col1, type = "style", + message = "Place a space before left parenthesis, except in a function call.", + line = line, linter = "spaces.left.parentheses.linter") + } + } + }) } function.left.parentheses.linter = function(source_file) { + lapply(lintr:::ids_with_token(source_file, "'('"), function(id) { parsed = source_file$parsed_content[id, ] ttb = which(source_file$parsed_content$line1 == parsed$line1 & - source_file$parsed_content$col1 < parsed$col1 & - source_file$parsed_content$terminal) + source_file$parsed_content$col1 < parsed$col1 & + source_file$parsed_content$terminal) ttb = tail(ttb, n = 1) last.type = source_file$parsed_content$token[ttb] @@ -128,8 +136,8 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui opparsed = source_file$parsed_content[opening.paren.pos, ] opttb = which(source_file$parsed_content$line1 == opparsed$line1 & - source_file$parsed_content$col1 < opparsed$col1 & - source_file$parsed_content$terminal) + source_file$parsed_content$col1 < opparsed$col1 & + source_file$parsed_content$terminal) opttb = tail(opttb, n = 1) before.op.type = source_file$parsed_content$token[opttb] if (length(before.op.type) %!=% 0L && before.op.type == "FUNCTION") { @@ -153,7 +161,7 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui message = "Remove spaces before the left parenthesis in a function call.", line = line, linter = "function_left_parentheses" - ) + ) } } @@ -161,39 +169,41 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui } infix.spaces.linter = function(source_file) { - lapply(lintr:::ids_with_token(source_file, lintr:::infix_tokens, fun = `%in%`), - function(id) { - parsed = lintr:::with_id(source_file, id) - line = source_file$lines[as.character(parsed$line1)] - if (substr(line, parsed$col1, parsed$col2) == "^") { - return(NULL) - } - around.operator = substr(line, parsed$col1 - 1L, - parsed$col2 + 1L) - non.space.before = re_matches(around.operator, rex(start, - non_space)) - newline.after = unname(nchar(line)) %==% parsed$col2 - non.space.after = re_matches(around.operator, rex(non_space, - end)) - if (non.space.before || (!newline.after && non.space.after)) { - is.infix = length(lintr:::siblings(source_file$parsed_content, - parsed$id, 1)) > 1L - start = end = parsed$col1 - if (is.infix) { - if (non.space.before) { - start = parsed$col1 - 1L - } - if (non.space.after) { - end = parsed$col2 + 1L - } - Lint(filename = source_file$filename, line_number = parsed$line1, - column_number = parsed$col1, type = "style", - message = "Put spaces around all infix operators (except 
exponentiation).", - line = line, ranges = list(c(start, end)), - linter = "infix.spaces.linter") - } - } - }) + + lapply(lintr:::ids_with_token(source_file, lintr:::infix_tokens, fun = `%in%`), + function(id) { + + parsed = lintr:::with_id(source_file, id) + line = source_file$lines[as.character(parsed$line1)] + if (substr(line, parsed$col1, parsed$col2) == "^") { + return(NULL) + } + around.operator = substr(line, parsed$col1 - 1L, + parsed$col2 + 1L) + non.space.before = re_matches(around.operator, rex(start, + non_space)) + newline.after = unname(nchar(line)) %==% parsed$col2 + non.space.after = re_matches(around.operator, rex(non_space, + end)) + if (non.space.before || (!newline.after && non.space.after)) { + is.infix = length(lintr:::siblings(source_file$parsed_content, + parsed$id, 1)) > 1L + start = end = parsed$col1 + if (is.infix) { + if (non.space.before) { + start = parsed$col1 - 1L + } + if (non.space.after) { + end = parsed$col2 + 1L + } + Lint(filename = source_file$filename, line_number = parsed$line1, + column_number = parsed$col1, type = "style", + message = "Put spaces around all infix operators (except exponentiation).", + line = line, ranges = list(c(start, end)), + linter = "infix.spaces.linter") + } + } + }) } @@ -203,21 +213,22 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui style.regexes = list( "UpperCamelCase" = rex::rex(start, upper, zero_or_more(alnum), end), "lowerCamelCase" = rex::rex(start, lower, zero_or_more(alnum), end), - "snake_case" = rex::rex(start, one_or_more(loweralnum), zero_or_more("_", one_or_more(loweralnum)), end), - "dotted.case" = rex::rex(start, one_or_more(loweralnum), zero_or_more(dot, one_or_more(loweralnum)), end), - "alllowercase" = rex::rex(start, one_or_more(loweralnum), end), - "ALLUPPERCASE" = rex::rex(start, one_or_more(upperalnum), end), + "snake_case" = rex::rex(start, one_or_more(loweralnum), zero_or_more("_", one_or_more(loweralnum)), end), + "dotted.case" = rex::rex(start, one_or_more(loweralnum), zero_or_more(dot, one_or_more(loweralnum)), end), + "alllowercase" = rex::rex(start, one_or_more(loweralnum), end), + "ALLUPPERCASE" = rex::rex(start, one_or_more(upperalnum), end), "functionCamel.case" = rex::rex(start, lower, zero_or_more(alnum), zero_or_more(dot, one_or_more(alnum)), end) ) # incorporate our own camelCase.withDots style. - matchesStyles = function(name, styles=names(style.regexes)) { + matchesStyles = function(name, styles = names(style.regexes)) { + invalids = paste(styles[!styles %in% names(style.regexes)], collapse = ", ") if (nzchar(invalids)) { valids = paste(names(style.regexes), collapse = ", ") stop(sprintf("Invalid style(s) requested: %s\nValid styles are: %s\n", invalids, valids)) } - name = re_substitutes(name, rex(start, one_or_more(dot)), "") # remove leading dots + name = re_substitutes(name, rex(start, one_or_more(dot)), "") # remove leading dots vapply( style.regexes[styles], re_matches, @@ -227,6 +238,7 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui } object.naming.linter = lintr:::make_object_linter(function(source_file, token) { + sp = source_file$parsed_content if (tail(c("", sp$token[sp$terminal & sp$id < token$id]), n = 1) == "'$'") { # ignore list member names @@ -239,7 +251,7 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui return(NULL) } if (sp$text[1] == ":=") { - return(NULL) # ':=' is parsed as LEFT_ASSIGN but does no actual assignment. 
+ return(NULL) # ':=' is parsed as LEFT_ASSIGN but does no actual assignment. } style = ifelse(sp$token[2] == "FUNCTION", "functionCamel.case", "dotted.case") name = lintr:::unquote(token[["text"]]) @@ -257,8 +269,8 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui # note that this must be a *named* list (bug in lintr) linters = list( commas = lintr::commas_linter, - # open.curly = open_curly_linter(), - # closed.curly = closed_curly_linter(), + # open.curly = open_curly_linter(), + # closed.curly = closed_curly_linter(), spaces.left.parentheses = spaces.left.parentheses.linter, function.left.parentheses = function.left.parentheses.linter, single.quotes = lintr::single_quotes_linter, @@ -266,18 +278,22 @@ if (isLintrVersionOk() && require("lintr", quietly = TRUE) && require("rex", qui right.assign = right.assign.linter, no.tab = lintr::no_tab_linter, trailing.whitespace = lintr::trailing_whitespace_linter, - #todo.comment = lintr::todo_comment_linter(todo = "todo"), # is case-insensitive + # todo.comment = lintr::todo_comment_linter(todo = "todo"), # is case-insensitive spaces.inside = lintr::spaces_inside_linter, infix.spaces = infix.spaces.linter, object.naming = object.naming.linter) - if (exists("T_and_F_symbol_linter", where = "package:lintr")) - linters$T.and.F.symbol = lintr::T_and_F_symbol_linter - if (exists("semicolon_terminator_linter", where = "package:lintr")) - linters$semicolon.terminator = lintr::semicolon_terminator_linter - if (exists("seq_lintr", where = "package:lintr")) - linters$seq = lintr::seq_lintr - if (exists("unneeded_concatenation_linter", where = "package:lintr")) - linters$unneeded.concatenation = lintr::unneeded_concatenation_linter + if (exists("T_and_F_symbol_linter", where = "package:lintr")) { + linters$T.and.F.symbol = lintr::T_and_F_symbol_linter + } + if (exists("semicolon_terminator_linter", where = "package:lintr")) { + linters$semicolon.terminator = lintr::semicolon_terminator_linter + } + if (exists("seq_lintr", where = "package:lintr")) { + linters$seq = lintr::seq_lintr + } + if (exists("unneeded_concatenation_linter", where = "package:lintr")) { + linters$unneeded.concatenation = lintr::unneeded_concatenation_linter + } } else { # everything that uses `linters` should check `isLintrVersionOk` first, so the # following should never be used. Make sure that it is an error if it IS used. diff --git a/tests/testthat/helper_mock_learners.R b/tests/testthat/helper_mock_learners.R index 8f6e1f01d0..2de4b213f8 100644 --- a/tests/testthat/helper_mock_learners.R +++ b/tests/testthat/helper_mock_learners.R @@ -1,18 +1,22 @@ # learner with error "foo" in predict -makeRLearner.classif.__mlrmocklearners__1 = function() { # nolint +makeRLearner.classif.__mlrmocklearners__1 = function() { + + # nolint makeRLearnerClassif( cl = "classif.__mlrmocklearners__1", package = character(0L), par.set = makeParamSet(), properties = c("twoclass", "multiclass", "missings", "numerics", "factors", "prob") ) } -trainLearner.classif.__mlrmocklearners__1 = function(.learner, .task, .subset, .weights = NULL, ...) list() # nolint -predictLearner.classif.__mlrmocklearners__1 = function(.learner, .model, .newdata, ...) stop("foo") # nolint +trainLearner.classif.__mlrmocklearners__1 = function(.learner, .task, .subset, .weights = NULL, ...) list() # nolint +predictLearner.classif.__mlrmocklearners__1 = function(.learner, .model, .newdata, ...) 
stop("foo") # nolint registerS3method("makeRLearner", "classif.__mlrmocklearners__1", makeRLearner.classif.__mlrmocklearners__1) registerS3method("trainLearner", "classif.__mlrmocklearners__1", trainLearner.classif.__mlrmocklearners__1) registerS3method("predictLearner", "classif.__mlrmocklearners__1", predictLearner.classif.__mlrmocklearners__1) # for tuning, produces errors en masse -makeRLearner.classif.__mlrmocklearners__2 = function() { # nolint +makeRLearner.classif.__mlrmocklearners__2 = function() { + + # nolint makeRLearnerClassif( cl = "classif.__mlrmocklearners__2", package = character(0L), par.set = makeParamSet( @@ -21,12 +25,17 @@ makeRLearner.classif.__mlrmocklearners__2 = function() { # nolint properties = c("twoclass", "multiclass", "missings", "numerics", "factors", "prob") ) } -trainLearner.classif.__mlrmocklearners__2 = function(.learner, .task, .subset, .weights = NULL, alpha, ...) { # nolint - if (alpha < 0.5) +trainLearner.classif.__mlrmocklearners__2 = function(.learner, .task, .subset, .weights = NULL, alpha, ...) { + + # nolint + if (alpha < 0.5) { stop("foo") + } list() } -predictLearner.classif.__mlrmocklearners__2 = function(.learner, .model, .newdata, ...) { # nolint +predictLearner.classif.__mlrmocklearners__2 = function(.learner, .model, .newdata, ...) { + + # nolint as.factor(sample(.model$task.desc$class.levels, nrow(.newdata), replace = TRUE)) } registerS3method("makeRLearner", "classif.__mlrmocklearners__2", makeRLearner.classif.__mlrmocklearners__2) @@ -35,20 +44,24 @@ registerS3method("predictLearner", "classif.__mlrmocklearners__2", predictLearne # learner with error "foo" in train -makeRLearner.classif.__mlrmocklearners__3 = function() { # nolint +makeRLearner.classif.__mlrmocklearners__3 = function() { + + # nolint makeRLearnerClassif( cl = "classif.__mlrmocklearners__3", package = character(0L), par.set = makeParamSet(), properties = c("twoclass", "multiclass", "missings", "numerics", "factors", "prob") ) } -trainLearner.classif.__mlrmocklearners__3 = function(.learner, .task, .subset, .weights = NULL, ...) stop("foo") # nolint -predictLearner.classif.__mlrmocklearners__3 = function(.learner, .model, .newdata, ...) 1L # nolint +trainLearner.classif.__mlrmocklearners__3 = function(.learner, .task, .subset, .weights = NULL, ...) stop("foo") # nolint +predictLearner.classif.__mlrmocklearners__3 = function(.learner, .model, .newdata, ...) 1L # nolint registerS3method("makeRLearner", "classif.__mlrmocklearners__3", makeRLearner.classif.__mlrmocklearners__3) registerS3method("trainLearner", "classif.__mlrmocklearners__3", trainLearner.classif.__mlrmocklearners__3) registerS3method("predictLearner", "classif.__mlrmocklearners__3", predictLearner.classif.__mlrmocklearners__3) # learner with different "when" settings for hyperpars -makeRLearner.regr.__mlrmocklearners__4 = function() { # nolint +makeRLearner.regr.__mlrmocklearners__4 = function() { + + # nolint makeRLearnerRegr( cl = "regr.__mlrmocklearners__4", package = character(0L), par.set = makeParamSet( @@ -60,11 +73,15 @@ makeRLearner.regr.__mlrmocklearners__4 = function() { # nolint ) } -trainLearner.regr.__mlrmocklearners__4 = function(.learner, .task, .subset, .weights = NULL, p1, p3, ...) { # nolint +trainLearner.regr.__mlrmocklearners__4 = function(.learner, .task, .subset, .weights = NULL, p1, p3, ...) 
{ + + # nolint list(foo = p1 + p3) } -predictLearner.regr.__mlrmocklearners__4 = function(.learner, .model, .newdata, p2, p3) { # nolint +predictLearner.regr.__mlrmocklearners__4 = function(.learner, .model, .newdata, p2, p3) { + + # nolint y = rep(1, nrow(.newdata)) y * .model$learner.model$foo + p2 + p3 } @@ -74,7 +91,9 @@ registerS3method("predictLearner", "regr.__mlrmocklearners__4", predictLearner.r # Learner cannot use expression in param requires -makeRLearner.classif.__mlrmocklearners__5 = function() { # nolint +makeRLearner.classif.__mlrmocklearners__5 = function() { + + # nolint makeRLearnerClassif( cl = "classif.__mlrmocklearners__5", package = "mlr", @@ -86,9 +105,13 @@ makeRLearner.classif.__mlrmocklearners__5 = function() { # nolint ) } -trainLearner.classif.__mlrmocklearners__5 = function(.learner, .task, .subset, .weights = NULL, ...) { } # nolint +trainLearner.classif.__mlrmocklearners__5 = function(.learner, .task, .subset, .weights = NULL, ...) { + +} # nolint + +predictLearner.classif.__mlrmocklearners__5 = function(.learner, .model, .newdata) { -predictLearner.classif.__mlrmocklearners__5 = function(.learner, .model, .newdata) { # nolint + # nolint rep(factor(.model$factor.levels[[.model$task.desc$target]][1]), nrow(.newdata)) } registerS3method("makeRLearner", "classif.__mlrmocklearners__5", makeRLearner.classif.__mlrmocklearners__5) @@ -96,7 +119,9 @@ registerS3method("trainLearner", "classif.__mlrmocklearners__5", trainLearner.cl registerS3method("predictLearner", "classif.__mlrmocklearners__5", predictLearner.classif.__mlrmocklearners__5) # stores weights internally so we can see wether they are correctly passed down -makeRLearner.regr.__mlrmocklearners__6 = function() { # nolint +makeRLearner.regr.__mlrmocklearners__6 = function() { + + # nolint makeRLearnerRegr( cl = "regr.__mlrmocklearners__6", package = character(0L), par.set = makeParamSet(), @@ -104,18 +129,24 @@ makeRLearner.regr.__mlrmocklearners__6 = function() { # nolint ) } -trainLearner.regr.__mlrmocklearners__6 = function(.learner, .task, .subset, .weights = NULL, ...) { # nolint +trainLearner.regr.__mlrmocklearners__6 = function(.learner, .task, .subset, .weights = NULL, ...) { + + # nolint list(weights = .weights) } -predictLearner.regr.__mlrmocklearners__6 = function(.learner, .model, .newdata) { # nolint +predictLearner.regr.__mlrmocklearners__6 = function(.learner, .model, .newdata) { + + # nolint rep(1, nrow(.newdata)) } registerS3method("makeRLearner", "regr.__mlrmocklearners__6", makeRLearner.regr.__mlrmocklearners__6) registerS3method("trainLearner", "regr.__mlrmocklearners__6", trainLearner.regr.__mlrmocklearners__6) registerS3method("predictLearner", "regr.__mlrmocklearners__6", predictLearner.regr.__mlrmocklearners__6) -makeRLearner.classif.__mlrmocklearners__6 = function() { # nolint +makeRLearner.classif.__mlrmocklearners__6 = function() { + + # nolint makeRLearnerClassif( cl = "classif.__mlrmocklearners__6", package = character(0L), par.set = makeParamSet(), @@ -123,17 +154,17 @@ makeRLearner.classif.__mlrmocklearners__6 = function() { # nolint ) } -trainLearner.classif.__mlrmocklearners__6 = function(.learner, .task, .subset, .weights = NULL, ...) { # nolint +trainLearner.classif.__mlrmocklearners__6 = function(.learner, .task, .subset, .weights = NULL, ...) 
{ + + # nolint list(weights = .weights) } -predictLearner.classif.__mlrmocklearners__6 = function(.learner, .model, .newdata) { # nolint +predictLearner.classif.__mlrmocklearners__6 = function(.learner, .model, .newdata) { + + # nolint rep(1, nrow(.newdata)) } registerS3method("makeRLearner", "classif.__mlrmocklearners__6", makeRLearner.classif.__mlrmocklearners__6) registerS3method("trainLearner", "classif.__mlrmocklearners__6", trainLearner.classif.__mlrmocklearners__6) registerS3method("predictLearner", "classif.__mlrmocklearners__6", predictLearner.classif.__mlrmocklearners__6) - - - - diff --git a/tests/testthat/helper_objects.R b/tests/testthat/helper_objects.R index 25d4137a71..090623affb 100644 --- a/tests/testthat/helper_objects.R +++ b/tests/testthat/helper_objects.R @@ -3,45 +3,45 @@ data(BreastCancer, package = "mlbench", envir = environment()) data(spatial.task, package = "mlr", envir = environment()) binaryclass.df = Sonar -binaryclass.formula = Class~. +binaryclass.formula = Class ~ . binaryclass.target = "Class" binaryclass.train.inds = c(1:50, 100:150) -binaryclass.test.inds = setdiff(seq_len(nrow(binaryclass.df)), binaryclass.train.inds) +binaryclass.test.inds = setdiff(seq_len(nrow(binaryclass.df)), binaryclass.train.inds) binaryclass.train = binaryclass.df[binaryclass.train.inds, ] -binaryclass.test = binaryclass.df[binaryclass.test.inds, ] +binaryclass.test = binaryclass.df[binaryclass.test.inds, ] binaryclass.class.col = 61 binaryclass.class.levs = levels(binaryclass.df[, binaryclass.class.col]) binaryclass.task = makeClassifTask("binary", data = binaryclass.df, target = binaryclass.target) binaryclass.spatial.df = spatial.task$env$data coordinates = spatial.task$coordinates -binaryclass.spatial.formula = slides~. +binaryclass.spatial.formula = slides ~ . binaryclass.spatial.target = "slides" binaryclass.spatial.train.inds = c(1:300, 600:900) -binaryclass.spatial.test.inds = setdiff(seq_len(nrow(binaryclass.spatial.df)), binaryclass.spatial.train.inds) +binaryclass.spatial.test.inds = setdiff(seq_len(nrow(binaryclass.spatial.df)), binaryclass.spatial.train.inds) binaryclass.spatial.train = binaryclass.spatial.df[binaryclass.spatial.train.inds, ] -binaryclass.spatial.test = binaryclass.spatial.df[binaryclass.spatial.test.inds, ] +binaryclass.spatial.test = binaryclass.spatial.df[binaryclass.spatial.test.inds, ] binaryclass.spatial.class.col = 3 binaryclass.spatial.class.levs = levels(binaryclass.spatial.df[, binaryclass.spatial.class.col]) binaryclass.spatial.task = makeClassifTask("binary", data = binaryclass.spatial.df, target = binaryclass.spatial.target, coordinates = coordinates) multiclass.df = iris -multiclass.formula = Species~. +multiclass.formula = Species ~ . multiclass.target = "Species" multiclass.train.inds = c(1:30, 51:80, 101:130) -multiclass.test.inds = setdiff(1:150, multiclass.train.inds) +multiclass.test.inds = setdiff(1:150, multiclass.train.inds) multiclass.train = multiclass.df[multiclass.train.inds, ] -multiclass.test = multiclass.df[multiclass.test.inds, ] +multiclass.test = multiclass.df[multiclass.test.inds, ] multiclass.class.col = 5 multiclass.task = makeClassifTask("multiclass", data = multiclass.df, target = multiclass.target) multiclass.small.df = iris[c(1:3, 51:53, 101:103), ] -multiclass.small.formula = Species~. +multiclass.small.formula = Species ~ . 
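The mock learners registered above follow mlr's standard extension protocol: a makeRLearner.<id> constructor, a trainLearner.<id> method, and a predictLearner.<id> method, hooked into dispatch via registerS3method() because test helpers are not attached as a package. A minimal hypothetical regression mock in the same shape (the names are invented for illustration):

makeRLearner.regr.mock = function() {
  makeRLearnerRegr(cl = "regr.mock", package = character(0L),
    par.set = makeParamSet(), properties = "numerics")
}
trainLearner.regr.mock = function(.learner, .task, .subset, .weights = NULL, ...) {
  # the "model" is just the training-set mean of the target
  list(mean = mean(getTaskTargets(.task)[.subset]))
}
predictLearner.regr.mock = function(.learner, .model, .newdata, ...) {
  # trainLearner's return value is wrapped by mlr and exposed as learner.model
  rep(.model$learner.model$mean, nrow(.newdata))
}
registerS3method("makeRLearner", "regr.mock", makeRLearner.regr.mock)
registerS3method("trainLearner", "regr.mock", trainLearner.regr.mock)
registerS3method("predictLearner", "regr.mock", predictLearner.regr.mock)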
multiclass.small.target = "Species" multiclass.small.train.inds = c(1:2, 4:5, 7:8) -multiclass.small.test.inds = setdiff(1:9, multiclass.small.train.inds) +multiclass.small.test.inds = setdiff(1:9, multiclass.small.train.inds) multiclass.small.train = multiclass.small.df[multiclass.small.train.inds, ] -multiclass.small.test = multiclass.small.df[multiclass.small.test.inds, ] +multiclass.small.test = multiclass.small.df[multiclass.small.test.inds, ] multiclass.small.class.col = 5 multiclass.small.task = makeClassifTask("multiclass", data = multiclass.small.df, target = multiclass.small.target) @@ -50,9 +50,9 @@ multilabel.df[, "y1"] = rep(c(TRUE, FALSE), 75L) multilabel.df[, "y2"] = rep(c(FALSE, TRUE), 75L) multilabel.target = c("y1", "y2") multilabel.train.inds = c(1:30, 51:80, 101:130) -multilabel.test.inds = setdiff(1:150, multilabel.train.inds) +multilabel.test.inds = setdiff(1:150, multilabel.train.inds) multilabel.train = multilabel.df[multilabel.train.inds, ] -multilabel.test = multilabel.df[multilabel.test.inds, ] +multilabel.test = multilabel.df[multilabel.test.inds, ] multilabel.task = makeMultilabelTask("multilabel", data = multilabel.df, target = multilabel.target) multilabel.formula.cbind = as.formula(paste("cbind(", paste(multilabel.target, collapse = ",", sep = " "), ") ~ .", sep = "")) multilabel.formula = as.formula(paste(paste(multilabel.target, collapse = "+"), "~.")) @@ -60,9 +60,9 @@ multilabel.small.inds = c(1, 52, 53, 123) noclass.df = iris[, -5] noclass.train.inds = c(1:30, 51:80, 101:130) -noclass.test.inds = setdiff(1:150, noclass.train.inds) +noclass.test.inds = setdiff(1:150, noclass.train.inds) noclass.train = noclass.df[noclass.train.inds, ] -noclass.test = noclass.df[noclass.test.inds, ] +noclass.test = noclass.df[noclass.test.inds, ] noclass.task = makeClusterTask("noclass", data = noclass.df) data(BostonHousing, package = "mlbench", envir = environment()) @@ -70,9 +70,9 @@ regr.df = BostonHousing regr.formula = medv ~ . regr.target = "medv" regr.train.inds = seq(1, 506, 7) -regr.test.inds = setdiff(seq_len(nrow(regr.df)), regr.train.inds) +regr.test.inds = setdiff(seq_len(nrow(regr.df)), regr.train.inds) regr.train = regr.df[regr.train.inds, ] -regr.test = regr.df[regr.test.inds, ] +regr.test = regr.df[regr.test.inds, ] regr.class.col = 14 regr.task = makeRegrTask("regrtask", data = regr.df, target = regr.target) @@ -80,9 +80,9 @@ regr.small.df = BostonHousing[150:160, ] regr.small.formula = medv ~ . 
regr.small.target = "medv" regr.small.train.inds = 1:7 -regr.small.test.inds = setdiff(seq_len(nrow(regr.small.df)), regr.small.train.inds) +regr.small.test.inds = setdiff(seq_len(nrow(regr.small.df)), regr.small.train.inds) regr.small.train = regr.small.df[regr.small.train.inds, ] -regr.small.test = regr.small.df[regr.small.test.inds, ] +regr.small.test = regr.small.df[regr.small.test.inds, ] regr.small.class.col = 14 regr.small.task = makeRegrTask("regrtask", data = regr.small.df, target = regr.small.target) @@ -90,9 +90,9 @@ regr.num.df = regr.df[, sapply(regr.df, is.numeric)] regr.num.formula = regr.formula regr.num.target = regr.target regr.num.train.inds = regr.train.inds -regr.num.test.inds = regr.test.inds +regr.num.test.inds = regr.test.inds regr.num.train = regr.num.df[regr.num.train.inds, ] -regr.num.test = regr.num.df[regr.num.test.inds, ] +regr.num.test = regr.num.df[regr.num.test.inds, ] regr.num.class.col = 13 regr.num.task = makeRegrTask("regrnumtask", data = regr.num.df, target = regr.num.target) @@ -101,18 +101,19 @@ regr.na.num.df[1, 1] = NA regr.na.num.formula = regr.num.formula regr.na.num.target = regr.num.target regr.na.num.train.inds = regr.num.train.inds -regr.na.num.test.inds = regr.num.test.inds +regr.na.num.test.inds = regr.num.test.inds regr.na.num.train = regr.na.num.df[regr.na.num.train.inds, ] -regr.na.num.test = regr.na.num.df[regr.na.num.test.inds, ] +regr.na.num.test = regr.na.num.df[regr.na.num.test.inds, ] regr.na.num.class.col = 13 regr.na.num.task = makeRegrTask("regrnanumdf", data = regr.na.num.df, target = regr.na.num.target) getSurvData = function(n = 100, p = 10) { + set.seed(1) beta = c(rep(1, 10), rep(0, p - 10)) x = matrix(rnorm(n * p), n, p) colnames(x) = sprintf("x%01i", 1:p) - real.time = - (log(runif(n))) / (10 * exp(drop(x %*% beta))) + real.time = -(log(runif(n))) / (10 * exp(drop(x %*% beta))) cens.time = rexp(n, rate = 1 / 10) status = ifelse(real.time <= cens.time, TRUE, FALSE) obs.time = ifelse(real.time <= cens.time, real.time, cens.time) + 1 @@ -129,28 +130,30 @@ surv.df = getSurvData() surv.formula = survival::Surv(time, status) ~ . surv.target = c("time", "status") surv.train.inds = seq(1, floor(2 / 3 * nrow(surv.df))) -surv.test.inds = setdiff(seq_len(nrow(surv.df)), surv.train.inds) +surv.test.inds = setdiff(seq_len(nrow(surv.df)), surv.train.inds) surv.train = surv.df[surv.train.inds, ] -surv.test = surv.df[surv.test.inds, ] +surv.test = surv.df[surv.test.inds, ] surv.task = makeSurvTask("survtask", data = surv.df, target = surv.target) rm(getSurvData) data("gunpoint.task", package = "mlr") data("fuelsubset.task", package = "mlr") fda.binary.gp.task = gunpoint.task -suppressMessages({gp = getTaskData(gunpoint.task, subset = seq_len(100), functionals.as = "dfcols")}) +suppressMessages({ + gp = getTaskData(gunpoint.task, subset = seq_len(100), functionals.as = "dfcols") +}) gp.fdf = makeFunctionalData(gp[, seq_len(51)], fd.features = list("fd" = 2:51)) fda.binary.gp.task.small = makeClassifTask(data = gp.fdf, target = "X1") fda.regr.fs.task = fuelsubset.task # nonsense fda multiclass task fda.multiclass.df = iris -fda.multiclass.formula = Species~. +fda.multiclass.formula = Species ~ . 
fda.multiclass.target = "Species" fda.multiclass.train.inds = c(1:30, 51:80, 101:130) -fda.multiclass.test.inds = setdiff(1:150, multiclass.train.inds) +fda.multiclass.test.inds = setdiff(1:150, multiclass.train.inds) fda.multiclass.train = multiclass.df[multiclass.train.inds, ] -fda.multiclass.test = multiclass.df[multiclass.test.inds, ] +fda.multiclass.test = multiclass.df[multiclass.test.inds, ] fda.multiclass.class.col = 5 mc.fdf = makeFunctionalData(fda.multiclass.df, fd.features = list("fd1" = 1:2, "fd2" = 3:4)) fda.multiclass.task = makeClassifTask("multiclass", data = mc.fdf, target = multiclass.target) diff --git a/tests/testthat/helper_zzz.R b/tests/testthat/helper_zzz.R index b01f652a84..835cf60f15 100644 --- a/tests/testthat/helper_zzz.R +++ b/tests/testthat/helper_zzz.R @@ -4,4 +4,3 @@ configureMlr(show.info = FALSE, show.learner.output = FALSE) library(checkmate) library(BBmisc) - diff --git a/tests/testthat/test_base_BaggingWrapper.R b/tests/testthat/test_base_BaggingWrapper.R index da7c875efa..2e66494bac 100644 --- a/tests/testthat/test_base_BaggingWrapper.R +++ b/tests/testthat/test_base_BaggingWrapper.R @@ -71,4 +71,3 @@ test_that("BaggingWrapper with glmnet (#958)", { pred = predict(mod, multiclass.task) expect_error(pred, NA) }) - diff --git a/tests/testthat/test_base_BaseWrapper.R b/tests/testthat/test_base_BaseWrapper.R index 6434236d42..ecf7cbc58b 100644 --- a/tests/testthat/test_base_BaseWrapper.R +++ b/tests/testthat/test_base_BaseWrapper.R @@ -28,8 +28,8 @@ test_that("Joint model performance estimation, tuning, and model performance", { lrn2 = makeTuneWrapper( learner = lrn, par.set = makeParamSet( - makeDiscreteParam("C", values = 2 ^ (-2:2)), - makeDiscreteParam("sigma", values = 2 ^ (-2:2)) + makeDiscreteParam("C", values = 2^(-2:2)), + makeDiscreteParam("sigma", values = 2^(-2:2)) ), measures = list(auc, acc), control = makeTuneControlRandom(maxit = 3L), @@ -57,8 +57,8 @@ test_that("Error when wrapping tune wrapper around another optimization wrapper" lrn3 = makeTuneWrapper( learner = lrn2, par.set = makeParamSet( - makeDiscreteParam("C", values = 2 ^ (-2:2)), - makeDiscreteParam("sigma", values = 2 ^ (-2:2)) + makeDiscreteParam("C", values = 2^(-2:2)), + makeDiscreteParam("sigma", values = 2^(-2:2)) ), measures = list(auc, acc), control = makeTuneControlRandom(maxit = 3L), @@ -67,5 +67,3 @@ test_that("Error when wrapping tune wrapper around another optimization wrapper" bmrk = benchmark(lrn3, pid.task) }, "Cannot wrap a tuning wrapper around another optimization wrapper!") }) - - diff --git a/tests/testthat/test_base_ConstantClassWrapper.R b/tests/testthat/test_base_ConstantClassWrapper.R index 173fafad6b..b135e597a3 100644 --- a/tests/testthat/test_base_ConstantClassWrapper.R +++ b/tests/testthat/test_base_ConstantClassWrapper.R @@ -18,7 +18,9 @@ test_that("ConstantClassWrapper predicts with response", { # one class present train.inds = 1:20 try({ - suppressAll({train(lrn1, multiclass.task, subset = train.inds)}) + suppressAll({ + train(lrn1, multiclass.task, subset = train.inds) + }) fail("Data has more than one class.") }, silent = TRUE) m2 = train(lrn2, multiclass.task, subset = train.inds) @@ -65,7 +67,9 @@ test_that("ConstantClassWrapper predicts with probs", { # one class present train.inds = 1:20 try({ - suppressAll({train(lrn1, multiclass.task, subset = train.inds)}) + suppressAll({ + train(lrn1, multiclass.task, subset = train.inds) + }) fail("Data has more than one class.") }, silent = TRUE) m2 = train(lrn2, multiclass.task, subset = train.inds) @@ 
-78,6 +82,7 @@ test_that("ConstantClassWrapper predicts with probs", { probs = getPredictionProbabilities(p2) sapply(names(probs), function(col) { + prob = ifelse(col == unique(multiclass.df[train.inds, multiclass.target]), 1, 0) expect_true(all(probs[col] == prob)) }) diff --git a/tests/testthat/test_base_MulticlassWrapper.R b/tests/testthat/test_base_MulticlassWrapper.R index 8ae9f96722..c9aaf353d9 100644 --- a/tests/testthat/test_base_MulticlassWrapper.R +++ b/tests/testthat/test_base_MulticlassWrapper.R @@ -1,9 +1,11 @@ context("MulticlassWrapper") test_that("MulticlassWrapper", { - #cmatrix function + # cmatrix function ownCmatrix = function(task) { + cm.onevsrest = function(task) { + n = length(getTaskClassLevels(task)) cm = matrix(-1, n, n) diag(cm) = 1 @@ -12,10 +14,12 @@ test_that("MulticlassWrapper", { } cm = cm.onevsrest(task) levs = getTaskClassLevels(task) - if (!setequal(rownames(cm), levs)) + if (!setequal(rownames(cm), levs)) { stop("Rownames of codematrix must be class levels!") - if (!all(cm == 1 | cm == -1 | cm == 0)) + } + if (!all(cm == 1 | cm == -1 | cm == 0)) { stop("Codematrix must only contain: -1, 0, +1!") + } cm } diff --git a/tests/testthat/test_base_PreprocWrapper.R b/tests/testthat/test_base_PreprocWrapper.R index 81a05d5549..9c7b96b574 100644 --- a/tests/testthat/test_base_PreprocWrapper.R +++ b/tests/testthat/test_base_PreprocWrapper.R @@ -2,10 +2,12 @@ context("PreprocWrapper") test_that("PreprocWrapper", { f1 = function(data, target, args) { + data[, 2] = args$x * data[, 2] return(list(data = data, control = list())) } f2 = function(data, target, args, control) { + data[, 2] = args$x * data[, 2] return(data) } @@ -47,4 +49,3 @@ test_that("PreprocWrapper with glmnet (#958)", { pred = predict(mod, multiclass.task) expect_error(pred, NA) }) - diff --git a/tests/testthat/test_base_SupervisedTask.R b/tests/testthat/test_base_SupervisedTask.R index edc9c5b71e..386eb36de1 100644 --- a/tests/testthat/test_base_SupervisedTask.R +++ b/tests/testthat/test_base_SupervisedTask.R @@ -52,8 +52,10 @@ test_that("SupervisedTask dropping of levels works", { task = makeRegrTask(data = d, target = colnames(iris)[1], fixup.data = "quiet") e = getTaskData(task) expect_true(setequal(levels(e$Species), levs1)) - expect_warning({task = makeRegrTask(data = d, target = colnames(iris)[1], fixup.data = "warn")}, - "Empty factor levels") + expect_warning({ + task = makeRegrTask(data = d, target = colnames(iris)[1], fixup.data = "warn") + }, + "Empty factor levels") e = getTaskData(task) expect_true(setequal(levels(e$Species), levs1)) @@ -61,12 +63,13 @@ test_that("SupervisedTask dropping of levels works", { "Empty factor levels") expect_warning(makeMultilabelTask("multilabel", multilabel.df[1:10, ], target = c("y1", "y2"), fixup.data = "quiet"), NA) - }) test_that("SupervisedTask does not drop positive class", { data = iris[1:100, ] - expect_warning({task = makeClassifTask(data = data, target = "Species")}, "empty factor levels") + expect_warning({ + task = makeClassifTask(data = data, target = "Species") + }, "empty factor levels") td = getTaskDesc(task) expect_true(setequal(c(td$positive, td$negative), unique(data$Species))) }) diff --git a/tests/testthat/test_base_TuneWrapper.R b/tests/testthat/test_base_TuneWrapper.R index 5eacb14d10..df012e7603 100644 --- a/tests/testthat/test_base_TuneWrapper.R +++ b/tests/testthat/test_base_TuneWrapper.R @@ -101,7 +101,7 @@ test_that("TuneWrapper works with nested sampling and threshold tuning, cf. 
issu rdesc = makeResampleDesc("Holdout") ctrl = makeTuneControlGrid(tune.threshold = TRUE, tune.threshold.args = list(nsub = 2L)) ps = makeParamSet( - makeDiscreteParam("C", 2^ (-1)) + makeDiscreteParam("C", 2^(-1)) ) lrn1 = makeLearner("classif.ksvm", predict.type = "prob") lrn2 = makeTuneWrapper(lrn1, resampling = rdesc, measures = list(ber, mmce), @@ -119,4 +119,3 @@ test_that("TuneWrapper with glmnet (#958)", { pred = predict(mod, multiclass.task) expect_error(pred, NA) }) - diff --git a/tests/testthat/test_base_UnsupervisedTask.R b/tests/testthat/test_base_UnsupervisedTask.R index b19492c4f5..699f7510c7 100644 --- a/tests/testthat/test_base_UnsupervisedTask.R +++ b/tests/testthat/test_base_UnsupervisedTask.R @@ -31,5 +31,4 @@ test_that("UnsupervisedTask", { expect_warning(makeClusterTask("cluster", iris[1:10, ], fixup.data = "warn"), "Empty factor levels") expect_warning(makeClusterTask("cluster", iris[1:10, ], fixup.data = "quiet"), NA) - }) diff --git a/tests/testthat/test_base_benchmark.R b/tests/testthat/test_base_benchmark.R index 801ca8dccb..c1ea043088 100644 --- a/tests/testthat/test_base_benchmark.R +++ b/tests/testthat/test_base_benchmark.R @@ -164,6 +164,7 @@ test_that("benchmark", { expect_equal(unique(tffd$iter), 1:2) f = function(tmp, cl) { + context(sprintf("benchmark: extracting %s", cl)) expect_true(is.list(tmp)) expect_true(setequal(names(tmp), task.names)) @@ -193,7 +194,7 @@ test_that("keep.preds and models are passed down to resample()", { expect_list(x$models, types = "WrappedModel") expect_is(x$pred, "ResamplePrediction") - ##test getter function for models + ## test getter function for models models = getBMRModels(res) expect_true(is.list(models)) expect_true(setequal(names(models), "binary")) @@ -249,6 +250,7 @@ test_that("drop option works for BenchmarkResults_operators", { # check all other functions that use 'drop' briefly testDropOption = function(bmr, fun, new.names, ...) { + extra.args = list(...) 
res = do.call(fun, c(list(bmr, drop = TRUE), extra.args)) expect_true(all(names(res) == new.names)) @@ -263,16 +265,3 @@ test_that("drop option works for BenchmarkResults_operators", { testDropOption(one.two, getBMROptResults, new.names = learner.names, wrapper.class = "cl") }) - - - - - - - - - - - - - diff --git a/tests/testthat/test_base_blocking.R b/tests/testthat/test_base_blocking.R index 6b2124ba53..05ed10f15c 100644 --- a/tests/testthat/test_base_blocking.R +++ b/tests/testthat/test_base_blocking.R @@ -17,6 +17,7 @@ test_that("blocking", { # test blocking in resample lrn = makeLearner("classif.lda") mycheck = function(rdesc, p, b) { + for (j in 1:rdesc$iters) { test.j = p$data[p$data$iter == j, "id"] tab = table(b[test.j]) diff --git a/tests/testthat/test_base_caching.R b/tests/testthat/test_base_caching.R index acae0301cb..923ef41dfa 100644 --- a/tests/testthat/test_base_caching.R +++ b/tests/testthat/test_base_caching.R @@ -1,7 +1,6 @@ context("caching") test_that("caching works with most filters", { - filters = as.character(listFilterMethods()$id) filter.list = listFilterMethods(desc = FALSE, tasks = TRUE, features = FALSE) filter.list.classif = as.character(filter.list$id)[filter.list$task.classif] @@ -12,36 +11,38 @@ test_that("caching works with most filters", { # tune over various filters using all possible caching options # TRUE is not tested, as we are not allowed to write in the user's home dir - out = lapply(list(FALSE, tempdir()), function (i) { + out = lapply(list(FALSE, tempdir()), function(i) { tune_out = lapply(filter.list.regr, function(.x) { + lrn = makeFilterWrapper(learner = "regr.ksvm", fw.method = .x, cache = i) ps = makeParamSet(makeNumericParam("fw.perc", lower = 0, upper = 1), - makeNumericParam("C", lower = -10, upper = 10, - trafo = function(x) 2^x), - makeNumericParam("sigma", lower = -10, upper = 10, - trafo = function(x) 2^x) + makeNumericParam("C", lower = -10, upper = 10, + trafo = function(x) 2^x), + makeNumericParam("sigma", lower = -10, upper = 10, + trafo = function(x) 2^x) ) rdesc = makeResampleDesc("CV", iters = 3) # print(.x) tuneParams(lrn, task = regr.num.task, resampling = rdesc, par.set = ps, - control = makeTuneControlRandom(maxit = 5), - show.info = FALSE) + control = makeTuneControlRandom(maxit = 5), + show.info = FALSE) }) }) expect_equal(out[[1]][["opt.path"]][["env"]][["path"]][["mse.test.mean"]], - out[[2]][["opt.path"]][["env"]][["path"]][["mse.test.mean"]], - out[[3]][["opt.path"]][["env"]][["path"]][["mse.test.mean"]]) + out[[2]][["opt.path"]][["env"]][["path"]][["mse.test.mean"]], + out[[3]][["opt.path"]][["env"]][["path"]][["mse.test.mean"]]) }) test_that("cache dir is successfully deleted", { skip_on_cran() # we are not allowed to write to the user's home dir! 
   dir = getCacheDir()
-  if (!dir.exists(dir))
+  if (!dir.exists(dir)) {
     dir.create(dir, recursive = TRUE)
+  }
   expect_true(dir.exists(getCacheDir()))
   deleteCacheDir()
diff --git a/tests/testthat/test_base_calculateConfusionMatrix.R b/tests/testthat/test_base_calculateConfusionMatrix.R
index f995853c3a..da3ac1df66 100644
--- a/tests/testthat/test_base_calculateConfusionMatrix.R
+++ b/tests/testthat/test_base_calculateConfusionMatrix.R
@@ -1,38 +1,38 @@
 context("calculateConfusionMatrix")

 test_that("calculateConfusionMatrix", {
-
   test.confMatrix = function(p) {
+
     lvls = getTaskClassLevels(p$task.desc)
     n = getTaskSize(p$task.desc)
     l = length(lvls)

-    #test absolute
+    # test absolute
     cm = calculateConfusionMatrix(p, relative = FALSE)
-    expect_true(is.matrix(cm$result) && nrow(cm$result) == l + 1 && ncol(cm$result) == l + 1)
+    expect_true(is.matrix(cm$result) && nrow(cm$result) == l + 1 && ncol(cm$result) == l + 1)
     expect_set_equal(cm$result[1:l, l + 1], cm$result[l + 1, 1:l])

-    #test absolute number of errors
+    # test absolute number of errors
     d = cm$result[1:l, 1:l]
     diag(d) = 0
     expect_true(sum(unlist(d)) == cm$result[l + 1, l + 1])

-    #test absolute with sums
+    # test absolute with sums
     cm = calculateConfusionMatrix(p, sums = TRUE)
-    expect_true(is.matrix(cm$result) && nrow(cm$result) == l + 2 && ncol(cm$result) == l + 2)
+    expect_true(is.matrix(cm$result) && nrow(cm$result) == l + 2 && ncol(cm$result) == l + 2)
     expect_set_equal(cm$result[1:l, l + 1], cm$result[l + 1, 1:l])

-    #test absolute number of errors
+    # test absolute number of errors
     d = cm$result[1:l, 1:l]
     diag(d) = 0
     expect_true(sum(unlist(d)) == cm$result[l + 1, l + 1])

-    #test relative
+    # test relative
     cm = calculateConfusionMatrix(p, relative = TRUE)
-    #sums have to be 1 or 0 (if no observation in that group)
+    # sums have to be 1 or 0 (if no observation in that group)
     expect_true(all(rowSums(cm$relative.row[, 1:l]) == 1 |
-    rowSums(cm$relative.row[, 1:l]) == 0))
+      rowSums(cm$relative.row[, 1:l]) == 0))
     expect_true(all(colSums(cm$relative.col[1:l, ]) == 1 |
-    colSums(cm$relative.col[1:l, ]) == 0))
+      colSums(cm$relative.col[1:l, ]) == 0))
   }
@@ -44,14 +44,15 @@ test_that("calculateConfusionMatrix", {
   test.confMatrix(r$pred)

-  #dropped class lvls
+  # dropped class lvls
   newdata = droplevels(multiclass.df[1L, ])
   m = train("classif.rpart", multiclass.task)
   p = predict(m, newdata = newdata)
   test.confMatrix(p)

-  #failure model
-  data = iris; data[, 1] = 1
+  # failure model
+  data = iris
+  data[, 1] = 1
   lrn = makeLearner("classif.lda", config = list(on.learner.error = "quiet"))
   task = makeClassifTask(data = data, target = "Species")
   r = holdout(lrn, task, measures = ber)
@@ -59,7 +60,7 @@ test_that("calculateConfusionMatrix", {
 })

 test_that("calculateConfusionMatrix elements are consistent with implemented measures", {
-  #check values itself
+  # check values itself
   task = subsetTask(sonar.task, 1:32)
   pred = holdout(makeLearner("classif.rpart"), task, split = 1 / 2)$pred
   truth = factor(rep(c("M", "R"), c(4, 12)))
@@ -77,10 +78,10 @@ test_that("calculateConfusionMatrix elements are consistent with implemented mea
   fn = cm$result[1, 2]
   fp = cm$result[2, 1]
   tn = cm$result[2, 2]
-  cp = tp + fn # condition positive
-  cn = tn + fp # condition negative
-  pp = tp + fp # predicted positive
-  pn = tn + fn # predicted negative
+  cp = tp + fn # condition positive
+  cn = tn + fp # condition negative
+  pp = tp + fp # predicted positive
+  pn = tn + fn # predicted negative

   # expect_equivalent instead of expect_equal because the performance() result
   # contains an attribute (the name)
@@ -121,37 +122,37 @@ test_that("calculateConfusionMatrix with different factor levels (#2030)", {

 test_that("calculateConfusionMatrix set argument works", {
-    mod = train("classif.lda", iris.task)
-    pred1 = predict(mod, iris.task)
-    rdesc = makeResampleDesc("CV", iters = 10, predict = "both")
-    # here, you have set=train and set=test in pred3$data:
-    pred2 = resample("classif.rpart", iris.task, rdesc)$pred
-
-    # pred1$data has no column "set" => argument set="train" would *not* make sense
-    expect_error(calculateConfusionMatrix(pred1, set = "train"))
-
-    # pred3 was predicted on both train and test set. Both subsetted matrices should give
-    # a positive total count:
-    test.obs = table(pred2$data$set)["test"]
-    train.obs = table(pred2$data$set)["train"]
-    expect_equivalent(sum(calculateConfusionMatrix(pred2, set = "train")$result[1:3, 1:3]), train.obs)
-    expect_equivalent(sum(calculateConfusionMatrix(pred2, set = "test")$result[1:3, 1:3]), test.obs)
+  mod = train("classif.lda", iris.task)
+  pred1 = predict(mod, iris.task)
+  rdesc = makeResampleDesc("CV", iters = 10, predict = "both")
+  # here, you have set=train and set=test in pred3$data:
+  pred2 = resample("classif.rpart", iris.task, rdesc)$pred
+
+  # pred1$data has no column "set" => argument set="train" would *not* make sense
+  expect_error(calculateConfusionMatrix(pred1, set = "train"))
+
+  # pred3 was predicted on both train and test set. Both subsetted matrices should give
+  # a positive total count:
+  test.obs = table(pred2$data$set)["test"]
+  train.obs = table(pred2$data$set)["train"]
+  expect_equivalent(sum(calculateConfusionMatrix(pred2, set = "train")$result[1:3, 1:3]), train.obs)
+  expect_equivalent(sum(calculateConfusionMatrix(pred2, set = "test")$result[1:3, 1:3]), test.obs)
 })

 test_that("calculateConfusionMatrix raises error when set argument is 'wrong'", {
-    # if a resampled prediction was computed with predict = "train" and is passed
-    # with set = "test" (and vice-versa), calculateConfusionMatrix should raise
-    # an error
+  # if a resampled prediction was computed with predict = "train" and is passed
+  # with set = "test" (and vice-versa), calculateConfusionMatrix should raise
+  # an error

-    rdesc.test = makeResampleDesc("CV", iters = 3, predict = "test")
-    pred.test = resample("classif.rpart", iris.task, rdesc.test)$pred
+  rdesc.test = makeResampleDesc("CV", iters = 3, predict = "test")
+  pred.test = resample("classif.rpart", iris.task, rdesc.test)$pred

-    expect_error(calculateConfusionMatrix(pred.test, set = "train"))
+  expect_error(calculateConfusionMatrix(pred.test, set = "train"))
 })

 test_that("calculateConfusionMatrix returns all-zero matrix when prediction object is empty", {
-    mod = train("classif.lda", iris.task)
-    pred = predict(mod, iris.task)
-    pred$data = pred$data[FALSE, ]
-    expect_true(all(calculateConfusionMatrix(pred)$result == 0))
+  mod = train("classif.lda", iris.task)
+  pred = predict(mod, iris.task)
+  pred$data = pred$data[FALSE, ]
+  expect_true(all(calculateConfusionMatrix(pred)$result == 0))
 })
diff --git a/tests/testthat/test_base_calculateROCMeasures.R b/tests/testthat/test_base_calculateROCMeasures.R
index eea8c2d99c..af4a20df89 100644
--- a/tests/testthat/test_base_calculateROCMeasures.R
+++ b/tests/testthat/test_base_calculateROCMeasures.R
@@ -24,4 +24,3 @@ test_that("calculateROCMeasures", {
   expect_equal(r$measures$fomr, 1 - measureNPV(truth, response, negative))
   expect_equal(r$measures$acc, measureACC(truth, response))
 })
-
diff --git a/tests/testthat/test_base_capLargeValues.R b/tests/testthat/test_base_capLargeValues.R
index dec68c0e4a..df61e1e295 100644
--- a/tests/testthat/test_base_capLargeValues.R
+++ b/tests/testthat/test_base_capLargeValues.R
@@ -1,7 +1,7 @@
 context("capLargeValues")

 test_that("capLargeValues.data.frame", {
-  #capLargeValues works
+  # capLargeValues works
   d1 = data.frame(x = 1:10, y = c(1:9, Inf), z = c(-11:-20))
   d2 = capLargeValues(d1, threshold = 10, impute = 10)
   expect_equal(d2, data.frame(x = 1:10, y = c(1:10), z = rep(-10, 10)))
@@ -10,13 +10,13 @@ test_that("capLargeValues.data.frame", {
   d2 = capLargeValues(d1, threshold = 10, impute = 2, cols = "z")
   expect_equal(d2, data.frame(x = 1:10, y = c(1:9, Inf), z = rep(-2, 10)))

-  #check arg target
+  # check arg target
   d1$tar = 11:20
   d2 = capLargeValues(d1, target = "tar", threshold = 10, impute = 10)
   expect_equal(d2, data.frame(x = 1:10, y = c(1:10), z = rep(-10, 10), tar = 11:20))

-  #check arg what
+  # check arg what
   d1 = data.frame(x = c(-10, 1, 10))
   d2 = capLargeValues(d1, threshold = 9, what = "abs")
   expect_equal(d2, data.frame(x = c(-9, 1, 9)))
diff --git a/tests/testthat/test_base_checkTaskSubset.R b/tests/testthat/test_base_checkTaskSubset.R
index c5e44e2a7f..d9d554c6fd 100644
--- a/tests/testthat/test_base_checkTaskSubset.R
+++ b/tests/testthat/test_base_checkTaskSubset.R
@@ -7,8 +7,7 @@ test_that("checkTaskSubset", {
   subs.bool = sample(c(TRUE, FALSE), size = 50, replace = TRUE)
   expect_equal(which(subs.bool), checkTaskSubset(subs.bool, size = 50))
   expect_error(checkTaskSubset(subs20, size = 10), regexp = "<= 10")
-  #oversampling is allowed
+  # oversampling is allowed
   subs50 = sample.int(20, 50, replace = TRUE)
   expect_equal(subs50, checkTaskSubset(subs50, size = 20))
 })
-
diff --git a/tests/testthat/test_base_clustering.R b/tests/testthat/test_base_clustering.R
index 451f161aa4..877e9672a3 100644
--- a/tests/testthat/test_base_clustering.R
+++ b/tests/testthat/test_base_clustering.R
@@ -1,6 +1,6 @@
 context("clustering")

-test_that("clustering predict", {
+test_that("clustering predict", {
   lrn = makeLearner("cluster.cmeans", predict.type = "prob")
   model = train(lrn, noclass.task)
   pred = predict(model, task = noclass.task)
@@ -11,7 +11,7 @@
 })

-test_that("clustering performance", {
+test_that("clustering performance", {
   lrn = makeLearner("cluster.SimpleKMeans")
   model = train(lrn, noclass.task)
   pred = predict(model, task = noclass.task)
@@ -23,7 +23,7 @@
   expect_true(is.numeric(performance(pred, task = noclass.task, measures = silhouette)))
 })

-test_that("clustering performance with missing clusters", {
+test_that("clustering performance with missing clusters", {
   lrn = makeLearner("cluster.SimpleKMeans")
   model = train(lrn, noclass.task)
   pred = predict(model, task = noclass.task)
@@ -36,7 +36,7 @@
   expect_warning(performance(pred, task = noclass.task, measures = silhouette), NA)
 })

-test_that("clustering resample", {
+test_that("clustering resample", {
   rdesc = makeResampleDesc("Subsample", split = 0.3, iters = 2)
   lrn = makeLearner("cluster.SimpleKMeans")
   res = resample(lrn, noclass.task, rdesc)
diff --git a/tests/testthat/test_base_convertBMRToRankMatrix.R b/tests/testthat/test_base_convertBMRToRankMatrix.R
index 05344cfd08..003aa15668 100644
--- a/tests/testthat/test_base_convertBMRToRankMatrix.R
+++ b/tests/testthat/test_base_convertBMRToRankMatrix.R
@@ -1,7 +1,6 @@
 context("convertBMRToRankMatrix")
test_that("convertBMRToRankMatrix", { - lrns = list(makeLearner("classif.nnet"), makeLearner("classif.rpart")) tasks = list(multiclass.task, binaryclass.task) rdesc = makeResampleDesc("CV", iters = 2L) diff --git a/tests/testthat/test_base_convertMLBenchObjToTask.R b/tests/testthat/test_base_convertMLBenchObjToTask.R index e2671b219c..7aab857723 100644 --- a/tests/testthat/test_base_convertMLBenchObjToTask.R +++ b/tests/testthat/test_base_convertMLBenchObjToTask.R @@ -9,8 +9,9 @@ test_that("convertMLbenchObjToTask", { task = convertMLBenchObjToTask(f, n = n) expect_is(task, "Task") # for some, n is not properly respected in mlbench - if (f %nin% c("mlbench.corners", "mlbench.hypercube", "mlbench.simplex")) + if (f %nin% c("mlbench.corners", "mlbench.hypercube", "mlbench.simplex")) { expect_equal(getTaskSize(task), n) + } } # get all mlbench datasets, HouseVotes84 and Ozone have NAs in target col diff --git a/tests/testthat/test_base_createDummyFeatures.R b/tests/testthat/test_base_createDummyFeatures.R index d85a6f6b93..90ccade588 100644 --- a/tests/testthat/test_base_createDummyFeatures.R +++ b/tests/testthat/test_base_createDummyFeatures.R @@ -25,6 +25,6 @@ test_that("createDummyFeatures", { levels(df$quan) = c("<5", ">5") df.cdf = createDummyFeatures(df) colnames = names(df.cdf) - expect_false("<5" %in% colnames || ">5" %in% colnames) - expect_true("quan..5" %in% colnames || "quan..5.1" %in% colnames) + expect_false("<5" %in% colnames || ">5" %in% colnames) + expect_true("quan..5" %in% colnames || "quan..5.1" %in% colnames) }) diff --git a/tests/testthat/test_base_createSpatialResamplingPlots.R b/tests/testthat/test_base_createSpatialResamplingPlots.R index 74c5f1e9e0..3c97237796 100644 --- a/tests/testthat/test_base_createSpatialResamplingPlots.R +++ b/tests/testthat/test_base_createSpatialResamplingPlots.R @@ -1,5 +1,4 @@ test_that("test createSpatialResamplingPlots() creates 10 ggplot objects", { - data(spatial.task, package = "mlr", envir = environment()) # take more reps to see if the restriction on two reps works @@ -12,6 +11,4 @@ test_that("test createSpatialResamplingPlots() creates 10 ggplot objects", { expect_class(plots[[1]][[1]], "gg") expect_length(plots[[1]], 10) - }) - diff --git a/tests/testthat/test_base_debugdump.R b/tests/testthat/test_base_debugdump.R index f8b3fde997..afabaed46e 100644 --- a/tests/testthat/test_base_debugdump.R +++ b/tests/testthat/test_base_debugdump.R @@ -73,4 +73,3 @@ test_that("error dump is created during tune", { expect_class(getOptPathEl(z$opt.path, 1)$extra$.dump[[1]]$predict.train, "dump.frames") do.call(configureMlr, mlr.options) }) - diff --git a/tests/testthat/test_base_downsample.R b/tests/testthat/test_base_downsample.R index 112228b1d2..ba0b68e3f6 100644 --- a/tests/testthat/test_base_downsample.R +++ b/tests/testthat/test_base_downsample.R @@ -1,6 +1,6 @@ context("downsample") -test_that("downsample", { +test_that("downsample", { down.tsk = downsample(multiclass.task, perc = 1 / 3) expect_equal(getTaskSize(down.tsk), 50L) rsm.methods = c("Bootstrap", "Subsample", "Holdout") @@ -16,7 +16,7 @@ test_that("downsample", { } }) -test_that("downsample wrapper", { +test_that("downsample wrapper", { # test it with classif rdesc = makeResampleDesc("CV", iters = 2) lrn = makeDownsampleWrapper("classif.rpart", dw.perc = 0.5) @@ -30,7 +30,7 @@ test_that("downsample wrapper", { expect_true(!is.na(r$aggr)) }) -test_that("downsample wrapper works with xgboost, we had issue #492", { +test_that("downsample wrapper works with xgboost, we had issue 
#492", { skip_if_not_installed("xgboost") # xgboost broken on CRAN, they cannot run our tests rdesc = makeResampleDesc("CV", iters = 2) lrn = makeDownsampleWrapper("classif.xgboost", dw.perc = 0.5) @@ -39,7 +39,7 @@ test_that("downsample wrapper works with xgboost, we had issue #492", { expect_true(!is.na(r$aggr)) }) -test_that("downsample wrapper works with weights, we had issue #838", { +test_that("downsample wrapper works with weights, we had issue #838", { n = nrow(regr.df) w = 1:n task = makeRegrTask(data = regr.df, target = regr.target, weights = w) @@ -69,9 +69,9 @@ test_that("training performance works as expected (#1357)", { properties = c("classif", "classif.multi", "req.pred", "req.truth"), name = "Number", fun = function(task, model, pred, feats, extra.args) { + length(pred$data$response) - } - ) + }) rdesc = makeResampleDesc("Holdout", predict = "both") lrn = makeDownsampleWrapper("classif.rpart", dw.perc = 0.1) diff --git a/tests/testthat/test_base_dropFeatures.R b/tests/testthat/test_base_dropFeatures.R index e93349a292..9041a30d62 100644 --- a/tests/testthat/test_base_dropFeatures.R +++ b/tests/testthat/test_base_dropFeatures.R @@ -1,6 +1,6 @@ context("dropFeatures") -test_that("dropFeatures", { +test_that("dropFeatures", { fns = getTaskFeatureNames(multiclass.task) task2 = dropFeatures(multiclass.task, fns[1]) expect_equal(length(getTaskFeatureNames(task2)), 3L) diff --git a/tests/testthat/test_base_fda.R b/tests/testthat/test_base_fda.R index ce3ee989ce..d9c639cdbc 100644 --- a/tests/testthat/test_base_fda.R +++ b/tests/testthat/test_base_fda.R @@ -60,7 +60,6 @@ test_that("makeFunctionalData subsetting works", { }) test_that("makeFunctionalData works for different inputs", { - df = data.frame(matrix(rnorm(50), nrow = 5)) # for 1-D matricies fdf = makeFunctionalData(df, fd.features = list("fd1" = 1, "fd2" = 2:10)) @@ -135,7 +134,6 @@ test_that("getFunctionalFeatures works for different inputs", { test_that("makeFunctionalData Tasks work", { - df = data.frame(matrix(rnorm(50), nrow = 5)) df$tcl = as.factor(letters[1:5]) df$treg = 1:5 @@ -197,12 +195,10 @@ test_that("makeFunctionalData Tasks work", { expect_equal(subs.clust2$task.desc$n.feat["numerics"], c("numerics" = 1L)) expect_equal(subs.clust2$task.desc$n.feat["ordered"], c("ordered" = 0L)) expect_equal(subs.clust2$task.desc$size, 5L) - }) test_that("getTaskData for functionals", { - df = data.frame(matrix(rnorm(50), nrow = 5)) df$tcl = as.factor(letters[1:5]) df$treg = 1:5 @@ -210,7 +206,9 @@ test_that("getTaskData for functionals", { # For a classification clt = makeClassifTask(data = fdf, target = "tcl") - expect_message({tdata1 = getTaskData(clt, functionals.as = "dfcols")}, "have been converted to numerics") + expect_message({ + tdata1 = getTaskData(clt, functionals.as = "dfcols") + }, "have been converted to numerics") expect_true(!("matrix" %in% lapply(tdata1, class))) expect_equal(tdata1[, getTaskTargetNames(clt)], as.factor(letters[1:5])) @@ -223,21 +221,24 @@ test_that("getTaskData for functionals", { expect_equal(tdata3$target, as.factor(letters[1:5])) expect_true("matrix" %in% unlist(lapply(tdata3$data, class))) - expect_message({tdata4 = getTaskData(clt, functionals.as = "dfcols", target.extra = TRUE)}) + expect_message({ + tdata4 = getTaskData(clt, functionals.as = "dfcols", target.extra = TRUE) + }) expect_true(!("matrix" %in% lapply(tdata4$data, class))) expect_equal(tdata4$target, as.factor(letters[1:5])) # For clustering task clustt = makeClusterTask(data = fdf) - expect_message({tdatacl1 = 
getTaskData(clustt, functionals.as = "dfcols")}, "have been converted to numerics") + expect_message({ + tdatacl1 = getTaskData(clustt, functionals.as = "dfcols") + }, "have been converted to numerics") expect_true(!("matrix" %in% lapply(tdatacl1, class))) tdatacl2 = getTaskData(clustt, functionals.as = "matrix") expect_true("matrix" %in% unlist(lapply(tdatacl2, class))) }) test_that("changeData for functionals", { - df = data.frame(matrix(rnorm(50), nrow = 5)) df$tcl = as.factor(letters[1:5]) df$treg = 1:5 @@ -258,7 +259,6 @@ test_that("changeData for functionals", { }) test_that("makeFunctionalData produces valid error messages", { - df = data.frame("x" = 1:3, "y" = 2:4, "z" = letters[1:3]) expect_error(makeFunctionalData(df, fd.features = list("fd1" = 1:4)), "Must be a subset of") expect_error(makeFunctionalData(df, fd.features = list("fd1" = 1:3)), "contains non-integer") @@ -300,8 +300,7 @@ test_that("makeFunctionalData produces valid error messages", { expect_equal(dim(fdf5$fd1), c(3, 1)) expect_error(makeFunctionalData(data.frame(matrix(letters[1:9], nrow = 3)), - fd.features = list("fd1" = 1:3)), "fd.features contains non-integer") - + fd.features = list("fd1" = 1:3)), "fd.features contains non-integer") }) @@ -316,23 +315,30 @@ test_that("hasFunctionals works", { }) test_that("getTaskData for functional tasks", { - expect_true(hasFunctionalFeatures(getTaskData(fda.binary.gp.task, functionals.as = "matrix"))) - expect_message({df = getTaskData(fda.binary.gp.task, subset = 1:50, functionals.as = "dfcols")}) + expect_message({ + df = getTaskData(fda.binary.gp.task, subset = 1:50, functionals.as = "dfcols") + }) expect_false(hasFunctionalFeatures(df)) # Subset rows expect_true(hasFunctionalFeatures(getTaskData(fda.binary.gp.task, subset = 1:50, functionals.as = "matrix"))) - expect_message({df = getTaskData(fda.binary.gp.task, subset = 1:50, functionals.as = "dfcols")}) + expect_message({ + df = getTaskData(fda.binary.gp.task, subset = 1:50, functionals.as = "dfcols") + }) expect_false(hasFunctionalFeatures(df)) # We can not really subset cols for this task. 
   expect_false(hasFunctionalFeatures(getTaskData(fda.regr.fs.task, features = 1, functionals.as = "matrix")))
   expect_true(hasFunctionalFeatures(getTaskData(fda.regr.fs.task, features = 2, functionals.as = "matrix")))
   expect_true(hasFunctionalFeatures(getTaskData(fda.regr.fs.task, features = 3, functionals.as = "matrix")))
-  expect_silent({df = getTaskData(fda.regr.fs.task, features = 1, functionals.as = "dfcols")})
+  expect_silent({
+    df = getTaskData(fda.regr.fs.task, features = 1, functionals.as = "dfcols")
+  })
   expect_false(hasFunctionalFeatures(df))
-  expect_message({df = getTaskData(fda.regr.fs.task, features = c(2, 3), functionals.as = "dfcols")})
+  expect_message({
+    df = getTaskData(fda.regr.fs.task, features = c(2, 3), functionals.as = "dfcols")
+  })
   expect_false(hasFunctionalFeatures(df))
@@ -340,19 +346,23 @@ test_that("getTaskData for functional tasks", {
   expect_error(hasFunctionalFeatures(getTaskData(fda.binary.gp.task, features = 2, functionals.as = "dfcols")))

   expect_false(hasFunctionalFeatures(getTaskData(iris.task, functionals.as = "matrix")))
-  expect_silent({df = getTaskData(iris.task, subset = 1:50, functionals.as = "matrix")})
+  expect_silent({
+    df = getTaskData(iris.task, subset = 1:50, functionals.as = "matrix")
+  })
   expect_false(hasFunctionalFeatures(df))

   expect_false(hasFunctionalFeatures(getTaskData(iris.task, functionals.as = "dfcols")))
-  expect_silent({df = getTaskData(iris.task, subset = 1:50, functionals.as = "dfcols")})
+  expect_silent({
+    df = getTaskData(iris.task, subset = 1:50, functionals.as = "dfcols")
+  })
   expect_false(hasFunctionalFeatures(df))
-
 })

 test_that("benchmarking on fda tasks works", {
-
   lrns = list(makeLearner("classif.fdausc.knn"), makeLearner("classif.rpart"), makeLearner("classif.featureless"))
-  expect_message({bmr = benchmark(lrns, fda.binary.gp.task.small, cv2)}, "Functional features have been")
+  expect_message({
+    bmr = benchmark(lrns, fda.binary.gp.task.small, cv2)
+  }, "Functional features have been")
   expect_class(bmr, "BenchmarkResult")
   expect_equal(names(bmr$results$gp.fdf), c("classif.fdausc.knn", "classif.rpart", "classif.featureless"))
   expect_numeric(as.data.frame(bmr)$mmce, lower = 0L, upper = 1L)
@@ -361,7 +371,9 @@ test_that("benchmarking on fda tasks works", {
   # Test benchmark mixed learners regression
   set.seed(getOption("mlr.debug.seed"))
   lrns2 = list(makeLearner("regr.FDboost"), makeLearner("regr.rpart"), makeLearner("regr.featureless"))
-  expect_message({bmr2 = benchmark(lrns2, fda.regr.fs.task, hout)}, "Functional features have been")
+  expect_message({
+    bmr2 = benchmark(lrns2, fda.regr.fs.task, hout)
+  }, "Functional features have been")
   expect_class(bmr2, "BenchmarkResult")
   expect_equal(names(bmr2$results$fs.fdf), c("regr.FDboost", "regr.rpart", "regr.featureless"))
   expect_numeric(as.data.frame(bmr2)$mse, lower = 0L, upper = Inf)
@@ -371,7 +383,7 @@ test_that("benchmarking on fda tasks works", {
 test_that("makeFunctionalData for matricies contained in data.frame", {
   df = getTaskData(fuelsubset.task, functionals.as = "matrix")
   df2 = makeFunctionalData(df, fd.features = list("UVVIS" = "UVVIS", "NIR" = "NIR"),
-    exclude.cols = c("heatan", "h20"))
+    exclude.cols = c("heatan", "h20"))
   expect_equivalent(df, df2)

   df = data.frame(matrix(rnorm(100), ncol = 10L))
diff --git a/tests/testthat/test_base_fda_extractFDAFeatures.R b/tests/testthat/test_base_fda_extractFDAFeatures.R
index fa4680c6df..a733053468 100644
--- a/tests/testthat/test_base_fda_extractFDAFeatures.R
+++ b/tests/testthat/test_base_fda_extractFDAFeatures.R
@@ -20,7 +20,7 @@ test_that("extractFeatures multiple times", {
   expect_true(nrow(df) == 129L)
   # expect_true(ncol(df) == 6L)
   # expect_subset(colnames(df), c("UVVIS.mean", "UVVIS.min", "UVVIS.max", "heatan",
-  #   "h20", "NIR.mean"))
+  #   "h20", "NIR.mean"))

   methods = list("all" = extractFDAMultiResFeatures(), "all" = extractFDAFourier())
   t = extractFDAFeatures(fuelsubset.task, feat.methods = methods)
@@ -30,7 +30,7 @@ test_that("extractFeatures multiple times", {
   expect_true(nrow(df) == 129L)
   # expect_true(ncol(df) == 8L)
   # expect_subset(colnames(df), c("UVVIS.mean", "UVVIS.min", "UVVIS.max", "heatan",
-  #   "h20", "NIR.mean", "NIR.min", "NIR.max"))
+  #   "h20", "NIR.mean", "NIR.min", "NIR.max"))
 })
@@ -47,7 +47,11 @@ test_that("Wrong methods yield errors", {
   t = subsetTask(fuelsubset.task, subset = 1:2)

   wrng1 = function() {
-    lrn = function(data, target, col, vals = NULL) {1}
+
+    lrn = function(data, target, col, vals = NULL) {
+
+      1
+    }
     makeExtractFDAFeatMethod(learn = lrn, reextract = lrn)
   }
   expect_error(extractFDAFeatures(t, feat.methods = list("NIR" = wrng1())),
@@ -55,14 +59,22 @@ test_that("Wrong methods yield errors", {

   wrng2 = function() {
-    lrn = function(data) {data[, 1]}
+
+    lrn = function(data) {
+
+      data[, 1]
+    }
     makeExtractFDAFeatMethod(learn = lrn, reextract = lrn)
   }
   expect_error(extractFDAFeatures(t, feat.methods = list("NIR" = wrng2())),
     "Must have formal arguments")

   wrng3 = function() {
-    lrn = function(data, target, col, vals = NULL) {data.frame(1)}
+
+    lrn = function(data, target, col, vals = NULL) {
+
+      data.frame(1)
+    }
     makeExtractFDAFeatMethod(z = lrn, rz = lrn)
   }
   expect_error(extractFDAFeatures(t, feat.methods = list("NIR" = wrng3())),
@@ -141,6 +153,7 @@ test_that("Wavelet method are equal to package", {
   df = BBmisc::convertRowsToList(gp$data[, "fd", drop = FALSE])
   set.seed(getOption("mlr.debug.seed"))
   wtdata = t(BBmisc::dapply(df, fun = function(x) {
+
     wt = wavelets::dwt(as.numeric(x), filter = "haar", boundary = "reflection")
     unlist(c(wt@W, wt@V[[wt@level]]))
   }))
@@ -168,7 +181,7 @@ test_that("extract and reextract Wavelets", {

 test_that("getUniFDAMultiResFeatures works on data.frame", {
   i = 100 # number of instances
-  tl = 200 # length of each time serie instance
+  tl = 200 # length of each time series instance
   ts = replicate(i, rnorm(tl))
   gp = t(as.data.frame(ts))
   ngp = extractFDAMultiResFeatures()$learn(data = gp, res.level = 3, shift = 0.5, curve.lens = NULL)
@@ -260,7 +273,7 @@ test_that("Fourier equal to package", {
   fourier.gp = lrn(data = gp1, trafo.coeff = "phase")
   expect_equal(nrow(fourier.gp), nrow(gp1))
   # Phase (arctan(...) in range(-pi/2, pi/2) )
-  expect_true(all(fourier.gp < pi / 2 & fourier.gp > - pi / 2))
+  expect_true(all(fourier.gp < pi / 2 & fourier.gp > -pi / 2))

   fourier.a.gp = lrn(data = gp1, trafo.coeff = "amplitude")
   expect_equal(nrow(fourier.a.gp), nrow(gp1))
@@ -286,6 +299,6 @@ test_that("Fourier equal to package", {
   expect_equal(df, fourier.a.gp)

   # Can not have factors
-  gp2 = data.frame(v1 = t(1:4), X1 = as.factor(1))
+  gp2 = data.frame(v1 = t(1:4), X1 = as.factor(1))
   expect_error(extractFourierFeatures(data = gp2, trafo.coeff = "amplitude"))
 })
diff --git a/tests/testthat/test_base_fixed_indices_cv.R b/tests/testthat/test_base_fixed_indices_cv.R
index 6be5c663b1..5ad88361c2 100644
--- a/tests/testthat/test_base_fixed_indices_cv.R
+++ b/tests/testthat/test_base_fixed_indices_cv.R
@@ -16,7 +16,6 @@ test_that("fixed in single resampling", {
   expect_length(unique(unlist(p$instance$test.inds, use.names = FALSE)), 150)
   # check if correct indices are together (one fold is enough)
   expect_equal(p$instance$test.inds[[1]], c(11, 41, 71, 101, 131))
-
 })

 test_that("fixed in nested resampling", {
@@ -27,8 +26,8 @@ test_that("fixed in nested resampling", {

   # test fixed in nested resampling
   lrn = makeLearner("classif.lda")
-  ctrl <- makeTuneControlRandom(maxit = 2)
-  ps <- makeParamSet(makeNumericParam("nu", lower = 2, upper = 20))
+  ctrl = makeTuneControlRandom(maxit = 2)
+  ps = makeParamSet(makeNumericParam("nu", lower = 2, upper = 20))
   inner = makeResampleDesc("CV", iters = 4, fixed = TRUE)
   outer = makeResampleDesc("CV", iters = 5, fixed = TRUE)
   tune_wrapper = makeTuneWrapper(lrn, resampling = inner, par.set = ps,
@@ -57,5 +56,4 @@ test_that("fixed in nested resampling", {

   p = resample(tune_wrapper, ct, outer, show.info = FALSE, extract = getTuneResult)
   expect_length(getResamplingIndices(p, inner = TRUE)[[1]][[1]], 6)
-
 })
diff --git a/tests/testthat/test_base_generateFeatureImportanceData.R b/tests/testthat/test_base_generateFeatureImportanceData.R
index bf6b9a943e..49dcfded9d 100644
--- a/tests/testthat/test_base_generateFeatureImportanceData.R
+++ b/tests/testthat/test_base_generateFeatureImportanceData.R
@@ -1,5 +1,4 @@
 test_that("generateFeatureImportanceData", {
-
   regr.imp = generateFeatureImportanceData(regr.task, "permutation.importance", "regr.rpart", c("lstat", "crim"), FALSE, mse, function(x, y) abs(x - y), median, 1L, TRUE, FALSE)
diff --git a/tests/testthat/test_base_generateFilterValuesData.R b/tests/testthat/test_base_generateFilterValuesData.R
index b748444231..cf22767ad0 100644
--- a/tests/testthat/test_base_generateFilterValuesData.R
+++ b/tests/testthat/test_base_generateFilterValuesData.R
@@ -38,7 +38,7 @@ test_that("filterFeatures", {
   expect_equal(ns, feat.imp.new$data$name)
   f = filterFeatures(binaryclass.task, method = "variance", abs = 5L)
   expect_true(setequal(getTaskFeatureNames(f),
-    head(sortByCol(feat.imp.new$data, "variance", asc = FALSE), 5L)$name))
+    head(sortByCol(feat.imp.new$data, "variance", asc = FALSE), 5L)$name))
   # now check that we get the same result by operating on generateFilterValuesData
   feat.imp.new = generateFilterValuesData(binaryclass.task, method = "variance")
   ff = filterFeatures(binaryclass.task, fval = feat.imp.new, abs = 5L)
@@ -98,7 +98,8 @@ test_that("args are passed down to filter methods", { # we had an issue here, se
     nselect = 3, more.args = list(univariate.model.score = list(perf.learner = "regr.lm")))

   # create stupid dummy data and check that we can change the na.rm arg of filter "variance" in multiple ways
-  d = iris; d[1L, 1L] = NA_real_
+  d = iris
+  d[1L, 1L] = NA_real_
   task = makeClassifTask(data = d, target = "Species")

   f1 = generateFilterValuesData(task, method = "variance", na.rm = FALSE)
@@ -127,12 +128,13 @@ test_that("filter values are named and ordered correctly", { # we had an issue h
     supported.tasks = c("classif", "regr", "surv"),
     supported.features = c("numerics", "factors"),
     fun = function(task, nselect) {
+
       ns = getTaskFeatureNames(task)
       d = seq_along(ns)
       names(d) = ns
       d = c(d[-1], d[1])
       d
-  })
+    })
   fv = generateFilterValuesData(regr.task, method = "mock.filter")
   expect_equal(fv$data$name, ns)
   expect_equal(fv$data$mock.filter, seq_along(ns))
diff --git a/tests/testthat/test_base_generateHyperParsEffect.R b/tests/testthat/test_base_generateHyperParsEffect.R
index 159654c864..cb9d9c5a5f 100644
--- a/tests/testthat/test_base_generateHyperParsEffect.R
+++ b/tests/testthat/test_base_generateHyperParsEffect.R
@@ -29,7 +29,7 @@ test_that("generate data", {

 test_that("1 numeric hyperparam", {
   # generate data
-  ps = makeParamSet(makeDiscreteParam("C", values = 2^ (-2:2)))
+  ps = makeParamSet(makeDiscreteParam("C", values = 2^(-2:2)))
   ctrl = makeTuneControlGrid()
   rdesc = makeResampleDesc("Holdout")
   res = tuneParams("classif.ksvm", task = pid.task, resampling = rdesc,
diff --git a/tests/testthat/test_base_generateLearningCurve.R b/tests/testthat/test_base_generateLearningCurve.R
index 038c437a2a..d47de2f82b 100644
--- a/tests/testthat/test_base_generateLearningCurve.R
+++ b/tests/testthat/test_base_generateLearningCurve.R
@@ -43,7 +43,6 @@ test_that("generateLearningCurve", {
   testFacetting(q, nrow = 2L)
   q = plotLearningCurve(r, facet.wrap.ncol = 2L, facet = "learner")
   testFacetting(q, ncol = 2L)
-
 })

 test_that("generateLearningCurve works if single learner is passed (not wrapped in list)", {
diff --git a/tests/testthat/test_base_generateThreshVsPerf.R b/tests/testthat/test_base_generateThreshVsPerf.R
index a20867f871..f93f95b90d 100644
--- a/tests/testthat/test_base_generateThreshVsPerf.R
+++ b/tests/testthat/test_base_generateThreshVsPerf.R
@@ -116,7 +116,7 @@ test_that("generateThreshVsPerfData", {
   mcm = matrix(sample(0:3, size = (length(classes))^2, TRUE), ncol = length(classes))
   rownames(mcm) = colnames(mcm) = classes
   costs = makeCostMeasure(id = "asym.costs", name = "Asymmetric costs",
-    minimize = TRUE, costs = mcm, combine = mean)
+    minimize = TRUE, costs = mcm, combine = mean)
   pvs.custom = generateThreshVsPerfData(pred, costs)
   plotThreshVsPerf(pvs.custom)
   ggplot2::ggsave(path)
diff --git a/tests/testthat/test_base_getCaretParamSet.R b/tests/testthat/test_base_getCaretParamSet.R
index 1cb3ad51ca..7bf122cf73 100644
--- a/tests/testthat/test_base_getCaretParamSet.R
+++ b/tests/testthat/test_base_getCaretParamSet.R
@@ -3,16 +3,22 @@ context("getCaretParamSet")

 test_that("getCaretParamSet", {
   requirePackagesOrSkip(c("caret", "rpart", "earth"))
   checkCaretParams = function(lrn, k, task) {
+
     set.seed(123)
-    a = capture.output({cps1 = getCaretParamSet(lrn, length = k, task = task, discretize = TRUE)})
+    a = capture.output({
+      cps1 = getCaretParamSet(lrn, length = k, task = task, discretize = TRUE)
+    })
     set.seed(123)
-    b = capture.output({cps2 = getCaretParamSet(lrn, length = k, task = task, discretize = FALSE)})
+    b = capture.output({
+      cps2 = getCaretParamSet(lrn, length = k, task = task, discretize = FALSE)
+    })
     expect_identical(cps1$par.vals, cps2$par.vals)
     expect_identical(names(cps1$par.set$pars), names(cps2$par.set$pars))
     expect_identical(class(cps1$par.set), "ParamSet")
     expect_identical(class(cps2$par.set), "ParamSet")
-    if (!is.null(cps1$par.vals))
+    if (!is.null(cps1$par.vals)) {
       expect_identical(class(cps1$par.vals), "list")
+    }
   }

   caret.learners = c("gbm", "rf", "svmPoly", "svmLinear", "svmRadial",
diff --git a/tests/testthat/test_base_getFeatureImportance.R b/tests/testthat/test_base_getFeatureImportance.R
index c147ccffcc..f1bd423a35 100644
--- a/tests/testthat/test_base_getFeatureImportance.R
+++ b/tests/testthat/test_base_getFeatureImportance.R
@@ -2,7 +2,7 @@ context("getFeatureImportance")

 test_that("getFeatureImportance", {

-  #type 2 for random Forest should work without setting importance
+  # type 2 for random Forest should work without setting importance
   lrn = makeLearner("classif.randomForest")
   mod = train(lrn, binaryclass.task)
   feat.imp = getFeatureImportance(mod, type = 2)$res
@@ -10,7 +10,7 @@ test_that("getFeatureImportance", {
     any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(binaryclass.task))
   expect_equal(colnames(feat.imp), mod$features)

-  #type 1 shouldn't
+  # type 1 shouldn't
   expect_error(getFeatureImportance(mod, type = 1), regexp = ".*importance.*TRUE")

   lrn = setHyperPars(lrn, importance = TRUE)
@@ -20,7 +20,7 @@ test_that("getFeatureImportance", {
     any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(binaryclass.task))
   expect_equal(colnames(feat.imp), mod$features)

-  #regression learner
+  # regression learner
   lrn = makeLearner("regr.gbm")
   mod = train(lrn, regr.task)
   feat.imp = getFeatureImportance(mod)$res
@@ -28,7 +28,7 @@ test_that("getFeatureImportance", {
     any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(regr.task))
   expect_equal(colnames(feat.imp), mod$features)

-  #wrapped learner
+  # wrapped learner
   lrn = makeFilterWrapper(makeLearner("regr.gbm"), fw.method = "FSelectorRcpp_information.gain", fw.abs = 2)
   mod = train(lrn, regr.task)
   feat.imp = getFeatureImportance(mod)$res
@@ -36,8 +36,8 @@ test_that("getFeatureImportance", {
     any.missing = FALSE, nrows = 1, ncols = getTaskNFeats(regr.task))
   expect_equal(colnames(feat.imp), mod$features)

-  #For learners without the possibility to calculate feature importance a meaningfull error should
-  #be returned
+  # For learners without the possibility to calculate feature importance a meaningful error should
+  # be returned
   lrn = makeLearner("classif.qda")
   mod = train(lrn, binaryclass.task)
   expect_error(getFeatureImportance(mod), regexp = "does not support 'featimp'")
diff --git a/tests/testthat/test_base_getHyperPars.R b/tests/testthat/test_base_getHyperPars.R
index 6f23fbb333..9bfc82926f 100644
--- a/tests/testthat/test_base_getHyperPars.R
+++ b/tests/testthat/test_base_getHyperPars.R
@@ -22,7 +22,7 @@ test_that("getHyperPars", {
   lrn = makeMultilabelBinaryRelevanceWrapper("classif.rpart")
   expect_true(setequal(getHyperPars(lrn), list(xval = 0)))

-  #Missing values should not be omitted and printed
+  # Missing values should not be omitted and printed
   lrn = makeLearner("classif.xgboost", missing = NA)
   expect_output(print(lrn), "missing=NA")
   lrn = makeLearner("regr.xgboost", missing = NA)
diff --git a/tests/testthat/test_base_getOOBPreds.R b/tests/testthat/test_base_getOOBPreds.R
index 69d639082a..13791f58d2 100644
--- a/tests/testthat/test_base_getOOBPreds.R
+++ b/tests/testthat/test_base_getOOBPreds.R
@@ -19,6 +19,4 @@ test_that("getOOBPreds", {
     expect_equal(names(oob$data), names(pred$data))
     expect_equal(names(oob), names(pred))
   }
-
 })
-
diff --git a/tests/testthat/test_base_getTaskData.R b/tests/testthat/test_base_getTaskData.R
index 561bfad565..e220f3721b 100644
--- a/tests/testthat/test_base_getTaskData.R
+++ b/tests/testthat/test_base_getTaskData.R
@@ -20,7 +20,7 @@ test_that("getTaskData", {
   expect_true(is.numeric(df[, binaryclass.target]))
   expect_equal(sum(df[, binaryclass.target] == 1),
     sum(binaryclass.df[, binaryclass.target] == td$positive))
-  expect_equal(sum(df[, binaryclass.target] == - 1),
+  expect_equal(sum(df[, binaryclass.target] == -1),
     sum(binaryclass.df[, binaryclass.target] == td$negative))
   df = getTaskData(multilabel.task, recode.target = "multilabel.factor")
   expect_true(all(sapply(df[, multilabel.target], is.factor)))
diff --git a/tests/testthat/test_base_helpLearner.R b/tests/testthat/test_base_helpLearner.R
index 066a452b3b..07dec5635d 100644
--- a/tests/testthat/test_base_helpLearner.R
+++ b/tests/testthat/test_base_helpLearner.R
@@ -14,13 +14,21 @@ test_that("helpLearner of learner with multiple help pages", {
   testfn = helpLearner
   environment(testfn) = new.env(parent = environment(testfn))

-  environment(testfn)$readline = function(x) { cat(x, "\n") ; 0 }
+  environment(testfn)$readline = function(x) {
+
+    cat(x, "\n")
+    0
+  }

   expect_output(testfn("classif.qda"),
     "Choose help page:(\\n[0-9]+ : [0-9a-zA-Z._]+)+\\n\\.\\.\\.: *$")
   expect_null(quiet(testfn("classif.qda")))

-  environment(testfn)$readline = function(x) { cat(x, "\n") ; 1 }
+  environment(testfn)$readline = function(x) {
+
+    cat(x, "\n")
+    1
+  }

   hlp1 = quiet(testfn("classif.qda"))
@@ -33,7 +41,11 @@ test_that("helpLearner of learner with multiple help pages", {
     expect_equivalent(rfhelp, quiet(testfn("regr.randomForest")))
   }

-  environment(testfn)$readline = function(x) { cat(x, "\n") ; 2 }
+  environment(testfn)$readline = function(x) {
+
+    cat(x, "\n")
+    2
+  }

   hlp3 = quiet(testfn("classif.qda"))
@@ -43,7 +55,6 @@ test_that("helpLearner of learner with multiple help pages", {

   # regr.randomForest with option '2' should give the randomForest help page.
   expect_true(length(quiet(testfn("regr.randomForest"))) == 1)
-
 })

 test_that("helpLearner of wrapped learner", {
@@ -85,12 +96,12 @@ test_that("helpLearnerParam", {
   # check that values are printed
   expect_output(helpLearnerParam(
     makeLearner("classif.qda", nu = 3), "nu"),
-    "Value: +3")
+    "Value: +3")
   # values for vectorial params work
   expect_output(helpLearnerParam(
     makeLearner("classif.randomForest", cutoff = c(.1, .2, .3)), "cutoff"),
-    "Value:.+0\\.1.+0\\.2.+0\\.3")
+    "Value:.+0\\.1.+0\\.2.+0\\.3")
 })

 test_that("helpLearnerParam of wrapped learner", {
@@ -105,5 +116,4 @@ test_that("helpLearnerParam of wrapped learner", {
     "is a wrapped learner. Showing documentation of 'classif.qda' instead",
     fixed = TRUE, all = TRUE)
   expect_message(quiet(helpLearnerParam(w2)),
     "is a wrapped learner. Showing documentation of 'classif.qda' instead",
     fixed = TRUE, all = TRUE)
-
 })
diff --git a/tests/testthat/test_base_helpers.R b/tests/testthat/test_base_helpers.R
index 5aaea78249..e3def73397 100644
--- a/tests/testthat/test_base_helpers.R
+++ b/tests/testthat/test_base_helpers.R
@@ -35,8 +35,10 @@ test_that("listLearnerProperties", {

 test_that("suppressWarning works", {
   foo = function(x) {
-    if (x > 3)
+
+    if (x > 3) {
       warning("x is pretty large.")
+    }
     x
   }
diff --git a/tests/testthat/test_base_imbal_overbagging.R b/tests/testthat/test_base_imbal_overbagging.R
index f924bfcba3..c983834e37 100644
--- a/tests/testthat/test_base_imbal_overbagging.R
+++ b/tests/testthat/test_base_imbal_overbagging.R
@@ -1,6 +1,6 @@
 context("overbagging")

-test_that("OverBagging wrapper", {
+test_that("OverBagging wrapper", {
   rdesc = makeResampleDesc("CV", iters = 2)
   lrn1 = makeLearner("classif.rpart")
   lrn2 = makeOverBaggingWrapper(lrn1, obw.rate = 2)
@@ -15,37 +15,39 @@ test_that("OverBagging wrapper arg check works", {
 })

 test_that("oversampling in each bag works", {
-    y = binaryclass.df[, binaryclass.target]
-    tab1 = table(y)
-    task = makeClassifTask(data = binaryclass.df, target = binaryclass.target)
-    lrn1 = makeLearner("classif.rpart")
-    lrn2 = makeOverBaggingWrapper(lrn1, obw.rate = 5, obw.iters = 3)
-    mod = train(lrn2, task)
-    models = getLearnerModel(mod)
-
-    # check min class size gets increased by rate/factor 5
-    tab = lapply(seq_along(models), function(i) {
-      data = getTaskData(task, models[[i]]$subset)
-      tab = table(data[, binaryclass.target])
-      expect_equal(tab1["M"], tab["M"])
-      expect_equal(tab1["R"], round(tab["R"] / 5))
-    })
+  y = binaryclass.df[, binaryclass.target]
+  tab1 = table(y)
+  task = makeClassifTask(data = binaryclass.df, target = binaryclass.target)
+  lrn1 = makeLearner("classif.rpart")
+  lrn2 = makeOverBaggingWrapper(lrn1, obw.rate = 5, obw.iters = 3)
+  mod = train(lrn2, task)
+  models = getLearnerModel(mod)
+
+  # check min class size gets increased by rate/factor 5
+  tab = lapply(seq_along(models), function(i) {
+
+    data = getTaskData(task, models[[i]]$subset)
+    tab = table(data[, binaryclass.target])
+    expect_equal(tab1["M"], tab["M"])
+    expect_equal(tab1["R"], round(tab["R"] / 5))
+  })
 })

 test_that("oversampling bigger class works", {
-    y = binaryclass.df[, binaryclass.target]
-    z = getMinMaxClass(y)
-    tab1 = table(y)
-    task = makeClassifTask(data = binaryclass.df, target = binaryclass.target)
-    lrn1 = makeLearner("classif.rpart")
-    lrn2 = makeOverBaggingWrapper(lrn1, obw.rate = 5, obw.iters = 3, obw.cl = z$max.name)
-    mod = train(lrn2, task)
-    models = getLearnerModel(mod)
-
-    tab = lapply(seq_along(models), function(i) {
-      data = getTaskData(task, models[[1]]$subset)
-      tab = table(data[, binaryclass.target])
-      expect_equal(tab1["R"], tab["R"])
-      expect_equal(tab1["M"], round(tab["M"] / 5))
-    })
+  y = binaryclass.df[, binaryclass.target]
+  z = getMinMaxClass(y)
+  tab1 = table(y)
+  task = makeClassifTask(data = binaryclass.df, target = binaryclass.target)
+  lrn1 = makeLearner("classif.rpart")
+  lrn2 = makeOverBaggingWrapper(lrn1, obw.rate = 5, obw.iters = 3, obw.cl = z$max.name)
+  mod = train(lrn2, task)
+  models = getLearnerModel(mod)
+
+  tab = lapply(seq_along(models), function(i) {
+
+    data = getTaskData(task, models[[1]]$subset)
+    tab = table(data[, binaryclass.target])
+    expect_equal(tab1["R"], tab["R"])
+    expect_equal(tab1["M"], round(tab["M"] / 5))
+  })
 })
diff --git a/tests/testthat/test_base_imbal_overundersample.R b/tests/testthat/test_base_imbal_overundersample.R
index 44c4c06b2c..13ea646e17 100644
--- a/tests/testthat/test_base_imbal_overundersample.R
+++ b/tests/testthat/test_base_imbal_overundersample.R
@@ -1,6 +1,6 @@
 context("overundersample")

-test_that("over and undersample works", {
+test_that("over and undersample works", {
   y = binaryclass.df[, binaryclass.target]
   tab1 = table(y)
   task = oversample(binaryclass.task, rate = 2)
@@ -15,7 +15,7 @@
   expect_equal(tab2["R"], tab1["R"])
 })

-test_that("over and undersample wrapper", {
+test_that("over and undersample wrapper", {
   rdesc = makeResampleDesc("CV", iters = 2)
   lrn1 = makeLearner("classif.rpart")
   lrn2 = makeUndersampleWrapper(lrn1, usw.rate = 0.5)
@@ -48,7 +48,7 @@ test_that("oversampling keeps all min / max obs", {

 test_that("control which class gets over or under sampled", {
   set.seed(getOption("mlr.debug.seed"))
-  #check function oversample(), undersample()
+  # check function oversample(), undersample()
   y = binaryclass.df[, binaryclass.target]
   tab1 = table(y)
   z = getMinMaxClass(y)
@@ -63,7 +63,7 @@ test_that("control which class gets over or under sampled", {
   expect_equal(tab2["R"], round(tab1["R"] / 2))
   expect_equal(tab2["M"], tab1["M"])

-  #check over- and undersample-wrapper
+  # check over- and undersample-wrapper
   z = getMinMaxClass(binaryclass.df[, binaryclass.target])
   rdesc = makeResampleDesc("CV", iters = 2)
   lrn1 = makeLearner("classif.rpart")
@@ -80,9 +80,9 @@ test_that("training performance works as expected (#1357)", {
     properties = c("classif", "classif.multi", "req.pred", "req.truth"),
     name = "Number",
     fun = function(task, model, pred, feats, extra.args) {
+
       length(pred$data$response)
-    }
-  )
+    })

   y = binaryclass.df[, binaryclass.target]
   z = getMinMaxClass(y)
@@ -138,5 +138,4 @@ test_that("Wrapper works with weights, we had issue #2047", {
   u = getLearnerModel(m, more.unwrap = TRUE)$weights
   expect_equal(length(u), 7)
   expect_subset(u, 1:10)
-
 })
diff --git a/tests/testthat/test_base_imbal_smote.R b/tests/testthat/test_base_imbal_smote.R
index 36920560af..ec2d4b76c8 100644
--- a/tests/testthat/test_base_imbal_smote.R
+++ b/tests/testthat/test_base_imbal_smote.R
@@ -1,6 +1,6 @@
 context("smote")

-test_that("smote works", {
+test_that("smote works", {
   y = binaryclass.df[, binaryclass.target]
   tab1 = table(y)
   task = smote(binaryclass.task, rate = 2)
@@ -20,7 +20,7 @@ test_that("smote works", {
   expect_error(smote(task, rate = 2, alt.logic = TRUE), "minimal class has size 3")
 })

-test_that("smote works with rate 1 (no new examples)", {
+test_that("smote works with rate 1 (no new examples)", {
   y = binaryclass.df[, binaryclass.target]
   tab1 = table(y)
   task = smote(binaryclass.task, rate = 1)
@@ -36,7 +36,7 @@ test_that("smote works with rate 1 (no new examples)", {
   expect_equal(tab2alt["R"], tab1["R"])
 })

-test_that("smote works with only factor features", {
+test_that("smote works with only factor features", {
   n = 10
   d = data.frame(
     x1 = sample(c("a", "b"), n, replace = TRUE),
@@ -50,7 +50,7 @@ test_that("smote works with only factor features", {
   expect_equal(getTaskSize(task3), 12)
 })

-test_that("smote wrapper", {
+test_that("smote wrapper", {
   rdesc = makeResampleDesc("CV", iters = 2)
   lrn1 = makeLearner("classif.rpart")
   lrn2 = makeSMOTEWrapper(lrn1, sw.rate = 2)
@@ -82,7 +82,7 @@ test_that("smote works with only integer features", {

 # FIXME: Reactivate test, the test is failing on an R CMD check but not test().
 # Probably due to seeding problems.
-#test_that("smote works with constant factor features", { +# test_that("smote works with constant factor features", { # # This reproduces the bug from issue #1951 # d = data.frame( # x1 = rpois(100, 2), @@ -94,5 +94,4 @@ test_that("smote works with only integer features", { # task2 = smote(task, rate = 9, nn = 4L) # # expect_equal(table(getTaskData(task2)$x2, getTaskData(task2)$y)[5, 1], 90) -#}) - +# }) diff --git a/tests/testthat/test_base_imbal_weightedclasses.R b/tests/testthat/test_base_imbal_weightedclasses.R index efe10788cd..acf2c1cb4e 100644 --- a/tests/testthat/test_base_imbal_weightedclasses.R +++ b/tests/testthat/test_base_imbal_weightedclasses.R @@ -1,8 +1,9 @@ context("weightedclasses") -test_that("WeightedClassesWrapper, binary", { +test_that("WeightedClassesWrapper, binary", { pos = getTaskDesc(binaryclass.task)$positive f = function(lrn, w) { + lrn1 = makeLearner(lrn) lrn2 = makeWeightedClassesWrapper(lrn1, wcw.weight = w) m = train(lrn2, binaryclass.task) @@ -11,9 +12,10 @@ test_that("WeightedClassesWrapper, binary", { } learners = paste("classif", c("ksvm", "LiblineaRL1L2SVC", "LiblineaRL2L1SVC", - "LiblineaRL2SVC", "LiblineaRL1LogReg", "LiblineaRL2LogReg", "LiblineaRMultiClassSVC", - "randomForest", "svm"), sep = ".") + "LiblineaRL2SVC", "LiblineaRL1LogReg", "LiblineaRL2LogReg", "LiblineaRMultiClassSVC", + "randomForest", "svm"), sep = ".") x = lapply(learners, function(lrn) { + cm1 = f(lrn, 0.001) cm2 = f(lrn, 1) cm3 = f(lrn, 1000) @@ -25,9 +27,10 @@ test_that("WeightedClassesWrapper, binary", { expect_error(f("classif.lda", 0.01)) }) -test_that("WeightedClassesWrapper, multiclass", { +test_that("WeightedClassesWrapper, multiclass", { levs = getTaskClassLevels(multiclass.task) f = function(lrn, w) { + lrn1 = makeLearner(lrn) lrn2 = makeWeightedClassesWrapper(lrn1, wcw.weight = w) m = train(lrn2, multiclass.task) @@ -36,9 +39,10 @@ test_that("WeightedClassesWrapper, multiclass", { } learners = paste("classif", c("ksvm", "LiblineaRL1L2SVC", "LiblineaRL2L1SVC", - "LiblineaRL2SVC", "LiblineaRL1LogReg", "LiblineaRL2LogReg", "LiblineaRMultiClassSVC", - "randomForest", "svm"), sep = ".") + "LiblineaRL2SVC", "LiblineaRL1LogReg", "LiblineaRL2LogReg", "LiblineaRMultiClassSVC", + "randomForest", "svm"), sep = ".") x = lapply(learners, function(lrn) { + classes = getTaskFactorLevels(multiclass.task)[[multiclass.target]] n = length(classes) cm1 = f(lrn, setNames(object = c(10000, 1, 1), classes)) @@ -59,15 +63,16 @@ test_that("WeightedClassesWrapper, multiclass", { context("getClassWeightParam") -test_that("getClassWeightParam", { +test_that("getClassWeightParam", { f = function(lrn) { + lrn1 = makeLearner(lrn) expect_is(getClassWeightParam(lrn), "LearnerParam") expect_is(getClassWeightParam(lrn1), "LearnerParam") } learners = paste("classif", c("ksvm", "LiblineaRL1L2SVC", "LiblineaRL2L1SVC", - "LiblineaRL2SVC", "LiblineaRL1LogReg", "LiblineaRL2LogReg", "LiblineaRMultiClassSVC", - "randomForest", "svm"), sep = ".") + "LiblineaRL2SVC", "LiblineaRL1LogReg", "LiblineaRL2LogReg", "LiblineaRMultiClassSVC", + "randomForest", "svm"), sep = ".") x = lapply(learners, f) }) diff --git a/tests/testthat/test_base_impute.R b/tests/testthat/test_base_impute.R index 853e4df694..2fc0dc1790 100644 --- a/tests/testthat/test_base_impute.R +++ b/tests/testthat/test_base_impute.R @@ -88,7 +88,7 @@ test_that("Impute data frame", { imputed = reimpute(data.frame(f = factor("newlvl"), x = NA_real_), x$desc) expect_equal(as.character(imputed$f), "xxx") expect_equal(imputed$x, 1) - #FIXME: y was never in 
input data? therefore next test fails? + # FIXME: y was never in input data? therefore next test fails? # expect_equal(imputed$y, 8) x = impute(data, target = target, cols = list(f = "xxx"), impute.new.levels = FALSE) @@ -110,7 +110,7 @@ test_that("Impute data frame", { expect_true(setequal(names(x), c(names(data), "x.dummy", "y.dummy", "z.dummy"))) x = impute(data, target = character(0), classes = list(factor = imputeMode(), numeric = imputeMedian(), - integer = imputeMedian(), logical = imputeConstant(1))) + integer = imputeMedian(), logical = imputeConstant(1))) expect_true(all(!is.na(x$data))) data2 = data[1:5, ] @@ -125,11 +125,11 @@ test_that("Impute and reimpute task", { data[6L, ] = NA classif.tar = factor(c(rep(c("a", "b"), 3L))) regr.tar = rep(c(.1, .2), 3L) - #additional data-frame to check reimpute + # additional data-frame to check reimpute data2 = data.frame(f = letters[c(2, 1, 1, 1, 1)], x = rep(2., 5), y = c(2, 4, 2, 3, 3)) data2[6L, ] = NA data2$z = classif.tar - #test classif task + # test classif task data$z = classif.tar data2$z = classif.tar classif.tsk = makeClassifTask(data = data, target = "z") diff --git a/tests/testthat/test_base_learnerArgsToControl.R b/tests/testthat/test_base_learnerArgsToControl.R index 40de9179d0..e1ad9551f3 100644 --- a/tests/testthat/test_base_learnerArgsToControl.R +++ b/tests/testthat/test_base_learnerArgsToControl.R @@ -1,8 +1,8 @@ context("learnerArgsToControl") test_that("learnerArgsToControl with list returns the input", { - checkLearnerArgsToControl = function(arg1, arg2, ...) { + learnerArgsToControl(control = list, arg1, arg2, ...) } @@ -12,21 +12,20 @@ test_that("learnerArgsToControl with list returns the input", { expect_equal(list(arg1 = arg1, arg2 = arg2), test1) # test missing values - arg1 = quote(expr = ) # nolint - arg2 = quote(expr = ) # nolint + arg1 = quote(expr = ) # nolint + arg2 = quote(expr = ) # nolint test2 = checkLearnerArgsToControl(arg1, arg2) expect_equal(list(), test2) # test for dots test3 = checkLearnerArgsToControl(arg1, arg2, arg3 = 1000) expect_equal(list(arg3 = 1000), test3) - }) test_that("learnerArgsToControl works with a control object", { - checkLearnerArgsToControlWithControl = function(fdev, devmax, ...) { + learnerArgsToControl(control = glmnet::glmnet.control, fdev, devmax, ...) 
 }
@@ -12,21 +12,20 @@ test_that("learnerArgsToControl with list returns the input", {
   expect_equal(list(arg1 = arg1, arg2 = arg2), test1)

   # test missing values
-  arg1 = quote(expr = ) # nolint
-  arg2 = quote(expr = ) # nolint
+  arg1 = quote(expr = ) # nolint
+  arg2 = quote(expr = ) # nolint
   test2 = checkLearnerArgsToControl(arg1, arg2)
   expect_equal(list(), test2)

   # test for dots
   test3 = checkLearnerArgsToControl(arg1, arg2, arg3 = 1000)
   expect_equal(list(arg3 = 1000), test3)
-
 })

 test_that("learnerArgsToControl works with a control object", {
-
   checkLearnerArgsToControlWithControl = function(fdev, devmax, ...) {
+
     learnerArgsToControl(control = glmnet::glmnet.control, fdev, devmax, ...)
   }
@@ -38,9 +37,8 @@ test_that("learnerArgsToControl works with a control object", {

   test1 = checkLearnerArgsToControlWithControl(fdev, devmax)
   expect_equal(test1, glmnet::glmnet.control(fdev = fdev, devmax = devmax))

-  devmax = quote(expr = ) # nolint
+  devmax = quote(expr = ) # nolint
   glmnet::glmnet.control(factory = TRUE)
   test2 = checkLearnerArgsToControlWithControl(fdev, devmax, mnlam = 3)
   expect_equal(test2, glmnet::glmnet.control(fdev = fdev, devmax = devmax, mnlam = 3))
 })
-
diff --git a/tests/testthat/test_base_makeLearners.R b/tests/testthat/test_base_makeLearners.R
index b941ac60ed..027184ec4a 100644
--- a/tests/testthat/test_base_makeLearners.R
+++ b/tests/testthat/test_base_makeLearners.R
@@ -18,4 +18,3 @@ test_that("makeLearners", {
   res = makeLearners(cls2, type = "classif", predict.type = "prob")
   expect_equal(res, lrns3)
 })
-
diff --git a/tests/testthat/test_base_makeTask.R b/tests/testthat/test_base_makeTask.R
index c656dad60e..c494d6d60c 100644
--- a/tests/testthat/test_base_makeTask.R
+++ b/tests/testthat/test_base_makeTask.R
@@ -1,7 +1,7 @@
 test_that("makeXXTask ignores bad columns if check.data = FALSE", {
-
   constructTask = function(data, target, type, id) {
+
     constructor = switch(type,
       classif = makeClassifTask,
       multilabel = makeMultilabelTask,
@@ -25,6 +25,4 @@ test_that("makeXXTask ignores bad columns if check.data = FALSE", {
   for (type in names(constructors)) {
     expect_error(constructors[[type]](), "Unsupported feature type (logical) in column 'x'", fixed = TRUE)
   }
-
 })
-
diff --git a/tests/testthat/test_base_measures.R b/tests/testthat/test_base_measures.R
index c31b509426..b4df59a8f1 100644
--- a/tests/testthat/test_base_measures.R
+++ b/tests/testthat/test_base_measures.R
@@ -5,11 +5,11 @@ test_that("measures", {
   options(warn = 2)
   mymeasure = makeMeasure(id = "foo", minimize = TRUE,
     properties = c("classif", "classif.multi", "regr", "predtype.response", "predtype.prob"),
-    fun = function(task, model, pred, feats, extra.args) {
-      tt = pred
-      1
-    }
-  )
+    fun = function(task, model, pred, feats, extra.args) {
+
+      tt = pred
+      1
+    })
   ms = list(mmce, acc, bac, tp, fp, tn, fn, tpr, fpr, tnr, fnr, ppv, npv, mcc, f1, mymeasure)
   lrn = makeLearner("classif.rpart")
@@ -52,6 +52,7 @@
   pred = predict(mod, task = surv.task, subset = surv.test.inds)
   perf = performance(pred, model = mod, task = surv.task, measures = ms)
   Map(function(measure, perf) {
+
     r = range(measure$worst, measure$best)
     expect_number(perf, lower = r[1], upper = r[2], label = measure$id)
   }, measure = ms, perf = perf)
@@ -63,6 +64,7 @@
   res = resample(lrn, task, resampling = rin, measures = ms)$aggr
   expect_numeric(res, any.missing = FALSE)
   Map(function(measure) {
+
     r = range(measure$worst, measure$best)
     expect_number(res[[sprintf("%s.test.mean", measure$id)]], lower = r[1], upper = r[2], label = measure$id)
   }, measure = ms)
@@ -86,7 +88,7 @@ test_that("measures with same id still work", {
 })

 test_that("ber with faulty model produces NA", {
-  data = iris; data[, 1] = 1
+  data = iris
+  data[, 1] = 1
   lrn = makeLearner("classif.lda", config = list(on.learner.error = "quiet"))
   task = makeClassifTask(data = data, target = "Species")
   r = holdout(lrn, task, measures = ber)
@@ -117,6 +120,7 @@ test_that("mcc is implemented correctly", { # see issue 363

 test_that("listMeasures", {
   mycheck = function(type) {
+
     xs = listMeasures(type, create = TRUE)
     expect_true(is.list(xs) && length(xs) > 0L, info = type)
     expect_true(all(vlapply(xs, inherits, what = "Measure")), info = type)
@@ -130,11 +134,11 @@
 })

 test_that("check measure calculations", {
-  #tiny datasets for testing
-  #features
+  # tiny datasets for testing
+  # features
   var1 = c(1, 2, 3, 4)
   var2 = c(3, 4, 1, 2)
-  #for regression
+  # for regression
   tar.regr = c(5, 10, 0, 5)
   pred.art.regr = c(4, 11, 0, 4)
   data.regr = data.frame(var1, var2, tar.regr)
@@ -143,7 +147,7 @@
   mod.regr = train(lrn.regr, task.regr)
   pred.regr = predict(mod.regr, task.regr)
   pred.regr$data$response = pred.art.regr
-  #for multiclass
+  # for multiclass
   tar.classif = factor(c(1L, 2L, 0L, 1L))
   pred.art.classif = factor(c(1L, 1L, 0L, 2L))
   data.classif = data.frame(var1, var2, tar.classif)
@@ -152,7 +156,7 @@
   mod.classif = train(lrn.classif, task.classif)
   pred.classif = predict(mod.classif, task.classif)
   pred.classif$data$response = pred.art.classif
-  #for binaryclass
+  # for binaryclass
   tar.bin = factor(c(1L, 0L, 0L, 1L))
   pred.art.bin = factor(c(1L, 1L, 0L, 0L))
   data.bin = data.frame(var1, var2, tar.bin)
@@ -161,7 +165,7 @@
   mod.bin = train(lrn.bin, task.bin)
   pred.bin = predict(mod.bin, task.bin)
   pred.bin$data$response = pred.art.bin
-  #for multilabel
+  # for multilabel
   tar1.multilabel = c(TRUE, FALSE, FALSE, TRUE)
   tar2.multilabel = c(TRUE, TRUE, FALSE, TRUE)
   pred.art.multilabel = cbind(c(TRUE, FALSE, FALSE, FALSE), c(FALSE, TRUE, FALSE, TRUE))
@@ -172,7 +176,7 @@
   mod.multilabel = train(lrn.multilabel, task.multilabel)
   pred.multilabel = predict(mod.multilabel, task.multilabel)
   pred.multilabel$data[, 4:5] = pred.art.multilabel
-  #for survival
+  # for survival
   time.surv = c(5, 10, 5, 10)
   status.surv = c(TRUE, FALSE, TRUE, FALSE)
   pred.art.surv = c(1, -1, 1, 1)
@@ -186,7 +190,7 @@
   })
   pred.surv = predict(mod.surv, task.surv)
   pred.surv$data[, "response"] = pred.art.surv
-  #for costsensitive
+  # for costsensitive
   tar.costsens = factor(c("a", "b", "c", "a"))
   pred.art.costsens = factor(c("a", "b", "c", "c"))
   data.costsens = data.frame(var1, var2)
@@ -199,7 +203,7 @@
   mod.costsens = train(lrn.costsens, task.costsens)
   pred.costsens = predict(mod.costsens, task = task.costsens)
   pred.costsens$data$response = pred.art.costsens
-  #for clustering
+  # for clustering
   pred.art.cluster = c(1L, 1L, 2L, 1L)
   data.cluster = data.frame(var1, var2)
   task.cluster = makeClusterTask(data = data.cluster)
@@ -208,41 +212,41 @@
   pred.cluster = predict(mod.cluster, task.cluster)
   pred.cluster$data$response = pred.art.cluster

-  #test regression measures
+  # test regression measures

-  #sse
+  # sse
   sq.errs = c(5 - 4, 10 - 11, 0 - 0, 5 - 4)^2L
   sse.test = sum(sq.errs)
   sse.perf = performance(pred.regr, measures = sse, model = mod.regr)
   expect_equal(sse.test, sse$fun(pred = pred.regr))
   expect_equal(sse.test, as.numeric(sse.perf))
-  #mse
+  # mse
   mse.test = mean(sq.errs)
   mse.perf = performance(pred.regr, measures = mse, model = mod.regr)
   expect_equal(mse.test, mse$fun(pred = pred.regr))
   expect_equal(mse.test, as.numeric(mse.perf))
-  #rmse
+  # rmse
   rmse.test = sqrt(mse.test)
   rmse.perf = performance(pred.regr, measures = rmse, model = mod.regr)
   expect_equal(rmse.test, rmse$fun(pred = pred.regr))
   expect_equal(rmse.test, as.numeric(rmse.perf))
-  #medse
+  # medse
   medse.test = median(sq.errs)
   medse.perf = performance(pred.regr, measures = medse, model = mod.regr)
   expect_equal(medse.test, medse$fun(pred = pred.regr))
   expect_equal(medse.test, as.numeric(medse.perf))
-  #sae
+  # sae
   abs.errs = abs(c(5 - 4, 10 - 11, 0 - 0, 5 - 4))
   sae.test = sum(abs.errs)
   sae.perf = performance(pred.regr, measures = sae, model = mod.regr)
   expect_equal(sae.test, sae$fun(pred = pred.regr))
   expect_equal(sae.test, as.numeric(sae.perf))
-  #mae
+  # mae
   mae.test = mean(abs.errs)
   mae.perf = performance(pred.regr, measures = mae, model = mod.regr)
   expect_equal(mae.test, mae$fun(pred = pred.regr))
   expect_equal(mae.test, as.numeric(mae.perf))
-  #medae
+  # medae
   medae.test = median(abs.errs)
   medae.perf = performance(pred.regr, measures = medae, model = mod.regr)
   expect_equal(medae.test, medae$fun(pred = pred.regr))
@@ -319,7 +323,7 @@
   expect_warning(mape$fun(pred = pred.regr), regexp = "Measure is undefined if any truth value is equal to 0.")
   expect_warning(measureMAPE(c(5, 10, 0, 5), c(4, 11, 0, 4)), regexp = "Measure is undefined if any truth value is equal to 0.")
   pred.regr.mape = pred.regr
-  pred.regr.mape$data$truth = c(5, 10, 1, 5) #we change the 0 target because mape is undefined
+  pred.regr.mape$data$truth = c(5, 10, 1, 5) # we change the 0 target because mape is undefined
   mape.perf = performance(pred.regr.mape, measures = mape, model = mod.regr)
   mape.test = mean(c(abs((5 - 4) / 5), abs((10 - 11) / 10), abs((1 - 0) / 1), abs((5 - 4) / 5)))
   expect_equal(mape.test, mape$fun(pred = pred.regr.mape))
@@ -330,7 +334,7 @@
   expect_silent(measureMAPE(c(1, 1, 1, 1), c(2, 2, 2, 2)))
   # msle
   msle.test = ((log(4 + 1) - log(5 + 1))^2 + (log(11 + 1) - log(10 + 1))^2 +
-    (log(0 + 1) - log(0 + 1))^2 + (log(4 + 1) - log(5 + 1))^2) / 4
+    (log(0 + 1) - log(0 + 1))^2 + (log(4 + 1) - log(5 + 1))^2) / 4
   msle.perf = performance(pred.regr, measures = msle, model = mod.regr)
   expect_equal(msle.test, msle$fun(pred = pred.regr))
   expect_equal(msle.test, as.numeric(msle.perf))
@@ -344,25 +348,25 @@
   rmsle.perf = performance(pred.regr, measures = rmsle, model = mod.regr)
   expect_equal(rmsle.test, rmsle$fun(pred = pred.regr))
   expect_equal(rmsle.test, as.numeric(rmsle.perf))
-  #tau
+  # tau
   tau.test = 1
   tau.perf = performance(pred.regr, measures = kendalltau, model = mod.regr)
   expect_equal(tau.test, kendalltau$fun(pred = pred.regr))
   expect_equal(tau.test, as.numeric(tau.perf))
-  #rho
+  # rho
   rho.test = 1
   rho.perf = performance(pred.regr, measures = spearmanrho, model = mod.regr)
   expect_equal(rho.test, spearmanrho$fun(pred = pred.regr))
   expect_equal(rho.test, as.numeric(rho.perf))

-  #test multiclass measures
+  # test multiclass measures

-  #mmce
+  # mmce
   mmce.test = mean(c(1L != 1L, 2L != 1L, 0L != 0L, 1L != 2L))
   mmce.perf = performance(pred.classif, measures = mmce, model = mod.classif)
   expect_equal(mmce.test, mmce$fun(pred = pred.classif))
   expect_equal(mmce.test, as.numeric(mmce.perf))
-  #acc
+  # acc
   acc.test = mean(c(1L != 1L, 2L != 0L, 0L != 0L, 1L != 2L))
   acc.perf = performance(pred.classif, measures = acc, model = mod.classif)
   expect_equal(acc.test, acc$fun(pred = pred.classif))
@@ -378,7 +382,7 @@
   colauc.sens = c(colauc.sens, 0) # Numbers when we classify all as 0
   colauc.omspec = c(colauc.omspec, 0) # Numbers when we classify all as 0
   colauc.height = (colauc.sens[-1] + colauc.sens[-length(colauc.sens)]) / 2
-  colauc.width = - diff(colauc.omspec) # = diff(rev(omspec))
+  colauc.width = -diff(colauc.omspec) # = diff(rev(omspec))
expect_equal(sum(colauc.height * colauc.width), colAUC(as.numeric(pred.art.bin), truth = tar.bin)[[1]]) # colAUC with "maximum = FALSE" colauc.min = colAUC(c(1, 0, 1, 1), truth = tar.bin, maximum = FALSE) @@ -411,17 +415,17 @@ test_that("check measure calculations", { colauc.omspec = c(colauc.omspec, 0) # Numbers when we classify all as 0 colauc.height = (colauc.sens[-1] + colauc.sens[-length(colauc.sens)]) / 2 colauc.width = -diff(colauc.omspec) # = diff(rev(colauc.omspec)) - if (sum(colauc.height * colauc.width) < 0.5) { - colauc2[i, 1] = 1 - sum(colauc.height * colauc.width) # calculate AUC using formula for the area of a trapezoid - } else { - colauc2[i, 1] = sum(colauc.height * colauc.width) # calculate AUC using formula for the area of a trapezoid + if (sum(colauc.height * colauc.width) < 0.5) { + colauc2[i, 1] = 1 - sum(colauc.height * colauc.width) # calculate AUC using formula for the area of a trapezoid + } else { + colauc2[i, 1] = sum(colauc.height * colauc.width) # calculate AUC using formula for the area of a trapezoid + } } -} expect_equal(colauc2[, 1], as.numeric(colAUC(as.numeric(pred.art.classif), truth = tar.classif)[, 1])) # multiclass.auc expect_equal(as.numeric(performance(pred.bin, measures = list(multiclass.aunu, multiclass.aunp, multiclass.au1u, multiclass.au1p))), - as.numeric(rep(performance(pred.bin, measures = auc), 4))) + as.numeric(rep(performance(pred.bin, measures = auc), 4))) auc.lrn = makeLearner("classif.rpart", predict.type = "prob") auc.fit = train(auc.lrn, iris.task) auc.pred.constant = predict(auc.fit, subsetTask(iris.task, 1:50)) @@ -453,9 +457,12 @@ test_that("check measure calculations", { expect_equal(logloss.test, logloss$fun(pred = pred.classif)) expect_equal(logloss.test, as.numeric(logloss.perf)) - #ssr + # ssr pred.probs = getPredictionProbabilities(pred.classif) - ssr.test = mean(vnapply(seq_row(pred.probs), function(i) {pred.probs[i, tar.classif[i]]}) / sqrt(rowSums(pred.probs^2))) + ssr.test = mean(vnapply(seq_row(pred.probs), function(i) { + + pred.probs[i, tar.classif[i]] + }) / sqrt(rowSums(pred.probs^2))) ssr.perf = performance(pred.classif, measures = ssr, model = mod.classif) expect_equal(ssr.test, ssr$fun(pred = pred.classif)) expect_equal(ssr.test, as.numeric(ssr.perf)) @@ -464,7 +471,7 @@ test_that("check measure calculations", { expect_equal(measureSSR(p2, y1), 0.5 * (0.9 / sqrt(0.1^2 + 0.9^2) + 0.2 / sqrt(0.2^2 + 0.8^2))) expect_equal(measureSSR(p2[1, , drop = FALSE], y2[1]), 0.1 / sqrt(0.1^2 + 0.9^2)) expect_equal(measureSSR(p2[1, , drop = FALSE], y1[1]), 0.9 / sqrt(0.1^2 + 0.9^2)) - #qsr + # qsr qsr.test = 1 - mean(rowSums((pred.probs - model.matrix(~ . + 0, data = as.data.frame(tar.classif)))^2)) qsr.perf = performance(pred.classif, measures = qsr, model = mod.classif) expect_equal(qsr.test, qsr$fun(pred = pred.classif)) @@ -474,7 +481,7 @@ test_that("check measure calculations", { expect_equal(measureQSR(p2, y1), 1 - 0.5 * ((1 - 0.9)^2 + (0 - 0.1)^2 + (1 - 0.2)^2 + (0 - 0.8)^2)) expect_equal(measureQSR(p2[1, , drop = FALSE], y2[1]), 1 - (1 - 0.1)^2 - (0 - 0.9)^2) expect_equal(measureQSR(p2[1, , drop = FALSE], y1[1]), 1 - (1 - 0.9)^2 - (0 - 0.1)^2) - #lsr + # lsr lsr.test = mean(log(pred.probs[model.matrix(~ . 
+ 0, data = as.data.frame(tar.classif)) - pred.probs > 0])) lsr.perf = performance(pred.classif, measures = lsr, model = mod.classif) expect_equal(lsr.test, lsr$fun(pred = pred.classif)) @@ -484,14 +491,14 @@ test_that("check measure calculations", { expect_equal(measureLSR(p2, y1), mean(log(c(0.9, 0.2)))) expect_equal(measureLSR(p2[1, , drop = FALSE], y2[1]), log(0.1)) expect_equal(measureLSR(p2[1, , drop = FALSE], y1[1]), log(0.9)) - #kappa + # kappa p0 = 0.5 pe = (0.25 * 0.25 + 0.5 * 0.5 + 0.25 * 0.25) / 1 kappa.test = 1 - (1 - p0) / (1 - pe) kappa.perf = performance(pred.classif, measures = kappa, model = mod.classif) expect_equal(measureKAPPA(tar.classif, pred.art.classif), kappa.test) expect_equal(measureKAPPA(tar.classif, pred.art.classif), as.numeric(kappa.perf)) - #wkappa + # wkappa conf.mat = matrix(c(1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L), nrow = 3L) / 4L expected.mat = c(0.25, 0.5, 0.25) %*% t(c(0.25, 0.5, 0.25)) weights = matrix(c(0, 1, 4, 1, 0, 1, 4, 1, 0), nrow = 3L) @@ -505,9 +512,9 @@ test_that("check measure calculations", { levels(pred.art.classif2) = as.numeric(levels(pred.art.classif))^2 expect_equal(measureWKAPPA(tar.classif2, pred.art.classif2), wkappa.test) - #test binaryclass measures + # test binaryclass measures - #brier + # brier pred.probs = getPredictionProbabilities(pred.bin) brier.test = mean((as.numeric(tar.bin == "0") - pred.probs)^2) brier.perf = performance(pred.bin, measures = brier, model = mod.bin) @@ -516,7 +523,7 @@ test_that("check measure calculations", { expect_equal(measureBrier(c(1, 1, 0), c("a", "a", "a"), "b", "a"), 1 / 3) expect_equal(measureBrier(c(1, 1, 1), c("a", "a", "a"), "b", "a"), 0) expect_equal(measureBrier(c(0, 0, 0), c("a", "a", "a"), "b", "a"), 1) - #brier.scaled + # brier.scaled inc = mean(pred.probs) brier.test.max = inc * (1 - inc)^2 + (1 - inc) * inc^2 brier.scaled.test = 1 - brier.test / brier.test.max @@ -526,106 +533,106 @@ test_that("check measure calculations", { expect_equal(measureBrierScaled(c(1, 1, 0), c("a", "a", "a"), "b", "a"), 1 - ((1 / 3) / (2 / 3 * 1 / 3))) expect_equal(measureBrierScaled(c(1, 1, 1), c("a", "a", "a"), "b", "a"), 1 - ((0) / (1 * 0))) expect_equal(measureBrierScaled(c(0, 0, 0), c("a", "a", "a"), "b", "a"), 1 - ((1) / (0 * 1))) - #tp + # tp tp.test = sum(tar.bin == pred.art.bin & pred.art.bin == 0L) tp.perf = performance(pred.bin, measures = tp, model = mod.bin) expect_equal(tp.test, tp$fun(pred = pred.bin)) expect_equal(tp.test, as.numeric(tp.perf)) - #tn + # tn tn.test = sum(tar.bin == pred.art.bin & pred.art.bin == 1L) tn.perf = performance(pred.bin, measures = tn, model = mod.bin) expect_equal(tn.test, tn$fun(pred = pred.bin)) expect_equal(tn.test, as.numeric(tn.perf)) - #fp + # fp fp.test = sum(tar.bin != pred.art.bin & pred.art.bin == 0L) fp.perf = performance(pred.bin, measures = fp, model = mod.bin) expect_equal(fp.test, fp$fun(pred = pred.bin)) expect_equal(fp.test, as.numeric(fp.perf)) - #fn + # fn fn.test = sum(tar.bin != pred.art.bin & pred.art.bin == 1L) fn.perf = performance(pred.bin, measures = fn, model = mod.bin) expect_equal(fn.test, fn$fun(pred = pred.bin)) expect_equal(fn.test, as.numeric(fn.perf)) - #tpr + # tpr tpr.test = tp.test / sum(tar.bin == 0L) tpr.perf = performance(pred.bin, measures = tpr, model = mod.bin) expect_equal(tpr.test, tpr$fun(pred = pred.bin)) expect_equal(tpr.test, as.numeric(tpr.perf)) - #tnr + # tnr tnr.test = tn.test / sum(tar.bin == 1L) tnr.perf = performance(pred.bin, measures = tnr, model = mod.bin) expect_equal(tnr.test, tnr$fun(pred = pred.bin)) 
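# Cohen's kappa from first principles, mirroring the p0/pe computation above.
# A minimal base R sketch; the truth/response vectors are reconstructed from the
# mmce comparisons earlier in this test and may differ from the actual fixtures.
truth = factor(c(1, 2, 0, 1), levels = 0:2)
response = factor(c(1, 1, 0, 2), levels = 0:2)
p0 = mean(truth == response)                                     # observed agreement: 0.5
pe = sum(prop.table(table(truth)) * prop.table(table(response))) # chance agreement: 0.375
1 - (1 - p0) / (1 - pe)                                          # kappa = 0.2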
expect_equal(tnr.test, as.numeric(tnr.perf)) - #fpr + # fpr fpr.test = fp.test / sum(tar.bin != 0L) fpr.perf = performance(pred.bin, measures = fpr, model = mod.bin) expect_equal(fpr.test, fpr$fun(pred = pred.bin)) expect_equal(fpr.test, as.numeric(fpr.perf)) - #fnr + # fnr fnr.test = fn.test / sum(tar.bin != 1L) fnr.perf = performance(pred.bin, measures = fnr, model = mod.bin) expect_equal(fnr.test, fnr$fun(pred = pred.bin)) expect_equal(fnr.test, as.numeric(fnr.perf)) - #ppv + # ppv ppv.test = tp.test / sum(pred.art.bin == 0L) ppv.perf = performance(pred.bin, measures = ppv, model = mod.bin) expect_equal(ppv.test, ppv$fun(pred = pred.bin)) expect_equal(ppv.test, as.numeric(ppv.perf)) - #npv + # npv npv.test = tn.test / sum(pred.art.bin == 1L) npv.perf = performance(pred.bin, measures = npv, model = mod.bin) expect_equal(npv.test, npv$fun(pred = pred.bin)) expect_equal(npv.test, as.numeric(npv.perf)) - #fdr + # fdr fdr.test = fp.test / sum(pred.art.bin == 0L) fdr.perf = performance(pred.bin, measures = fdr, model = mod.bin) expect_equal(fdr.test, fdr$fun(pred = pred.bin)) expect_equal(fdr.test, as.numeric(fdr.perf)) - #bac + # bac bac.test = 0.5 * (tpr.test / (tpr.test + fnr.test) + tnr.test / -(tnr.test + fpr.test)) + (tnr.test + fpr.test)) bac.perf = performance(pred.bin, measures = bac, model = mod.bin) expect_equal(bac.test, bac$fun(pred = pred.bin)) expect_equal(bac.test, as.numeric(bac.perf)) - #ber + # ber ber.test = 1L - bac.test ber.perf = performance(pred.bin, measures = ber, model = mod.bin) expect_equal(ber.test, ber$fun(pred = pred.bin)) expect_equal(ber.test, as.numeric(ber.perf)) - #auc + # auc auc.test = (tpr.test + tnr.test) / 2L auc.perf = performance(pred.bin, measures = auc, model = mod.bin) expect_equal(auc.test, auc$fun(pred = pred.bin)) expect_equal(auc.test, as.numeric(auc.perf)) - #mcc + # mcc mcc.test = (tp.test * tn.test - fp.test * fn.test) / sqrt((tp.test + fp.test) * (tp.test + fn.test) * -(tn.test + fp.test) * (tn.test + fn.test)) - mcc.perf = performance(pred.bin, measures = mcc, model = mod.bin) + (tn.test + fp.test) * (tn.test + fn.test)) + mcc.perf = performance(pred.bin, measures = mcc, model = mod.bin) expect_equal(mcc.test, mcc$fun(pred = pred.bin)) expect_equal(mcc.test, as.numeric(mcc.perf)) - #f1 + # f1 f1.test = 2 * tp.test / (sum(tar.bin == 0L) + sum(pred.art.bin == 0L)) f1.perf = performance(pred.bin, measures = f1, model = mod.bin) expect_equal(f1.test, f1$fun(pred = pred.bin)) expect_equal(f1.test, as.numeric(f1.perf)) - #gmean + # gmean gmean.test = sqrt((tp.test / (tp.test + fn.test)) * tn.test / (tn.test + fp.test)) gmean.perf = performance(pred.bin, measures = gmean, model = mod.bin) expect_equal(gmean.test, gmean$fun(pred = pred.bin)) expect_equal(gmean.test, as.numeric(gmean.perf)) - #gpr + # gpr gpr.test = sqrt(ppv.test * tpr.test) gpr.perf = performance(pred.bin, measures = gpr, model = mod.bin) expect_equal(gpr.test, gpr$fun(pred = pred.bin)) expect_equal(gpr.test, as.numeric(gpr.perf)) - #test multilabel measures + # test multilabel measures # create response and predictions using all possible combinations - #bincombo = matrix(c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE), ncol = 2, byrow = TRUE) - #multi.y = bincombo[rep(1:4, times = 4),] - #multi.p = bincombo[rep(1:4, each = 4),] + # bincombo = matrix(c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE), ncol = 2, byrow = TRUE) + # multi.y = bincombo[rep(1:4, times = 4),] + # multi.p = bincombo[rep(1:4, each = 4),] multi.y = getPredictionTruth(pred.multilabel) multi.p = 
getPredictionResponse(pred.multilabel) @@ -645,7 +652,7 @@ test_that("check measure calculations", { TruePositives = rowSums(multi.y & multi.p), TrueNegatives = rowSums(!multi.y & !multi.p)) - #hamloss: how many values are not identical + # hamloss: how many values are not identical hamloss.test = mean(as.vector(multi.y) != as.vector(multi.p)) hamloss.perf = performance(pred.multilabel, measures = multilabel.hamloss, model = mod.multilabel) expect_equal(hamloss.test, multilabel.hamloss$fun(pred = pred.multilabel)) @@ -661,7 +668,7 @@ test_that("check measure calculations", { expect_equal(measureMultilabelHamloss(matrix(tf, ncol = 2), matrix(tt, ncol = 2)), 1 / 2) # 1 of 2 values are wrong expect_equal(measureMultilabelHamloss(cbind(tf, tf), cbind(tf, tt)), 1 / 4) # 1 of 4 values are wrong - #subset01: how many rows are not identical + # subset01: how many rows are not identical subset01.test = mean(rowSums(multi.y == multi.p) != ncol(multi.y)) subset01.perf = performance(pred.multilabel, measures = multilabel.subset01, model = mod.multilabel) expect_equal(subset01.test, multilabel.subset01$fun(pred = pred.multilabel)) @@ -675,7 +682,7 @@ test_that("check measure calculations", { expect_equal(measureMultilabelSubset01(matrix(tf, ncol = 2), matrix(tt, ncol = 2)), 1) # 1 of 1 obs is wrong expect_equal(measureMultilabelSubset01(cbind(tf, tf), cbind(tf, tt)), 1 / 2) # 1 of 2 obs is wrong - #f1mult + # f1mult f1.test = vnapply(seq_row(multi.y), function(i) 2 * sum(multi.y[i, ] * multi.p[i, ]) / (sum(multi.y[i, ]) + sum(multi.p[i, ]))) f1.test[is.na(f1.test)] = 1 f1.test = mean(f1.test) @@ -698,7 +705,7 @@ test_that("check measure calculations", { expect_equal(measureMultilabelF1(matrix(tf, ncol = 2), matrix(tt, ncol = 2)), 2 * 1 / 3) # 1 TRUE-TRUE match of 3 TRUE values expect_equal(measureMultilabelF1(rbind(tf, tf), rbind(tf, tt)), mean(c(2 * 1 / 2, 2 * 1 / 3))) # 1 TRUE-TRUE match of 2 and 3 TRUE values per obs - #accmult + # accmult acc.test = vnapply(seq_row(multi.y), function(i) sum(multi.y[i, ] & multi.p[i, ]) / (sum(multi.y[i, ] | multi.p[i, ]))) acc.test[is.na(acc.test)] = 1 acc.test = mean(acc.test) @@ -713,7 +720,7 @@ test_that("check measure calculations", { expect_equal(measureMultilabelACC(matrix(tf, ncol = 2), matrix(tt, ncol = 2)), 1 / 2) expect_equal(measureMultilabelACC(rbind(tf, tf), rbind(tf, tt)), mean(c(1, 1 / 2))) - #ppvmult + # ppvmult ppv.test = vnapply(seq_row(multi.y), function(i) sum(multi.y[i, ] & multi.p[i, ]) / (sum(multi.p[i, ]))) ppv.test = mean(ppv.test, na.rm = TRUE) ppv.perf = performance(pred.multilabel, measures = multilabel.ppv, model = mod.multilabel) @@ -728,7 +735,7 @@ test_that("check measure calculations", { expect_equal(measureMultilabelPPV(matrix(tf, ncol = 2), matrix(tt, ncol = 2)), 1 / 2) expect_equal(measureMultilabelPPV(rbind(tf, tf), rbind(tf, tt)), mean(c(1 / 1, 1 / 2))) - #tprmult + # tprmult tpr.test = vnapply(seq_row(multi.y), function(i) sum(multi.y[i, ] & multi.p[i, ]) / (sum(multi.y[i, ]))) tpr.test = mean(tpr.test, na.rm = TRUE) tpr.perf = performance(pred.multilabel, measures = multilabel.tpr, model = mod.multilabel) @@ -744,9 +751,9 @@ test_that("check measure calculations", { expect_equal(measureMultilabelTPR(matrix(tf, ncol = 2), matrix(tt, ncol = 2)), 1 / 1) expect_equal(measureMultilabelTPR(rbind(tf, tf), rbind(tf, tt)), mean(c(1 / 1, 1 / 1))) - #test survival measures + # test survival measures - #cindex + # cindex pos = pred.surv$data[pred.surv$data$truth.event == TRUE, "response"] neg = pred.surv$data[pred.surv$data$truth.event 
== FALSE, "response"] cons = c(ifelse(pos[1L] > neg, 1L, 0L), ifelse(pos[2L] > neg, 1L, 0L)) @@ -757,25 +764,25 @@ test_that("check measure calculations", { expect_equal(cindex.test, cindex$fun(pred = pred.surv)) expect_equal(cindex.test, as.numeric(cindex.perf)) - #test cost-sensitive measures + # test cost-sensitive measures - #meancosts + # meancosts meancosts.test = (0 + 0 + 0 + 1) / 4L meancosts.perf = performance(pred.costsens, measures = meancosts, - model = mod.costsens, task = task.costsens) + model = mod.costsens, task = task.costsens) expect_equal(meancosts.test, meancosts$fun(pred = pred.costsens, task = task.costsens)) expect_equal(meancosts.test, as.numeric(meancosts.perf)) - #mcp + # mcp mcp.test = meancosts.test - 0 mcp.perf = performance(pred.costsens, measures = mcp, task = task.costsens, model = mod.costsens) expect_equal(mcp.test, mcp$fun(pred = pred.costsens, task = task.costsens)) expect_equal(mcp.test, as.numeric(mcp.perf)) - #test clustering + # test clustering - #db + # db c2 = c(3, 1) c1 = c((1 + 2 + 4) / 3, (3 + 4 + 2) / 3) s1 = sqrt((sum((data.cluster[1, ] - c1)^2) + sum((data.cluster[2, ] - c1)^2) + @@ -785,10 +792,10 @@ test_that("check measure calculations", { db.perf = performance(pred.cluster, measures = db, model = mod.cluster, feats = data.cluster) expect_equal(db.test, db$fun(task = task.cluster, - pred = pred.cluster, feats = data.cluster)) + pred = pred.cluster, feats = data.cluster)) expect_equal(db.test, as.numeric(db.perf)) - #dunn + # dunn exdist = min(sqrt(sum((c(1, 3) - c(3, 1))^2)), sqrt(sum((c(2, 4) - c(3, 1))^2)), sqrt(sum((c(4, 3) - c(3, 2))^2))) indist = max(sqrt(sum((c(1, 3) - c(2, 4))^2)), sqrt(sum((c(1, 3) - c(4, 2))^2)), @@ -799,7 +806,7 @@ test_that("check measure calculations", { expect_equal(dunn.test, dunn$fun(pred = pred.cluster, feats = data.cluster)) expect_equal(dunn.test, as.numeric(dunn.perf)) - #g1 index + # g1 index exsum = sqrt(sum((c(1, 3) - c(3, 1))^2)) + sqrt(sum((c(2, 4) - c(3, 1))^2)) + sqrt(sum((c(4, 3) - c(3, 2))^2)) insum = sqrt(sum((c(1, 3) - c(2, 4))^2)) + sqrt(sum((c(1, 3) - c(4, 2))^2)) + @@ -809,24 +816,24 @@ test_that("check measure calculations", { model = mod.cluster, feats = data.cluster) expect_equal(g1.test, G1$fun(pred = pred.cluster, feats = data.cluster)) expect_equal(g1.test, as.numeric(g1.perf)) - #g2 index + # g2 index dists = as.matrix(dist(data.cluster, method = "euclidian")) c2.dists = as.vector(dists[, 3L]) c2.dists = c2.dists[c2.dists != 0L] c1.dists = unique(as.vector(dists [-3L, -3L])) c1.dists = c1.dists[c1.dists != 0L] con.pairs = vapply(c1.dists, function(x) x < c2.dists, - logical(length = length(c2.dists))) + logical(length = length(c2.dists))) con.pairs = sum(rowSums(con.pairs)) dis.pairs = vapply(c2.dists, function(x) x < c1.dists, - logical(length = length(c1.dists))) + logical(length = length(c1.dists))) dis.pairs = sum(rowSums(dis.pairs)) g2.test = (con.pairs - dis.pairs) / (con.pairs + dis.pairs) g2.perf = performance(pred.cluster, measures = G2, model = mod.cluster, feats = data.cluster) expect_equal(g2.test, G2$fun(pred = pred.cluster, feats = data.cluster)) expect_equal(g2.test, as.numeric(g2.perf)) - #silhouette + # silhouette dists = as.matrix(clusterSim::dist.GDM(data.cluster)) ais = replace(dists, dists == 0, NA)[-3L, -3L] ais = apply(ais, MARGIN = 2L, mean, na.rm = TRUE) @@ -839,19 +846,18 @@ test_that("check measure calculations", { expect_equal(silhouette.test, silhouette$fun(pred = pred.cluster, feats = data.cluster)) expect_equal(object = silhouette.test, 
as.numeric(silhouette.perf)) - #test that some measures are only transformations of each other + # test that some measures are only transformations of each other - #qsr is identical to the 1 - multiclass brier + # qsr is identical to 1 - multiclass brier expect_equal(1 - measureMulticlassBrier(p1, y1), measureQSR(p1, y1), check.names = FALSE) qsr.bin.perf = performance(pred.bin, measures = qsr, model = mod.bin) expect_equal(1 - 2 * brier.perf, qsr.bin.perf, check.names = FALSE) expect_equal(lsr.perf, -1 * logloss.perf, check.names = FALSE) - #multiclass brier for a two class problem should be two times the binary brier score. + # multiclass brier for a two class problem should be two times the binary brier score. multiclass.brier.twoclass.perf = performance(pred.bin, measures = multiclass.brier, model = mod.bin) expect_equal(2 * brier.perf, multiclass.brier.twoclass.perf, check.names = FALSE) - }) test_that("getDefaultMeasure", { @@ -863,16 +869,18 @@ test_that("getDefaultMeasure", { }) test_that("measure properties", { - #hasMeasureProps yields correct properties + # hasMeasureProps yields correct properties expect_true(all(vlapply(listMeasures(create = TRUE), function(m) { + res = hasMeasureProperties(m, m$properties) all(res) & length(res) > 0 - }))) + }))) props = listMeasureProperties() - #all props exist in mlr$measure.properties + # all props exist in mlr$measure.properties expect_true(all(vlapply(listMeasures(create = TRUE), function(m) { + res = all(getMeasureProperties(m) %in% props) all(res) & length(res) > 0 }))) @@ -933,7 +941,7 @@ test_that("bac works as intended with multiclass tasks (#1834)", { pred.classif = predict(mod.classif, task.classif) bac.test = mean(diag(table(pred.classif$data$truth, pred.classif$data$response) / - table(pred.classif$data$truth, pred.classif$data$truth))) + table(pred.classif$data$truth, pred.classif$data$truth))) bac.perf = performance(pred.classif, measures = bac, model = mod.bin) expect_equal(bac.test, bac$fun(pred = pred.classif)) expect_equal(bac.test, as.numeric(bac.perf)) @@ -947,7 +955,7 @@ test_that("new bac gives the same result as old implementation", { perf = performance(pred, measures = bac) old.bac = mean(c(tp$fun(pred = pred) / sum(pred$data$truth == pred$task.desc$positive), - tn$fun(pred = pred) / sum(pred$data$truth == pred$task.desc$negative))) + tn$fun(pred = pred) / sum(pred$data$truth == pred$task.desc$negative))) expect_equivalent(old.bac, perf) }) diff --git a/tests/testthat/test_base_mergeBenchmarkResults.R b/tests/testthat/test_base_mergeBenchmarkResults.R index 319cc32f56..3a0911f649 100644 --- a/tests/testthat/test_base_mergeBenchmarkResults.R +++ b/tests/testthat/test_base_mergeBenchmarkResults.R @@ -8,6 +8,7 @@ test_that("mergeBenchmarkResults", { # checks if list of unmerged BenchmarkResults is equal to the merged BenchmarkResults checkBenchmarkResults = function(list, merged) { + expect_is(merged, "BenchmarkResult") rbinded = do.call("rbind", lapply(list, as.data.frame)) res = merge(rbinded, merged, by = c("task.id", "learner.id", "iter"), all = TRUE) @@ -55,8 +56,8 @@ test_that("mergeBenchmarkResults", { expect_error(mergeBenchmarkResults(list(l1t1, l2t1, l2t1)), "multiple times") expect_error(mergeBenchmarkResults(list(l1t1, l2t2)), "are missing") # FIXME: do we want to merge BMR with different measures?
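# The "transformations of each other" identities asserted above can be reproduced
# by hand; a minimal base R sketch on a hypothetical two-class probability matrix
# (values chosen for illustration only):
p = matrix(c(0.9, 0.1, 0.3, 0.7), nrow = 2, byrow = TRUE,
  dimnames = list(NULL, c("a", "b")))
y = c("a", "b")
onehot = cbind(a = as.numeric(y == "a"), b = as.numeric(y == "b"))
multiclass.brier = mean(rowSums((p - onehot)^2))         # 0.1
1 - multiclass.brier                                     # = qsr, the first identity
binary.brier = mean((as.numeric(y == "a") - p[, "a"])^2) # 0.05, positive class "a"
all.equal(multiclass.brier, 2 * binary.brier)            # two-class factor-of-two relation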
- #l1t1.acc = benchmark(learners[[1L]], tasks[[1L]], cv2, measures = acc) - #expect_error(mergeBenchmarkResults(l2t1, l1t1.acc), "same measures") + # l1t1.acc = benchmark(learners[[1L]], tasks[[1L]], cv2, measures = acc) + # expect_error(mergeBenchmarkResults(l2t1, l1t1.acc), "same measures") # check measure order bench1 = benchmark(learners[1:2], tasks[[1L]], cv2, measures = list(acc, mmce)) diff --git a/tests/testthat/test_base_multilabel.R b/tests/testthat/test_base_multilabel.R index ac127ef2c4..f9652e9e7b 100644 --- a/tests/testthat/test_base_multilabel.R +++ b/tests/testthat/test_base_multilabel.R @@ -62,6 +62,7 @@ test_that("MultilabelBinaryRelevanceWrapper with glmnet (#958)", { }) testMultilabelWrapper = function(fun, ...) { + desc = fun("classif.rpart")$model.subclass[1] test_that(desc, { lrn1 = makeLearner("classif.rpart") @@ -166,4 +167,3 @@ testMultilabelWrapper(makeMultilabelStackingWrapper) # check order testMultilabelWrapper(makeMultilabelClassifierChainsWrapper, order = c("y2", "y1")) testMultilabelWrapper(makeMultilabelNestedStackingWrapper, order = c("y2", "y1")) - diff --git a/tests/testthat/test_base_normalizeFeatures.R b/tests/testthat/test_base_normalizeFeatures.R index f478ce5b1a..7de6a8e829 100644 --- a/tests/testthat/test_base_normalizeFeatures.R +++ b/tests/testthat/test_base_normalizeFeatures.R @@ -1,7 +1,6 @@ context("normalizeFeatures") test_that("normalizeFeatures", { - df = data.frame(x1 = c(0, -1, 4, 2, 3), x2 = letters[1:5], target = letters[1:5]) task = makeClassifTask(data = df, target = "target") diff --git a/tests/testthat/test_base_performance.R b/tests/testthat/test_base_performance.R index b65e869952..8ae336185c 100644 --- a/tests/testthat/test_base_performance.R +++ b/tests/testthat/test_base_performance.R @@ -17,6 +17,7 @@ test_that("performance", { res = makeResampleDesc("CV", iters = 3) mymeasure = makeMeasure(id = "mym", minimize = TRUE, properties = c("classif", "classif.multi", "predtype.response"), fun = function(task, model, pred, feats, extra.args) { + # normal test error e1 = mean(pred$data$truth != pred$data$response) # we do this manually @@ -42,6 +43,7 @@ test_that("performance", { mymeasure = makeCustomResampledMeasure(measure.id = "mym", aggregation.id = "train.mean", properties = c("classif", "predtype.response"), fun = function(task, group, pred, feats, extra.args) { + mean(pred$data$truth != pred$data$response) }) rdesc = makeResampleDesc("Holdout") @@ -66,7 +68,9 @@ test_that("performance is NA if 'on.measure.not.applicable' is not 'stop'", { } else if (i == "warn") { expect_warning(expect_equal(unname(performance(pred, auc)), NA_real_)) # does this also work with benchmark? 
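# A self-contained sketch of the custom-measure API exercised above: makeMeasure()
# takes fun(task, model, pred, feats, extra.args), and the result plugs into
# performance() like any built-in measure (assumes mlr is attached; iris.task
# ships with mlr).
library(mlr)
my.mmce = makeMeasure(id = "my.mmce", minimize = TRUE,
  properties = c("classif", "classif.multi", "predtype.response"),
  fun = function(task, model, pred, feats, extra.args) {
    mean(pred$data$truth != pred$data$response) # plain misclassification rate
  })
mod = train("classif.rpart", iris.task)
performance(predict(mod, iris.task), measures = my.mmce)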
- expect_warning({b = benchmark(lrn, binaryclass.task, measures = list(acc, auc))}) + expect_warning({ + b = benchmark(lrn, binaryclass.task, measures = list(acc, auc)) + }) expect_true(any(is.na(as.data.frame(b)$auc))) expect_false(any(is.na(as.data.frame(b)$acc))) } else { diff --git a/tests/testthat/test_base_plotBMRBoxplots.R b/tests/testthat/test_base_plotBMRBoxplots.R index 79c7480000..825ff81bf6 100644 --- a/tests/testthat/test_base_plotBMRBoxplots.R +++ b/tests/testthat/test_base_plotBMRBoxplots.R @@ -48,7 +48,7 @@ test_that("BenchmarkResult", { testDocForStrings(doc, getBMRLearnerShortNames(res)[2:1], grid.size = 2L, ordered = TRUE) - # check error when learner short names are not unique + # check error when learner short names are not unique lrns = list( rf = makeLearner("classif.randomForest", id = "rf1"), rf2 = makeLearner("classif.randomForest", id = "rf2") diff --git a/tests/testthat/test_base_plotBMRRanksAsBarChart.R b/tests/testthat/test_base_plotBMRRanksAsBarChart.R index 6c07c52a9f..fc1b0d7c67 100644 --- a/tests/testthat/test_base_plotBMRRanksAsBarChart.R +++ b/tests/testthat/test_base_plotBMRRanksAsBarChart.R @@ -36,7 +36,7 @@ test_that("plotBMRRanksAsBarChart", { doc = XML::xmlParse(path) testDocForStrings(doc, getBMRLearnerShortNames(res)[2:1], ordered = TRUE) - # check error when learner short names are not unique + # check error when learner short names are not unique lrns = list( rf = makeLearner("classif.randomForest", id = "rf1"), rf2 = makeLearner("classif.randomForest", id = "rf2") diff --git a/tests/testthat/test_base_plotBMRSummary.R b/tests/testthat/test_base_plotBMRSummary.R index 7ad0de66f2..e5085031d0 100644 --- a/tests/testthat/test_base_plotBMRSummary.R +++ b/tests/testthat/test_base_plotBMRSummary.R @@ -34,5 +34,4 @@ test_that("BenchmarkSummary", { res = benchmark(lrns, tasks, rdesc, meas) expect_error(plotBMRSummary(res), "names are not unique") - }) diff --git a/tests/testthat/test_base_plotCritDifferences.R b/tests/testthat/test_base_plotCritDifferences.R index 6f55bd8d09..e3dd332969 100644 --- a/tests/testthat/test_base_plotCritDifferences.R +++ b/tests/testthat/test_base_plotCritDifferences.R @@ -2,7 +2,7 @@ context("test_CritDifferences") test_that("test_CritDifferences", { lrns = list(makeLearner("classif.rpart"), - makeLearner("classif.nnet")) + makeLearner("classif.nnet")) tasks = list(multiclass.task, binaryclass.task) rdesc = makeResampleDesc("Holdout") meas = list(acc, ber) @@ -13,26 +13,32 @@ test_that("test_CritDifferences", { res$results$binary$classif.nnet$aggr[2] = 1 res$results$multiclass$classif.nnet$aggr[2] = 1 - expect_warning({r1 = generateCritDifferencesData(res)}) + expect_warning({ + r1 = generateCritDifferencesData(res) + }) expect_is(r1, "CritDifferencesData") - expect_warning({r2 = generateCritDifferencesData(res, ber, test = "nemenyi")}) + expect_warning({ + r2 = generateCritDifferencesData(res, ber, test = "nemenyi") + }) expect_is(r2, "CritDifferencesData") r3 = generateCritDifferencesData(res, ber, p.value = 0.5, test = "bd") expect_is(r3, "CritDifferencesData") # Test Issue #554 (equally performing learners) lrns2 = list(makeLearner("classif.rpart", "rpart1"), - makeLearner("classif.rpart", "rpart2")) + makeLearner("classif.rpart", "rpart2")) res2 = benchmark(lrns2, tasks, rdesc, meas) - expect_warning({r4 = generateCritDifferencesData(res2, acc, p.value = 0.3, test = "bd")}, - "Learner performances might be exactly equal.") + expect_warning({ + r4 = generateCritDifferencesData(res2, acc, p.value = 0.3, test = "bd") + }, 
+ "Learner performances might be exactly equal.") expect_is(r4, "CritDifferencesData") plotCritDifferences(r1) ggsave(tempfile(fileext = ".png")) plotCritDifferences(r2) ggsave(tempfile(fileext = ".png")) - plotCritDifferences(r3, baseline = "classif.rpart") + plotCritDifferences(r3, baseline = "classif.rpart") ggsave(tempfile(fileext = ".png")) plotCritDifferences(r4) ggsave(tempfile(fileext = ".png")) diff --git a/tests/testthat/test_base_plotResiduals.R b/tests/testthat/test_base_plotResiduals.R index 245e47afd3..976fe56497 100644 --- a/tests/testthat/test_base_plotResiduals.R +++ b/tests/testthat/test_base_plotResiduals.R @@ -1,7 +1,6 @@ context("plotResiduals") test_that("plotResiduals with prediction object", { - learner = makeLearner("regr.rpart") mod = train(learner, regr.task) preds = predict(mod, regr.task) diff --git a/tests/testthat/test_base_relativeOverfitting.R b/tests/testthat/test_base_relativeOverfitting.R index 0d25ba6488..b94aaf2d12 100644 --- a/tests/testthat/test_base_relativeOverfitting.R +++ b/tests/testthat/test_base_relativeOverfitting.R @@ -83,4 +83,3 @@ test_that("relativeOverfitting works for regression with train/test", { expect_true(is.numeric(ro$relative.overfit.mse)) expect_equal(nrow(ro), 1) }) - diff --git a/tests/testthat/test_base_resample.R b/tests/testthat/test_base_resample.R index f3ab88a79e..a94d17e1ac 100644 --- a/tests/testthat/test_base_resample.R +++ b/tests/testthat/test_base_resample.R @@ -52,7 +52,7 @@ test_that("resample", { fit = resample(lrn1, ct, makeResampleDesc("CV", iters = 2)) expect_error(resample("classif.rpart", multiclass.task, makeResampleDesc("Holdout"), - measures = list()), "length >= 1") + measures = list()), "length >= 1") }) test_that("resampling, predicting train set works", { @@ -81,9 +81,6 @@ test_that("resampling, predicting train set works", { expect_false(is.null(r$pred$predict.type)) expect_false(is.null(r$pred$threshold)) expect_equal(getTaskDesc(multiclass.task), r$pred$task.desc) - - - }) @@ -162,7 +159,7 @@ test_that("resample is extended by an additional measure", { measures = list(mmce, ber, auc, brier) # set aggregation method measures = lapply(measures, setAggregation, a) - #if (p == "train") measures = lapply(measures, setAggregation, train.mean) + # if (p == "train") measures = lapply(measures, setAggregation, train.mean) # create ResampleResult with all measures res.all = resample(lrn, binaryclass.task, rdesc, measures) # create ResampleResult with one measure and add the other ones afterwards @@ -196,8 +193,8 @@ test_that("resample printer respects show.info", { test_that("resample drops unseen factors in predict data set", { data = data.frame(a = c("a", "b", "a", "b", "a", "c"), - b = c(1, 1, 2, 2, 2, 1), - trg = c("a", "b", "a", "b", "a", "b")) + b = c(1, 1, 2, 2, 2, 1), + trg = c("a", "b", "a", "b", "a", "b")) task = makeClassifTask("unseen.factors", data, "trg") resinst = makeResampleInstance("Holdout", task) resinst$train.inds[[1]] = 1:4 diff --git a/tests/testthat/test_base_resample_bs.R b/tests/testthat/test_base_resample_bs.R index 9fc6bedbb5..6f967f9de8 100644 --- a/tests/testthat/test_base_resample_bs.R +++ b/tests/testthat/test_base_resample_bs.R @@ -26,9 +26,11 @@ test_that("bs resampling works", { requirePackagesOrSkip("rpart", default.method = "load") tt = function(formula, data, subset) { + rpart::rpart(formula, data = data[subset, ], minsplit = 12, cp = 0.09) } tp = function(model, newdata) { + predict(model, newdata, type = "class") } testBootstrap("classif.rpart", multiclass.df, 
multiclass.target, tune.train = tt, tune.predict = tp, parset = parset) diff --git a/tests/testthat/test_base_resample_convenience.R b/tests/testthat/test_base_resample_convenience.R index 85c934db96..d5fc9487fc 100644 --- a/tests/testthat/test_base_resample_convenience.R +++ b/tests/testthat/test_base_resample_convenience.R @@ -1,8 +1,8 @@ context("resample_convenience") test_that("resample convenience functions", { - mycheck = function(r) { + expect_true(all(!is.na(r$aggr))) } diff --git a/tests/testthat/test_base_resample_fixedwindowcv.R b/tests/testthat/test_base_resample_fixedwindowcv.R index 7c23086b4e..9523e91d3b 100644 --- a/tests/testthat/test_base_resample_fixedwindowcv.R +++ b/tests/testthat/test_base_resample_fixedwindowcv.R @@ -16,7 +16,7 @@ test_that("fixed instance works", { test_that("fixed instance works with value < 1", { rin = makeResampleInstance(makeResampleDesc("FixedWindowCV", horizon = .1, - initial.window = .5, skip = .02), size = 25) + initial.window = .5, skip = .02), size = 25) for (i in seq_len(length(rin$train.inds))) { i1 = rin$train.inds[[i]] @@ -31,7 +31,7 @@ test_that("fixed instance works with value < 1", { test_that("fixed instance works with values > 1", { rin = makeResampleInstance(makeResampleDesc("FixedWindowCV", horizon = 2, - initial.window = 8, skip = 1), size = 25) + initial.window = 8, skip = 1), size = 25) for (i in seq_len(length(rin$train.inds))) { i1 = rin$train.inds[[i]] @@ -46,8 +46,6 @@ test_that("fixed instance works with values > 1", { test_that("fixed instance throws warning for improper alignment", { expect_warning(makeResampleInstance(makeResampleDesc("FixedWindowCV", - horizon = 2, initial.window = 8, - skip = 2), size = 25)) + horizon = 2, initial.window = 8, + skip = 2), size = 25)) }) - - diff --git a/tests/testthat/test_base_resample_getResamplingIndices.R b/tests/testthat/test_base_resample_getResamplingIndices.R index c630c66961..3e9178a950 100644 --- a/tests/testthat/test_base_resample_getResamplingIndices.R +++ b/tests/testthat/test_base_resample_getResamplingIndices.R @@ -1,7 +1,6 @@ context("resample_cv") test_that("getResamplingIndices works with getTuneResult", { - task = makeClassifTask(data = iris, target = "Species") lrn = makeLearner("classif.rpart") # stupid mini grid @@ -27,7 +26,6 @@ test_that("getResamplingIndices works with getTuneResult", { }) test_that("getResamplingIndices works with getFeatSelResult", { - outer = makeResampleDesc("CV", iters = 2L) inner = makeResampleDesc("Holdout") @@ -36,6 +34,7 @@ test_that("getResamplingIndices works with getFeatSelResult", { lrn2 = makeFeatSelWrapper(lrn1, resampling = inner, control = ctrl) r = resample(lrn2, multiclass.task, outer, extract = function(model) { + getFeatSelResult(model) }) diff --git a/tests/testthat/test_base_resample_growingwindowcv.R b/tests/testthat/test_base_resample_growingwindowcv.R index 7f35da802f..8a70e1ecec 100644 --- a/tests/testthat/test_base_resample_growingwindowcv.R +++ b/tests/testthat/test_base_resample_growingwindowcv.R @@ -17,7 +17,7 @@ test_that("growing window instance works", { test_that("growing instance with values > 1 works", { rin = makeResampleInstance(desc = makeResampleDesc(method = "GrowingWindowCV", horizon = 2, - initial.window = 8, skip = 1), size = 25) + initial.window = 8, skip = 1), size = 25) for (i in seq_len(length(rin$train.inds))) { i1 = rin$train.inds[[i]] @@ -32,7 +32,7 @@ test_that("growing instance with values > 1 works", { test_that("growing window instance with values < 1 works", { rin = 
makeResampleInstance(desc = makeResampleDesc(method = "GrowingWindowCV", horizon = .1, - initial.window = .3, skip = .02), size = 25) + initial.window = .3, skip = .02), size = 25) for (i in seq_len(length(rin$train.inds))) { i1 = rin$train.inds[[i]] @@ -47,6 +47,6 @@ test_that("growing window instance with values < 1 works", { test_that("growing window instance throws warning for improper alignment", { expect_warning(makeResampleInstance(makeResampleDesc("GrowingWindowCV", - horizon = 2, initial.window = 8, - skip = 2), size = 25)) + horizon = 2, initial.window = 8, + skip = 2), size = 25)) }) diff --git a/tests/testthat/test_base_resample_operators.R b/tests/testthat/test_base_resample_operators.R index a0e1dd329e..8a56620d21 100644 --- a/tests/testthat/test_base_resample_operators.R +++ b/tests/testthat/test_base_resample_operators.R @@ -16,7 +16,7 @@ test_that("resample getter work", { r1 = resample(lrn, binaryclass.task, makeResampleDesc("CV", iters = 2, predict = "test")) r2 = resample(lrn, binaryclass.task, makeResampleDesc("CV", iters = 2, predict = "both")) # FIXME: add check for "train" after https://github.com/mlr-org/mlr/issues/1284 has been fixed - #r3 = resample(lrn, binaryclass.task, makeResampleDesc("CV", iters = 2, predict = "train"), setAggregation(mmce, train.mean)) + # r3 = resample(lrn, binaryclass.task, makeResampleDesc("CV", iters = 2, predict = "train"), setAggregation(mmce, train.mean)) # check if structure is correct expect_named(getRRPredictionList(r1), c("train", "test")) @@ -47,10 +47,9 @@ test_that("getRRPredictionList with se predict.type", { attr(ptrain$data, "row.names") = as.integer(row.names(ptrain$data)) ptest = predict(tmod, regr.task, subset = rinst$test.inds[[1]]) - attr(ptest$data, "row.names") = as.integer(row.names(ptest$data)) + attr(ptest$data, "row.names") = as.integer(row.names(ptest$data)) expect_equal(pl$train[[1]]$data, ptrain$data) expect_equal(pl$test[[1]]$data, ptest$data) - }) diff --git a/tests/testthat/test_base_resample_stratify.R b/tests/testthat/test_base_resample_stratify.R index 02a6a747a8..5288f4bd9c 100644 --- a/tests/testthat/test_base_resample_stratify.R +++ b/tests/testthat/test_base_resample_stratify.R @@ -1,15 +1,17 @@ context("resample_stratify") test_that("stratification instances work", { + mytest = function(rin, size1, size2) { - mytest = function(rin, size1, size2) { for (i in 1:rin$desc$iters) { i1 = rin$train.inds[[i]] i2 = rin$test.inds[[i]] - if (!missing(size1)) + if (!missing(size1)) { expect_true(all(as.numeric(table(getTaskTargets(multiclass.task)[i1])) == size1)) - if (!missing(size2)) + } + if (!missing(size2)) { expect_true(all(as.numeric(table(getTaskTargets(multiclass.task)[i2])) == size2)) + } expect_equal(sort(c(unique(i1), i2)), 1:150) } } diff --git a/tests/testthat/test_base_selectFeatures.R b/tests/testthat/test_base_selectFeatures.R index 46acc02854..31611ddc4e 100644 --- a/tests/testthat/test_base_selectFeatures.R +++ b/tests/testthat/test_base_selectFeatures.R @@ -25,6 +25,7 @@ test_that("selectFeatures", { # check bits.to.features bns = c("b1", "b2") btf = function(x, task) { + fns = getTaskFeatureNames(task) Reduce(c, list(fns[1:2], fns[3:4])[as.logical(x)], init = character(0)) } @@ -33,7 +34,7 @@ test_that("selectFeatures", { bits.to.features = btf, control = ctrl, show.info = FALSE) df = as.data.frame(fr$opt.path) expect_true(setequal(colnames(df), c("b1", "b2", "mmce.test.mean", "dob", "eol", - "exec.time", "error.message"))) + "exec.time", "error.message"))) expect_equal(nrow(df), 2L) 
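# How the bits.to.features mapping above works: each bit switches a whole group
# of features on or off, and selectFeatures() then optimizes over the bit vector.
# A standalone sketch (btf.demo and the feature names are illustrative only):
btf.demo = function(x, fns) {
  groups = list(fns[1:2], fns[3:4]) # two groups of two features each
  Reduce(c, groups[as.logical(x)], init = character(0))
}
btf.demo(c(1, 0), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"))
# -> "Sepal.Length" "Sepal.Width"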
expect_error(selectFeatures(lrn, task = multiclass.task, resampling = rdesc, bit.names = bns, control = ctrl, show.info = FALSE), "you also have to set bits.to.features") @@ -63,7 +64,7 @@ test_that("show info works in selectFeatures", { }, "1: [01].*([0-9]+ bits)") expect_message({ z = selectFeatures("classif.rpart", task = iris.task, resampling = rdesc, control = ctrl, show.info = TRUE) - }, "mmce.test.mean=0.[0-9]+") + }, "mmce.test.mean=0.[0-9]+") }) # we had a bug here when an empty model was created and isFailureModel could not be called on it, cf. #284 diff --git a/tests/testthat/test_base_spcv.R b/tests/testthat/test_base_spcv.R index 7c5cc5f30b..4b6e068616 100644 --- a/tests/testthat/test_base_spcv.R +++ b/tests/testthat/test_base_spcv.R @@ -1,32 +1,29 @@ test_that("Nested SpRepCV works without errors", { - data(spatial.task, package = "mlr", envir = environment()) lrn.ksvm = makeLearner("classif.ksvm", - predict.type = "prob", - kernel = "rbfdot") + predict.type = "prob", + kernel = "rbfdot") ps = makeParamSet(makeNumericParam("C", lower = 1, upper = 1), - makeNumericParam("sigma", lower = 1, upper = 1)) + makeNumericParam("sigma", lower = 1, upper = 1)) ctrl = makeTuneControlRandom(maxit = 1) inner = makeResampleDesc("SpCV", iters = 2) wrapper.ksvm = makeTuneWrapper(lrn.ksvm, resampling = inner, par.set = ps, - control = ctrl, show.info = FALSE, measures = list(auc)) + control = ctrl, show.info = FALSE, measures = list(auc)) outer = makeResampleDesc("SpRepCV", folds = 2, reps = 2) out = resample(wrapper.ksvm, spatial.task, - resampling = outer, show.info = TRUE, measures = list(auc)) + resampling = outer, show.info = TRUE, measures = list(auc)) expect_vector(out$measures.test$auc, any.missing = FALSE, len = 4) - }) test_that("SpRepCV works without errors", { - data(spatial.task, package = "mlr", envir = environment()) learner = makeLearner("classif.ksvm", predict.type = "prob", kernel = "rbfdot") @@ -34,8 +31,7 @@ test_that("SpRepCV works without errors", { resampling = makeResampleDesc("SpRepCV", fold = 2, reps = 2) out = resample(learner = learner, task = spatial.task, - resampling = resampling, measures = list(auc)) + resampling = resampling, measures = list(auc)) expect_vector(out$measures.test$auc, any.missing = FALSE, len = 4) - }) diff --git a/tests/testthat/test_base_tuneThreshold.R b/tests/testthat/test_base_tuneThreshold.R index f3a0abba4e..533697d58e 100644 --- a/tests/testthat/test_base_tuneThreshold.R +++ b/tests/testthat/test_base_tuneThreshold.R @@ -1,7 +1,7 @@ context("tuneThreshold") test_that("tuneThreshold", { - #binary classification + # binary classification res = makeResampleDesc("Holdout") lrn = makeLearner("classif.rpart", predict.type = "prob") rf = resample(lrn, task = binaryclass.task, resampling = res, measures = list(mmce)) diff --git a/tests/testthat/test_base_tuning.R b/tests/testthat/test_base_tuning.R index 120dbf684f..2d131d9b50 100644 --- a/tests/testthat/test_base_tuning.R +++ b/tests/testthat/test_base_tuning.R @@ -99,7 +99,7 @@ test_that("tuning works with tuneThreshold and multiple measures", { res = tuneParams(lrn, binaryclass.task, resampling = rdesc, measures = list(mmce, auc), par.set = ps, control = ctrl) expect_true(is.numeric(res$y) && length(res$y) == 2L && !any(is.na(res$y))) -# also check with infeasible stuff + # also check with infeasible stuff ps = makeParamSet( makeDiscreteParam("cp", values = c(0.1, -1)) ) @@ -144,7 +144,7 @@ test_that("tuning allows usage of budget", { test_that("Learner defined with expression in param 
requires, see #369 and PH #52", { ps = makeParamSet( makeDiscreteLearnerParam(id = "a", values = c("x", "y")), - makeNumericLearnerParam(id = "b", lower = 0.0, upper = 1.0, requires = expression(a == "x")) + makeNumericLearnerParam(id = "b", lower = 0.0, upper = 1.0, requires = expression(a == "x")) ) rdesc = makeResampleDesc("Holdout") @@ -155,7 +155,7 @@ test_that("Learner defined with expression in param requires, see #369 and PH #5 test_that("tuning does not break with small discrete values, see bug in #1115", { - ctrl = makeTuneControlGrid() + ctrl = makeTuneControlGrid() ps = makeParamSet( makeDiscreteParam("cp", values = c(1e-8, 1e-9)) ) @@ -170,10 +170,10 @@ test_that("tuning works with large param.sets", { # create long list of learner params ps.length = 200 long.learner.params = do.call(base::c, lapply(seq_len(ps.length), function(x) { + makeParamSet(makeIntegerLearnerParam(paste0("some.parameter", x), 1, 10)) })) lrn$par.set = c(lrn$par.set, long.learner.params) res = tuneParams(lrn, pid.task, cv5, par.set = long.learner.params, control = ctrl, show.info = TRUE) expect_class(res, "TuneResult") }) - diff --git a/tests/testthat/test_base_weights.R b/tests/testthat/test_base_weights.R index 943551dcb9..86f8180585 100644 --- a/tests/testthat/test_base_weights.R +++ b/tests/testthat/test_base_weights.R @@ -40,4 +40,3 @@ test_that("weights remain after subset", { expect_true(getTaskDesc(subsetTask(wtask, 1:10))$has.weights) } }) - diff --git a/tests/testthat/test_basenocran_batchmark.R b/tests/testthat/test_basenocran_batchmark.R index 9d3a838001..5472729da6 100644 --- a/tests/testthat/test_basenocran_batchmark.R +++ b/tests/testthat/test_basenocran_batchmark.R @@ -203,6 +203,7 @@ test_that("batchmark", { expect_equal(unique(tffd$iter), 1:2) f = function(tmp, cl) { + context(sprintf("batchmark: extracting %s", cl)) expect_true(is.list(tmp)) expect_true(setequal(names(tmp), task.names)) @@ -238,7 +239,7 @@ test_that("keep.preds and models are passed down to resample()", { expect_list(x$models, types = "WrappedModel") expect_is(x$pred, "ResamplePrediction") - ##test getter function for models + ## test getter function for models models = getBMRModels(res) expect_true(is.list(models)) expect_true(setequal(names(models), "binary")) @@ -289,13 +290,19 @@ test_that("batchmark works with incomplete results", { ids = batchmark(learners = learners, task = task, resampling = rin, reg = reg) submitJobs(1:6, reg = reg) expect_true(waitForJobs(reg = reg)) - expect_warning({res = reduceBatchmarkResults(ids = 1:6, reg = reg, keep.pred = FALSE)}, "subset") + expect_warning({ + res = reduceBatchmarkResults(ids = 1:6, reg = reg, keep.pred = FALSE) + }, "subset") expect_set_equal(getBMRLearnerIds(res), c("classif.lda", "classif.rpart")) - expect_warning({res = reduceBatchmarkResults(ids = 1:3, reg = reg, keep.pred = FALSE)}, "subset") + expect_warning({ + res = reduceBatchmarkResults(ids = 1:3, reg = reg, keep.pred = FALSE) + }, "subset") expect_set_equal(getBMRLearnerIds(res), "classif.lda") - expect_warning({res = reduceBatchmarkResults(ids = data.table(job.id = 5), reg = reg, keep.pred = FALSE)}, "subset") + expect_warning({ + res = reduceBatchmarkResults(ids = data.table(job.id = 5), reg = reg, keep.pred = FALSE) + }, "subset") expect_set_equal(getBMRLearnerIds(res), "classif.rpart") }) diff --git a/tests/testthat/test_classif_C50.R b/tests/testthat/test_classif_C50.R index c377240d92..4bcb0f41ff 100644 --- a/tests/testthat/test_classif_C50.R +++ b/tests/testthat/test_classif_C50.R @@ -49,4 +49,3 @@ 
test_that("classif_C50", { testProbParsets("classif.C50", multiclass.df, multiclass.target, multiclass.train.inds, old.probs.list, parset.list) }) - diff --git a/tests/testthat/test_classif_IBk.R b/tests/testthat/test_classif_IBk.R index 5e2b07fb92..f088b3a167 100644 --- a/tests/testthat/test_classif_IBk.R +++ b/tests/testthat/test_classif_IBk.R @@ -16,7 +16,7 @@ test_that("classif_IBk", { ctrl = do.call(RWeka::Weka_control, parset) set.seed(getOption("mlr.debug.seed")) m = RWeka::IBk(formula = multiclass.formula, data = multiclass.train, control = ctrl) - p = predict(m, newdata = multiclass.test, type = "class") + p = predict(m, newdata = multiclass.test, type = "class") p2 = predict(m, newdata = multiclass.test, type = "prob") old.predicts.list[[i]] = p old.probs.list[[i]] = p2 @@ -26,6 +26,7 @@ test_that("classif_IBk", { testProbParsets("classif.IBk", multiclass.df, multiclass.target, multiclass.train.inds, old.probs.list, parset.list) tt = function(formula, data, subset, ...) { + RWeka::IBk(formula, data = data[subset, ], control = RWeka::Weka_control(...)) } diff --git a/tests/testthat/test_classif_J48.R b/tests/testthat/test_classif_J48.R index 6745950894..fa175c8c78 100644 --- a/tests/testthat/test_classif_J48.R +++ b/tests/testthat/test_classif_J48.R @@ -8,7 +8,7 @@ test_that("classif_j48", { list(M = 10), list(M = 5, C = 0.4), list(M = 5, R = TRUE) - ) + ) old.predicts.list = list() old.probs.list = list() @@ -19,7 +19,7 @@ test_that("classif_j48", { parset$Q = as.integer(runif(1, min = -.Machine$integer.max, max = .Machine$integer.max)) ctrl = do.call(RWeka::Weka_control, parset) m = RWeka::J48(formula = multiclass.formula, data = multiclass.train, control = ctrl) - p = predict(m, newdata = multiclass.test, type = "class") + p = predict(m, newdata = multiclass.test, type = "class") p2 = predict(m, newdata = multiclass.test, type = "prob") old.predicts.list[[i]] = p old.probs.list[[i]] = p2 @@ -29,11 +29,11 @@ test_that("classif_j48", { testProbParsets("classif.J48", multiclass.df, multiclass.target, multiclass.train.inds, old.probs.list, parset.list) tt = function(formula, data, subset, ...) 
{ + RWeka::J48(formula, data = data[subset, ], control = RWeka::Weka_control(..., Q = as.integer(runif(1, min = -.Machine$integer.max, max = .Machine$integer.max)))) } tp = function(model, newdata) predict(model, newdata, type = "class") testCVParsets("classif.J48", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp, parset.list = parset.list) - }) diff --git a/tests/testthat/test_classif_JRip.R b/tests/testthat/test_classif_JRip.R index caf36dfd91..9bc79dfa87 100644 --- a/tests/testthat/test_classif_JRip.R +++ b/tests/testthat/test_classif_JRip.R @@ -8,7 +8,7 @@ test_that("classif_JRip", { list(F = 5), list(F = 4, N = 3), list(F = 2, O = 4) - ) + ) old.predicts.list = list() old.probs.list = list() @@ -19,7 +19,7 @@ test_that("classif_JRip", { parset$S = as.integer(runif(1, min = -.Machine$integer.max, max = .Machine$integer.max)) ctrl = do.call(RWeka::Weka_control, parset) m = RWeka::JRip(formula = multiclass.formula, data = multiclass.train, control = ctrl) - p = predict(m, newdata = multiclass.test, type = "class") + p = predict(m, newdata = multiclass.test, type = "class") p2 = predict(m, newdata = multiclass.test, type = "prob") old.predicts.list[[i]] = p old.probs.list[[i]] = p2 @@ -29,6 +29,7 @@ test_that("classif_JRip", { testProbParsets("classif.JRip", multiclass.df, multiclass.target, multiclass.train.inds, old.probs.list, parset.list) tt = function(formula, data, subset, ...) { + RWeka::JRip(formula, data = data[subset, ], control = RWeka::Weka_control(..., S = as.integer(runif(1, min = -.Machine$integer.max, max = .Machine$integer.max)))) } diff --git a/tests/testthat/test_classif_LibLineaRMultiClassSVC.R b/tests/testthat/test_classif_LibLineaRMultiClassSVC.R index e655690c6d..9f586a86aa 100644 --- a/tests/testthat/test_classif_LibLineaRMultiClassSVC.R +++ b/tests/testthat/test_classif_LibLineaRMultiClassSVC.R @@ -28,5 +28,4 @@ test_that("classif_LiblineaRMultiClassSVC", { testSimpleParsets("classif.LiblineaRMultiClassSVC", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.predicts.list, parset.list) - }) diff --git a/tests/testthat/test_classif_LiblineaRL1L2SVC.R b/tests/testthat/test_classif_LiblineaRL1L2SVC.R index 6c30fc4c55..f0fa56701c 100644 --- a/tests/testthat/test_classif_LiblineaRL1L2SVC.R +++ b/tests/testthat/test_classif_LiblineaRL1L2SVC.R @@ -28,5 +28,4 @@ test_that("classif_LiblineaRL1L2SVC", { testSimpleParsets("classif.LiblineaRL1L2SVC", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.predicts.list, parset.list) - }) diff --git a/tests/testthat/test_classif_LiblineaRL1LogReg.R b/tests/testthat/test_classif_LiblineaRL1LogReg.R index 59f7b6c2ae..7b944f116a 100644 --- a/tests/testthat/test_classif_LiblineaRL1LogReg.R +++ b/tests/testthat/test_classif_LiblineaRL1LogReg.R @@ -29,5 +29,4 @@ test_that("classif_LiblineaRL1LogReg", { binaryclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.LiblineaRL1LogReg", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list) - }) diff --git a/tests/testthat/test_classif_LiblineaRL2L1SVC.R b/tests/testthat/test_classif_LiblineaRL2L1SVC.R index e1b5172925..e43b0ac007 100644 --- a/tests/testthat/test_classif_LiblineaRL2L1SVC.R +++ b/tests/testthat/test_classif_LiblineaRL2L1SVC.R @@ -28,5 +28,4 @@ test_that("classif_LiblineaRL2L1SVC", { testSimpleParsets("classif.LiblineaRL2L1SVC", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.predicts.list, parset.list) - }) diff --git 
a/tests/testthat/test_classif_LiblineaRL2LogReg.R b/tests/testthat/test_classif_LiblineaRL2LogReg.R index fd18ccfa02..c167e28a44 100644 --- a/tests/testthat/test_classif_LiblineaRL2LogReg.R +++ b/tests/testthat/test_classif_LiblineaRL2LogReg.R @@ -38,5 +38,4 @@ test_that("classif_LiblineaRL2LogReg", { binaryclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.LiblineaRL2LogReg", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list) - }) diff --git a/tests/testthat/test_classif_LiblineaRL2SVC.R b/tests/testthat/test_classif_LiblineaRL2SVC.R index 54bbf3dc02..17cf49bea0 100644 --- a/tests/testthat/test_classif_LiblineaRL2SVC.R +++ b/tests/testthat/test_classif_LiblineaRL2SVC.R @@ -34,5 +34,4 @@ test_that("classif_LiblineaRL2SVC", { testSimpleParsets("classif.LiblineaRL2SVC", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.predicts.list, parset.list2) - }) diff --git a/tests/testthat/test_classif_OneR.R b/tests/testthat/test_classif_OneR.R index 6882ea3f04..17cb4230ad 100644 --- a/tests/testthat/test_classif_OneR.R +++ b/tests/testthat/test_classif_OneR.R @@ -6,7 +6,7 @@ test_that("classif_OneR", { parset.list = list( list(), list(B = 3) - ) + ) old.predicts.list = list() old.probs.list = list() @@ -16,7 +16,7 @@ test_that("classif_OneR", { ctrl = do.call(RWeka::Weka_control, parset) set.seed(getOption("mlr.debug.seed")) m = RWeka::OneR(formula = multiclass.formula, data = multiclass.train, control = ctrl) - p = predict(m, newdata = multiclass.test, type = "class") + p = predict(m, newdata = multiclass.test, type = "class") p2 = predict(m, newdata = multiclass.test, type = "prob") old.predicts.list[[i]] = p old.probs.list[[i]] = p2 @@ -26,6 +26,7 @@ test_that("classif_OneR", { testProbParsets("classif.OneR", multiclass.df, multiclass.target, multiclass.train.inds, old.probs.list, parset.list) tt = function(formula, data, subset, ...) { + RWeka::OneR(formula, data = data[subset, ], control = RWeka::Weka_control(...)) } diff --git a/tests/testthat/test_classif_PART.R b/tests/testthat/test_classif_PART.R index 67a5d79359..ad05e671ea 100644 --- a/tests/testthat/test_classif_PART.R +++ b/tests/testthat/test_classif_PART.R @@ -20,7 +20,7 @@ test_that("classif_PART", { ctrl = do.call(RWeka::Weka_control, parset) m = RWeka::PART(formula = multiclass.formula, data = multiclass.train, control = ctrl) set.seed(getOption("mlr.debug.seed")) - p = predict(m, newdata = multiclass.test, type = "class") + p = predict(m, newdata = multiclass.test, type = "class") set.seed(getOption("mlr.debug.seed")) p2 = predict(m, newdata = multiclass.test, type = "prob") old.predicts.list[[i]] = p @@ -33,13 +33,13 @@ test_that("classif_PART", { old.probs.list, parset.list) tt = function(formula, data, subset, ...) 
{ + set.seed(getOption("mlr.debug.seed")) RWeka::PART(formula, data = data[subset, ], control = RWeka::Weka_control(..., Q = - as.integer(runif(1, min = -.Machine$integer.max, max = .Machine$integer.max)))) + as.integer(runif(1, min = -.Machine$integer.max, max = .Machine$integer.max)))) } tp = function(model, newdata) predict(model, newdata, type = "class") testCVParsets("classif.PART", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp, parset.list = parset.list) - }) diff --git a/tests/testthat/test_classif_RRF.R b/tests/testthat/test_classif_RRF.R index f5c74dd24c..c9bee71296 100644 --- a/tests/testthat/test_classif_RRF.R +++ b/tests/testthat/test_classif_RRF.R @@ -5,7 +5,7 @@ test_that("classif_RRF", { parset.list = list( list(), - list(ntree = 50, mtry = 2), + list(ntree = 50, mtry = 2), list(ntree = 50, mtry = 4) ) @@ -27,11 +27,11 @@ test_that("classif_RRF", { } testSimpleParsets("classif.RRF", multiclass.df, multiclass.target, - multiclass.train.inds, old.predicts.list, parset.list) + multiclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.RRF", multiclass.df, multiclass.target, - multiclass.train.inds, old.probs.list, parset.list) + multiclass.train.inds, old.probs.list, parset.list) tt = RRF::RRF testCVParsets("classif.RRF", multiclass.df, multiclass.target, tune.train = tt, - parset.list = parset.list) + parset.list = parset.list) }) diff --git a/tests/testthat/test_classif_adaboostm1.R b/tests/testthat/test_classif_adaboostm1.R index 999d1f610a..b3ab2967fe 100644 --- a/tests/testthat/test_classif_adaboostm1.R +++ b/tests/testthat/test_classif_adaboostm1.R @@ -30,10 +30,10 @@ test_that("classif_adaboostm1", { testSimpleParsets("classif.adaboostm1", binaryclass.df, binaryclass.target, - binaryclass.train.inds, old.predicts.list, parset.list) + binaryclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.adaboostm1", binaryclass.df, binaryclass.target, - binaryclass.train.inds, old.probs.list, parset.list) + binaryclass.train.inds, old.probs.list, parset.list) for (i in seq_along(parset.list)) { parset = parset.list[[i]] @@ -50,12 +50,13 @@ test_that("classif_adaboostm1", { } testSimpleParsets("classif.adaboostm1", multiclass.df, multiclass.target, - multiclass.train.inds, old.predicts.list, parset.list) + multiclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.adaboostm1", multiclass.df, multiclass.target, - multiclass.train.inds, old.probs.list, parset.list) + multiclass.train.inds, old.probs.list, parset.list) tt = function(formula, data, subset, ...) 
{ + RWeka::AdaBoostM1(formula, data = data[subset, ], control = RWeka::Weka_control(...)) } @@ -63,4 +64,3 @@ test_that("classif_adaboostm1", { testCVParsets("classif.adaboostm1", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp, parset.list = parset.list) }) - diff --git a/tests/testthat/test_classif_bartMachine.R b/tests/testthat/test_classif_bartMachine.R index da115e7c34..e328a356dc 100644 --- a/tests/testthat/test_classif_bartMachine.R +++ b/tests/testthat/test_classif_bartMachine.R @@ -36,9 +36,8 @@ test_that("classif_bartMachine", { testProbParsets("classif.bartMachine", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list) - for (i in seq_along(parset.list)){ + for (i in seq_along(parset.list)) { expect_true(length(old.predicts.list[[i]]) == nrow(binaryclass.test)) expect_true(length(old.probs.list[[i]]) == nrow(binaryclass.test)) } - }) diff --git a/tests/testthat/test_classif_binomial.R b/tests/testthat/test_classif_binomial.R index b4ec429247..44e420623f 100644 --- a/tests/testthat/test_classif_binomial.R +++ b/tests/testthat/test_classif_binomial.R @@ -1,7 +1,6 @@ context("classif_binomial") test_that("classif_binomial", { - parset.list1 = list( list(family = binomial), list(family = binomial(link = "logit")), @@ -22,7 +21,7 @@ test_that("classif_binomial", { parset = parset.list1[[i]] set.seed(getOption("mlr.debug.seed")) m = glm(formula = binaryclass.formula, data = binaryclass.train[, -nof], family = parset$family) - p = predict(m, newdata = binaryclass.test[, -nof], type = "response") + p = predict(m, newdata = binaryclass.test[, -nof], type = "response") p = 1 - p p.class = as.factor(binaryclass.class.levs[ifelse(p > 0.5, 1, 2)]) old.predicts.list[[i]] = p.class diff --git a/tests/testthat/test_classif_boost.R b/tests/testthat/test_classif_boost.R index 058c18aed4..03c40628ca 100644 --- a/tests/testthat/test_classif_boost.R +++ b/tests/testthat/test_classif_boost.R @@ -43,16 +43,19 @@ test_that("classif_boosting", { ) tt = function(formula, data, subset = seq_len(nrow(data)), ...) { + args = list(...) 
- if (!is.null(args$cp)) + if (!is.null(args$cp)) { ctrl = rpart::rpart.control(cp = args$cp, xval = 0) - else + } else { ctrl = rpart::rpart.control(xval = 0) + } set.seed(getOption("mlr.debug.seed")) adabag::boosting(formula, data[subset, ], mfinal = args$mfinal, control = ctrl) } tp = function(model, newdata) { + set.seed(getOption("mlr.debug.seed")) as.factor(predict(model, newdata)$class) } diff --git a/tests/testthat/test_classif_clusterSVM.R b/tests/testthat/test_classif_clusterSVM.R index 85bce41021..f4c347a0bb 100644 --- a/tests/testthat/test_classif_clusterSVM.R +++ b/tests/testthat/test_classif_clusterSVM.R @@ -15,12 +15,12 @@ test_that("classif_clusterSVM", { old.predicts.list = list() - for (i in seq_along(parset.list1)){ + for (i in seq_along(parset.list1)) { parset = parset.list1[[i]] pars = list(data.matrix(binaryclass.train[, -61]), y = binaryclass.train[, 61]) pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) - #model = SwarmSVM::clusterSVM(x = data.matrix(binaryclass.train[,-61]), y = binaryclass.train[,61], + # model = SwarmSVM::clusterSVM(x = data.matrix(binaryclass.train[,-61]), y = binaryclass.train[,61], # centers = 3, seed = 0) m = do.call(SwarmSVM::clusterSVM, pars) old.predicts.list[[i]] = predict(m, data.matrix(binaryclass.test[, -61]))$predictions diff --git a/tests/testthat/test_classif_ctree.R b/tests/testthat/test_classif_ctree.R index 53bc8817a3..9f76b388f6 100644 --- a/tests/testthat/test_classif_ctree.R +++ b/tests/testthat/test_classif_ctree.R @@ -19,7 +19,7 @@ test_that("classif_ctree", { ctrl = do.call(party::ctree_control, parset) set.seed(getOption("mlr.debug.seed")) m = party::ctree(formula = multiclass.formula, data = multiclass.train, control = ctrl) - p = predict(m, newdata = multiclass.test, type = "response") + p = predict(m, newdata = multiclass.test, type = "response") p2 = Reduce(rbind, party::treeresponse(m, newdata = multiclass.test, type = "prob")) rownames(p2) = NULL colnames(p2) = levels(multiclass.df[, multiclass.target]) @@ -39,5 +39,4 @@ test_that("classif_ctree", { ct = makeClassifTask(target = "Species", data = df1) m = train(makeLearner("classif.ctree"), ct) predict(m, newdata = df2) - }) diff --git a/tests/testthat/test_classif_dbnDNN.R b/tests/testthat/test_classif_dbnDNN.R index 92633869d4..d63cd1af96 100644 --- a/tests/testthat/test_classif_dbnDNN.R +++ b/tests/testthat/test_classif_dbnDNN.R @@ -20,25 +20,25 @@ test_that("classif_dbnDNN", { set.seed(getOption("mlr.debug.seed")) - capture.output({ - # neuralnet is not dealing with formula with `.` well - x = data.matrix(binaryclass.train[, -ncol(binaryclass.train)]) - y = binaryclass.train[, ncol(binaryclass.train)] - - dict = sort(unique(y)) - onehot = matrix(0, length(y), length(dict)) - for (j in seq_along(dict)) { - ind = which(y == dict[j]) - onehot[ind, j] = 1 - } - pars = list(x = x, y = onehot) - pars = c(pars, parset) - m = do.call(deepnet::dbn.dnn.train, pars) - p = deepnet::nn.predict(m, data.matrix(binaryclass.test[, -ncol(binaryclass.test)])) - colnames(p) = binaryclass.class.levs - old.predicts.list[[i]] = as.factor(colnames(p)[max.col(p)]) - }) -} + capture.output({ + # neuralnet is not dealing with formula with `.` well + x = data.matrix(binaryclass.train[, -ncol(binaryclass.train)]) + y = binaryclass.train[, ncol(binaryclass.train)] + + dict = sort(unique(y)) + onehot = matrix(0, length(y), length(dict)) + for (j in seq_along(dict)) { + ind = which(y == dict[j]) + onehot[ind, j] = 1 + } + pars = list(x = x, y = onehot) + pars = c(pars, parset) + m = 
do.call(deepnet::dbn.dnn.train, pars) + p = deepnet::nn.predict(m, data.matrix(binaryclass.test[, -ncol(binaryclass.test)])) + colnames(p) = binaryclass.class.levs + old.predicts.list[[i]] = as.factor(colnames(p)[max.col(p)]) + }) + } testSimpleParsets("classif.dbnDNN", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.predicts.list, parset.list2) diff --git a/tests/testthat/test_classif_earth.R b/tests/testthat/test_classif_earth.R index 4b3c27a5c6..4aab753641 100644 --- a/tests/testthat/test_classif_earth.R +++ b/tests/testthat/test_classif_earth.R @@ -61,4 +61,3 @@ test_that("classif_earth can do multiclass classification", { testSimpleParsets("classif.earth", multiclass.df, multiclass.target, multiclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.earth", multiclass.df, multiclass.target, multiclass.train.inds, old.probs.list, parset.list) }) - diff --git a/tests/testthat/test_classif_evtree.R b/tests/testthat/test_classif_evtree.R index bfcc08528b..bce9b573a6 100644 --- a/tests/testthat/test_classif_evtree.R +++ b/tests/testthat/test_classif_evtree.R @@ -27,5 +27,4 @@ test_that("classif_evtree", { old.predicts.list, parset.list) testProbParsets("classif.evtree", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list) - }) diff --git a/tests/testthat/test_classif_extraTrees.R b/tests/testthat/test_classif_extraTrees.R index 2741027eff..949eeae9dc 100644 --- a/tests/testthat/test_classif_extraTrees.R +++ b/tests/testthat/test_classif_extraTrees.R @@ -29,7 +29,7 @@ test_that("classif_extraTrees", { } testSimpleParsets("classif.extraTrees", binaryclass.df, binaryclass.target, binaryclass.train.inds, - old.predicts.list, parset.list) + old.predicts.list, parset.list) testProbParsets("classif.extraTrees", binaryclass.df, binaryclass.target, binaryclass.train.inds, - old.probs.list, parset.list) + old.probs.list, parset.list) }) diff --git a/tests/testthat/test_classif_fdausc.kernel.R b/tests/testthat/test_classif_fdausc.kernel.R index 48a2c18675..cab72a378b 100644 --- a/tests/testthat/test_classif_fdausc.kernel.R +++ b/tests/testthat/test_classif_fdausc.kernel.R @@ -39,18 +39,16 @@ test_that("classif_fdausc.kernel behaves like original api", { # check if the output from the original API matches the mlr learner's output expect_equal(as.character(cp2), as.character(p2)) expect_equal(as.character(cp), as.character(p1)) - }) test_that("predicttype prob for fda.usc", { requirePackagesOrSkip("fda.usc", default.method = "load") - lrn = makeLearner("classif.fdausc.kernel", predict.type = "prob") + lrn = makeLearner("classif.fdausc.kernel", predict.type = "prob") set.seed(getOption("mlr.debug.seed")) m = train(lrn, fda.binary.gp.task) cp = predict(m, newdata = getTaskData(fda.binary.gp.task, target.extra = TRUE, functionals.as = "matrix")$data) - expect_equal(class(cp)[1], "PredictionClassif") - + expect_equal(class(cp)[1], "PredictionClassif") }) test_that("resampling fdausc.kernel", { diff --git a/tests/testthat/test_classif_fdausc.knn.R b/tests/testthat/test_classif_fdausc.knn.R index 3f46615629..09ec148f6d 100644 --- a/tests/testthat/test_classif_fdausc.knn.R +++ b/tests/testthat/test_classif_fdausc.knn.R @@ -38,7 +38,6 @@ test_that("classif_fdausc.knn behaves like original api", { # check if the output from the original API matches the mlr learner's output expect_equal(as.character(cp2), as.character(p2)) expect_equal(as.character(cp), as.character(p1)) - }) test_that("predicttype prob for fda.usc", { @@ -48,8 +47,7 @@ 
test_that("predicttype prob for fda.usc", { set.seed(getOption("mlr.debug.seed")) m = train(lrn, fda.binary.gp.task) cp = predict(m, newdata = getTaskData(fda.binary.gp.task, target.extra = TRUE, functionals.as = "matrix")$data) - expect_equal(class(cp)[1], "PredictionClassif") - + expect_equal(class(cp)[1], "PredictionClassif") }) test_that("resampling fdausc.knn", { diff --git a/tests/testthat/test_classif_fdausc.np.R b/tests/testthat/test_classif_fdausc.np.R index f865d4accf..63eb261fd5 100644 --- a/tests/testthat/test_classif_fdausc.np.R +++ b/tests/testthat/test_classif_fdausc.np.R @@ -17,7 +17,7 @@ test_that("classif_fdausc.np behaves like original api", { a1 = fda.usc::classif.np(glearn, mlearn) # restructure internal function call (language-object) a1$C[[1]] = quote(classif.np) - #newdat = list("x"=mtest) + # newdat = list("x"=mtest) p1 = predict(a1, mtest) p2 = predict(a1, mlearn) @@ -40,5 +40,4 @@ test_that("classif_fdausc.np behaves like original api", { # check if the output from the original API matches the mlr learner's output expect_equal(as.character(cp2), as.character(p2)) expect_equal(as.character(cp), as.character(p1)) - }) diff --git a/tests/testthat/test_classif_featureless.R b/tests/testthat/test_classif_featureless.R index 0674d36934..e0ddd5cf0d 100644 --- a/tests/testthat/test_classif_featureless.R +++ b/tests/testthat/test_classif_featureless.R @@ -3,6 +3,7 @@ context("classif_featureless") test_that("classif_featureless", { # check content of learner model checkLearnerModel = function(mod, probs) { + lmod = getLearnerModel(mod) expect_named(lmod, c("method", "probs")) expect_equal(lmod$method, m) diff --git a/tests/testthat/test_classif_fnn.R b/tests/testthat/test_classif_fnn.R index 7acd3ea284..6e66732fbc 100644 --- a/tests/testthat/test_classif_fnn.R +++ b/tests/testthat/test_classif_fnn.R @@ -33,10 +33,12 @@ test_that("classif_fnn", { testSimpleParsets("classif.fnn", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.predicts.list2, parset.list) tt = function(formula, data, k = 1) { + j = which(colnames(data) == as.character(formula)[2]) list(train = data[, -j], cl = data[, j], k = k, target = j) } tp = function(model, newdata) { + newdata = newdata[, -model$target] FNN::knn(train = model$train, test = newdata, cl = model$cl, k = model$k) } diff --git a/tests/testthat/test_classif_gamboost.R b/tests/testthat/test_classif_gamboost.R index 49db74804b..5c1f603cd3 100644 --- a/tests/testthat/test_classif_gamboost.R +++ b/tests/testthat/test_classif_gamboost.R @@ -36,10 +36,10 @@ test_that("classif_gamboost", { test_that("classif_gamboost probability predictions with family 'AUC' and 'AdaExp'", { families = list("AUC", "AdaExp") - lapply(families, FUN = function(x){ + lapply(families, FUN = function(x) { + lrn = makeLearner("classif.gamboost", par.vals = list(family = x), predict.type = "prob") mod = train(lrn, binaryclass.task) expect_error(predict(mod, binaryclass.task), "support probabilities") }) }) - diff --git a/tests/testthat/test_classif_gaterSVM.R b/tests/testthat/test_classif_gaterSVM.R index 99a186f804..4b9e9a4203 100644 --- a/tests/testthat/test_classif_gaterSVM.R +++ b/tests/testthat/test_classif_gaterSVM.R @@ -7,12 +7,12 @@ test_that("classif_gaterSVM", { # Early Prediction model = SwarmSVM::gaterSVM(x = data.matrix(binaryclass.train[, -61]), y = binaryclass.train[, 61], - m = 2, max.iter = 1, seed = 0) + m = 2, max.iter = 1, seed = 0) p = predict(model, data.matrix(binaryclass.test[, -61])) p = factor(p, labels = 
levels(binaryclass.train[, 61])) testSimple("classif.gaterSVM", binaryclass.df, binaryclass.target, binaryclass.train.inds, p, - parset = list(m = 2, max.iter = 1, seed = 0)) + parset = list(m = 2, max.iter = 1, seed = 0)) # Prediction result containing only one class data = data.frame(a = c(1, 2, 1, 2), b = c(1, 1, 2, 2), c = c("a", "b", "a", "b")) diff --git a/tests/testthat/test_classif_gausspr.R b/tests/testthat/test_classif_gausspr.R index aae42a63a4..b2be62eade 100644 --- a/tests/testthat/test_classif_gausspr.R +++ b/tests/testthat/test_classif_gausspr.R @@ -1,7 +1,6 @@ context("classif_gausspr") test_that("classif_gausspr", { - requirePackages("kernlab", default.method = "load") parset.list = list( @@ -19,7 +18,7 @@ test_that("classif_gausspr", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) m = do.call(kernlab::gausspr, pars) - p = kernlab::predict(m, newdata = multiclass.test[, -5], type = "response") + p = kernlab::predict(m, newdata = multiclass.test[, -5], type = "response") p2 = kernlab::predict(m, newdata = multiclass.test[, -5], type = "probabilities") old.predicts.list[[i]] = p old.probs.list[[i]] = p2 diff --git a/tests/testthat/test_classif_gbm.R b/tests/testthat/test_classif_gbm.R index b3f478054a..e22d159b29 100644 --- a/tests/testthat/test_classif_gbm.R +++ b/tests/testthat/test_classif_gbm.R @@ -13,7 +13,7 @@ test_that("classif_gbm", { old.probs.list = list() mydata = binaryclass.train - mydata[, binaryclass.target] = as.numeric(mydata[, binaryclass.target] == getTaskDesc(binaryclass.task)$positive) + mydata[, binaryclass.target] = as.numeric(mydata[, binaryclass.target] == getTaskDesc(binaryclass.task)$positive) for (i in seq_along(parset.list)) { parset = parset.list[[i]] pars = list(binaryclass.formula, data = mydata, distribution = "bernoulli") diff --git a/tests/testthat/test_classif_geoDA.R b/tests/testthat/test_classif_geoDA.R index 801086d1ca..fbe30ccb78 100644 --- a/tests/testthat/test_classif_geoDA.R +++ b/tests/testthat/test_classif_geoDA.R @@ -5,16 +5,18 @@ test_that("classif_geoDA", { set.seed(getOption("mlr.debug.seed")) m = DiscriMiner::geoDA(multiclass.train[, -multiclass.class.col], group = multiclass.train[, multiclass.class.col]) - p = DiscriMiner::classify(m, newdata = multiclass.test[, -multiclass.class.col]) + p = DiscriMiner::classify(m, newdata = multiclass.test[, -multiclass.class.col]) testSimple("classif.geoDA", multiclass.df, multiclass.target, multiclass.train.inds, p$pred_class) tt = function(formula, data, subset, ...) 
{ + j = which(colnames(data) == as.character(formula)[2]) m = DiscriMiner::geoDA(variables = data[subset, -j], group = data[subset, j]) list(model = m, target = j) } tp = function(model, newdata) { + DiscriMiner::classify(model$model, newdata = newdata[, -model$target])$pred_class } diff --git a/tests/testthat/test_classif_glmboost.R b/tests/testthat/test_classif_glmboost.R index 00a7f78014..66a1575a00 100644 --- a/tests/testthat/test_classif_glmboost.R +++ b/tests/testthat/test_classif_glmboost.R @@ -36,7 +36,8 @@ test_that("classif_glmboost", { test_that("classif_glmboost probability predictions with family 'AUC' and 'AdaExp'", { families = list("AUC", "AdaExp") - lapply(families, FUN = function(x){ + lapply(families, FUN = function(x) { + lrn = makeLearner("classif.glmboost", par.vals = list(family = x), predict.type = "prob") mod = train(lrn, binaryclass.task) expect_error(predict(mod, binaryclass.task), "support probabilities") @@ -85,4 +86,3 @@ test_that("classif_glmboost probability predictions with family 'AUC' and 'AdaEx # testProb("classif.glmboost", new.binary.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list2) # # }) - diff --git a/tests/testthat/test_classif_glmnet.R b/tests/testthat/test_classif_glmnet.R index 310140ed64..403bd2a159 100644 --- a/tests/testthat/test_classif_glmnet.R +++ b/tests/testthat/test_classif_glmnet.R @@ -15,8 +15,9 @@ test_that("classif_glmnet", { for (i in seq_along(parset.list)) { parset = parset.list[[i]] s = parset[["s"]] - if (is.null(s)) + if (is.null(s)) { s = 0.01 + } parset[["s"]] = NULL x = binaryclass.train y = x[, binaryclass.class.col] @@ -45,5 +46,4 @@ test_that("classif_glmnet", { binaryclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.glmnet", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list) - }) diff --git a/tests/testthat/test_classif_h2odeeplearning.R b/tests/testthat/test_classif_h2odeeplearning.R index 8185e7c143..d3e296ca2e 100644 --- a/tests/testthat/test_classif_h2odeeplearning.R +++ b/tests/testthat/test_classif_h2odeeplearning.R @@ -11,7 +11,7 @@ test_that("classif_h2odeeplearning", { list(hidden = 2L, rate = 0.2), list(hidden = 2L, rate_decay = 0.2) ) - #h20deeplearning needs seed in function call to be reproducible + # h2odeeplearning needs seed in function call to be reproducible debug.seed = getOption("mlr.debug.seed") parset.list = lapply(parset.list, function(x) c(x, seed = debug.seed, reproducible = TRUE)) old.probs.list = list() @@ -22,7 +22,7 @@ test_that("classif_h2odeeplearning", { y = binaryclass.target, training_frame = h2o::as.h2o(binaryclass.train))) m = do.call(h2o::h2o.deeplearning, parset) - p = predict(m, newdata = h2o::as.h2o(binaryclass.test)) + p = predict(m, newdata = h2o::as.h2o(binaryclass.test)) old.probs.list[[i]] = as.data.frame(p)[, 2L] } @@ -35,7 +35,7 @@ test_that("class names are integers and probabilities predicted (#1787)", { df = cbind(classx, df) classif.task = makeClassifTask(id = "example", data = df, target = "classx") - gb.lrn = makeLearner("classif.h2o.deeplearning", predict.type = "prob") + gb.lrn = makeLearner("classif.h2o.deeplearning", predict.type = "prob") rdesc = makeResampleDesc("CV", iters = 3, stratify = TRUE) rin = makeResampleInstance(rdesc, task = classif.task) r = resample(gb.lrn, classif.task, rin) diff --git a/tests/testthat/test_classif_h2ogbm.R b/tests/testthat/test_classif_h2ogbm.R index 5d141b0e6e..18aab55f04 100644 --- a/tests/testthat/test_classif_h2ogbm.R +++ 
b/tests/testthat/test_classif_h2ogbm.R @@ -20,7 +20,7 @@ test_that("classif_h2ogbm", { training_frame = h2o::as.h2o(binaryclass.train))) set.seed(getOption("mlr.debug.seed")) m = do.call(h2o::h2o.gbm, parset) - p = predict(m, newdata = h2o::as.h2o(binaryclass.test)) + p = predict(m, newdata = h2o::as.h2o(binaryclass.test)) old.probs.list[[i]] = as.data.frame(p)[, 2L] } @@ -33,7 +33,7 @@ test_that("class names are integers and probabilities predicted (#1787)", { df = cbind(classx, df) classif.task = makeClassifTask(id = "example", data = df, target = "classx") - gb.lrn = makeLearner("classif.h2o.gbm", predict.type = "prob") + gb.lrn = makeLearner("classif.h2o.gbm", predict.type = "prob") rdesc = makeResampleDesc("CV", iters = 3, stratify = TRUE) rin = makeResampleInstance(rdesc, task = classif.task) r = resample(gb.lrn, classif.task, rin) diff --git a/tests/testthat/test_classif_h2oglm.R b/tests/testthat/test_classif_h2oglm.R index 743b4a3ebb..f99f880236 100644 --- a/tests/testthat/test_classif_h2oglm.R +++ b/tests/testthat/test_classif_h2oglm.R @@ -19,7 +19,7 @@ test_that("classif_h2oglm", { training_frame = h2o::as.h2o(binaryclass.train))) set.seed(getOption("mlr.debug.seed")) m = do.call(h2o::h2o.glm, parset) - p = predict(m, newdata = h2o::as.h2o(binaryclass.test)) + p = predict(m, newdata = h2o::as.h2o(binaryclass.test)) old.probs.list[[i]] = as.data.frame(p)[, 2] } @@ -32,7 +32,7 @@ test_that("class names are integers and probabilities predicted (#1787)", { df = cbind(classx, df) classif.task = makeClassifTask(id = "example", data = df, target = "classx") - gb.lrn = makeLearner("classif.h2o.glm", predict.type = "prob") + gb.lrn = makeLearner("classif.h2o.glm", predict.type = "prob") rdesc = makeResampleDesc("CV", iters = 3, stratify = TRUE) rin = makeResampleInstance(rdesc, task = classif.task) r = resample(gb.lrn, classif.task, rin) diff --git a/tests/testthat/test_classif_h2orandomForest.R b/tests/testthat/test_classif_h2orandomForest.R index 9e673d50b0..d8571564e3 100644 --- a/tests/testthat/test_classif_h2orandomForest.R +++ b/tests/testthat/test_classif_h2orandomForest.R @@ -10,7 +10,7 @@ test_that("classif_h2orandomForest", { list(ntrees = 4), list(ntrees = 4, mtries = 2) ) - #h2orandomForest needs seed in function call to be reproducible + # h2orandomForest needs seed in function call to be reproducible debug.seed = getOption("mlr.debug.seed") parset.list = lapply(parset.list, function(x) c(x, seed = debug.seed)) old.probs.list = list() @@ -21,7 +21,7 @@ test_that("classif_h2orandomForest", { y = binaryclass.target, training_frame = h2o::as.h2o(binaryclass.train))) m = do.call(h2o::h2o.randomForest, parset) - p = predict(m, newdata = h2o::as.h2o(binaryclass.test)) + p = predict(m, newdata = h2o::as.h2o(binaryclass.test)) old.probs.list[[i]] = as.data.frame(p)[, 2L] } testProbParsets("classif.h2o.randomForest", binaryclass.df, binaryclass.target, @@ -34,7 +34,7 @@ test_that("class names are integers and probabilities predicted (#1787)", { df = cbind(classx, df) classif.task = makeClassifTask(id = "example", data = df, target = "classx") - gb.lrn = makeLearner("classif.h2o.randomForest", predict.type = "prob") + gb.lrn = makeLearner("classif.h2o.randomForest", predict.type = "prob") rdesc = makeResampleDesc("CV", iters = 3, stratify = TRUE) rin = makeResampleInstance(rdesc, task = classif.task) r = resample(gb.lrn, classif.task, rin) diff --git a/tests/testthat/test_classif_kknn.R b/tests/testthat/test_classif_kknn.R index e0317b4ac1..9173779a51 100644 --- 
a/tests/testthat/test_classif_kknn.R +++ b/tests/testthat/test_classif_kknn.R @@ -30,9 +30,11 @@ test_that("classif_kknn", { old.probs.list, parset.list) tt = function(formula, data, k = 7) { + return(list(formula = formula, data = data, k = k)) } tp = function(model, newdata) { + kknn::kknn(model$formula, train = model$data, test = newdata, k = model$k)$fitted } diff --git a/tests/testthat/test_classif_knn.R b/tests/testthat/test_classif_knn.R index fb1aa21db2..6a406879c4 100644 --- a/tests/testthat/test_classif_knn.R +++ b/tests/testthat/test_classif_knn.R @@ -29,9 +29,11 @@ test_that("classif_knn", { old.predicts.list, parset.list) tt = function(formula, data, k = 1) { + return(list(formula = formula, data = data, k = k)) } tp = function(model, newdata) { + target = as.character(model$formula)[2] train = model$data y = train[, target] diff --git a/tests/testthat/test_classif_ksvm.R b/tests/testthat/test_classif_ksvm.R index 3fb44537bf..a69d4a5253 100644 --- a/tests/testthat/test_classif_ksvm.R +++ b/tests/testthat/test_classif_ksvm.R @@ -28,24 +28,25 @@ test_that("classif_ksvm", { set.seed(getOption("mlr.debug.seed")) m = do.call(kernlab::ksvm, pars) - old.predicts.list[[i]] = kernlab::predict(m, newdata = multiclass.test) + old.predicts.list[[i]] = kernlab::predict(m, newdata = multiclass.test) old.probs.list[[i]] = kernlab::predict(m, newdata = multiclass.test, type = "prob") } testSimpleParsets("classif.ksvm", multiclass.df, multiclass.target, - multiclass.train.inds, old.predicts.list, parset.list2) + multiclass.train.inds, old.predicts.list, parset.list2) testProbParsets("classif.ksvm", multiclass.df, multiclass.target, multiclass.train.inds, old.probs.list, parset.list2) tt = function(formula, data, subset = 1:150, ...) { + kernlab::ksvm(x = formula, data = data[subset, ], kernel = "polydot", kpar = list(degree = 3, offset = 2, scale = 1.5)) } tp = function(model, newdata, ...) { + kernlab::predict(model, newdata = newdata) } testCV("classif.ksvm", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp, parset = list(kernel = "polydot", degree = 3, offset = 2, scale = 1.5)) - }) diff --git a/tests/testthat/test_classif_linDA.R b/tests/testthat/test_classif_linDA.R index 4559031fc5..1776ab105d 100644 --- a/tests/testthat/test_classif_linDA.R +++ b/tests/testthat/test_classif_linDA.R @@ -5,16 +5,18 @@ test_that("classif_linDA", { set.seed(getOption("mlr.debug.seed")) m = DiscriMiner::linDA(multiclass.train[, -multiclass.class.col], group = multiclass.train[, multiclass.class.col]) - p = DiscriMiner::classify(m, newdata = multiclass.test[, -multiclass.class.col]) + p = DiscriMiner::classify(m, newdata = multiclass.test[, -multiclass.class.col]) testSimple("classif.linDA", multiclass.df, multiclass.target, multiclass.train.inds, p$pred_class) tt = function(formula, data, subset, ...) 
{ + j = which(colnames(data) == as.character(formula)[2]) m = DiscriMiner::linDA(variables = data[subset, -j], group = data[subset, j]) list(model = m, target = j) } tp = function(model, newdata) { + DiscriMiner::classify(model$model, newdata = newdata[, -model$target])$pred_class } diff --git a/tests/testthat/test_classif_liquidSVM.R b/tests/testthat/test_classif_liquidSVM.R index 3a531aa005..0585033f59 100644 --- a/tests/testthat/test_classif_liquidSVM.R +++ b/tests/testthat/test_classif_liquidSVM.R @@ -14,7 +14,7 @@ test_that("classif_liquidSVM", { list(clipping = 0), list(gamma_steps = 5, min_gamma = 0.1, max_gamma = 25, lambda_steps = 5, min_lambda = 0.1, max_lambda = 25), list(useCells = TRUE), - list(gammas = c(0.1,1,10), lambdas = c(0.1,1,10), c_values = c(0.1,1,10)) + list(gammas = c(0.1, 1, 10), lambdas = c(0.1, 1, 10), c_values = c(0.1, 1, 10)) ) # Kernel, more advanced parameters... @@ -32,5 +32,4 @@ test_that("classif_liquidSVM", { testSimpleParsets("classif.liquidSVM", multiclass.df, multiclass.target, multiclass.train.inds, old.predicts.list, parset.list[1]) - }) diff --git a/tests/testthat/test_classif_logreg.R b/tests/testthat/test_classif_logreg.R index 830ceba1f7..2a780e048e 100644 --- a/tests/testthat/test_classif_logreg.R +++ b/tests/testthat/test_classif_logreg.R @@ -13,8 +13,12 @@ test_that("classif_logreg", { testProb("classif.logreg", binaryclass.df, binaryclass.target, binaryclass.train.inds, p.prob) - tt = function(formula, data) {glm(formula, data = data, family = binomial)} + tt = function(formula, data) { + + glm(formula, data = data, family = binomial) + } tp = function(model, newdata) { + p = predict(model, newdata, type = "response") as.factor(binaryclass.class.levs[ifelse(p > 0.5, 2, 1)]) } diff --git a/tests/testthat/test_classif_lssvm.R b/tests/testthat/test_classif_lssvm.R index da8e53072d..a6c3bc5618 100644 --- a/tests/testthat/test_classif_lssvm.R +++ b/tests/testthat/test_classif_lssvm.R @@ -27,7 +27,7 @@ test_that("classif_lssvm", { } testSimpleParsets("classif.lssvm", multiclass.df, multiclass.target, - multiclass.train.inds, old.predicts.list, parset.list2) + multiclass.train.inds, old.predicts.list, parset.list2) # Bug in kernel = "polydot" @@ -37,10 +37,12 @@ test_that("classif_lssvm", { # testSimple("classif.lssvm", multiclass.df, multiclass.target, multiclass.train.inds, p, parset=list(kernel="polydot", degree=3, offset=2, scale=1.5)) tt = function(formula, data, subset = 1:150, ...) { + kernlab::lssvm(x = formula, data = data[subset, ], kernel = "rbfdot", kpar = list(sigma = 20)) } tp = function(model, newdata, ...) 
{ + kernlab::predict(model, newdata = newdata) } diff --git a/tests/testthat/test_classif_mda.R b/tests/testthat/test_classif_mda.R index e121a89321..bb69eedcf9 100644 --- a/tests/testthat/test_classif_mda.R +++ b/tests/testthat/test_classif_mda.R @@ -25,7 +25,7 @@ test_that("classif_mda", { set.seed(getOption("mlr.debug.seed")) m = do.call(mda::mda, pars) set.seed(getOption("mlr.debug.seed")) - p = predict(m, newdata = multiclass.test) + p = predict(m, newdata = multiclass.test) set.seed(getOption("mlr.debug.seed")) p2 = predict(m, newdata = multiclass.test, type = "posterior") old.predicts.list[[i]] = p @@ -44,5 +44,4 @@ test_that("classif_mda", { parset.list = parset.list2) testCV("classif.mda", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp, parset = list(start.method = "lvq", subclasses = 17)) - }) diff --git a/tests/testthat/test_classif_mlp.R b/tests/testthat/test_classif_mlp.R index dd801dd19a..fe67579f20 100644 --- a/tests/testthat/test_classif_mlp.R +++ b/tests/testthat/test_classif_mlp.R @@ -35,4 +35,3 @@ test_that("classif_mlp", { testSimple("classif.mlp", binaryclass.df, binaryclass.target, binaryclass.train.inds, p, parset = list(size = 7, maxit = 100)) }) - diff --git a/tests/testthat/test_classif_multinom.R b/tests/testthat/test_classif_multinom.R index ae13ce50c3..e2d002b7a3 100644 --- a/tests/testthat/test_classif_multinom.R +++ b/tests/testthat/test_classif_multinom.R @@ -4,7 +4,9 @@ test_that("classif_multinom", { requirePackagesOrSkip("nnet", default.method = "load") set.seed(getOption("mlr.debug.seed")) - capture.output({m = nnet::multinom(formula = multiclass.formula, data = multiclass.train)}) + capture.output({ + m = nnet::multinom(formula = multiclass.formula, data = multiclass.train) + }) set.seed(getOption("mlr.debug.seed")) p = predict(m, newdata = multiclass.test) diff --git a/tests/testthat/test_classif_naiveBayes.R b/tests/testthat/test_classif_naiveBayes.R index f2d12b5a3a..f4842ac9ce 100644 --- a/tests/testthat/test_classif_naiveBayes.R +++ b/tests/testthat/test_classif_naiveBayes.R @@ -4,7 +4,7 @@ test_that("classif_naiveBayes", { requirePackagesOrSkip("e1071", default.method = "load") m = e1071::naiveBayes(formula = multiclass.formula, data = multiclass.train) - p = predict(m, newdata = multiclass.test[, -multiclass.class.col]) + p = predict(m, newdata = multiclass.test[, -multiclass.class.col]) p2 = predict(m, newdata = multiclass.test[, -multiclass.class.col], type = "raw") testSimple("classif.naiveBayes", multiclass.df, multiclass.target, multiclass.train.inds, p) diff --git a/tests/testthat/test_classif_neuralnet.R b/tests/testthat/test_classif_neuralnet.R index 3a12334206..641663287f 100644 --- a/tests/testthat/test_classif_neuralnet.R +++ b/tests/testthat/test_classif_neuralnet.R @@ -51,12 +51,12 @@ test_that("classif_neuralnet", { testSimple("classif.neuralnet", binaryclass.df, binaryclass.target, binaryclass.train.inds, p, parset = list(hidden = 7, err.fct = "ce")) -# Neuralnet doesn't have the `predict` method -# set.seed(getOption("mlr.debug.seed")) -# lrn = makeLearner("classif.neuralnet",hidden=7) -# task = makeClassifTask(data = binaryclass.df, target = binaryclass.target) -# m2 = try(train(lrn, task, subset = binaryclass.train.inds)) -# p2 = predictLearner(.learner=lrn,.model=m2, -# .newdata = binaryclass.test[,-ncol(binaryclass.test)]) -# expect_equal(p,p2,tol=1e-4) + # Neuralnet doesn't have the `predict` method + # set.seed(getOption("mlr.debug.seed")) + # lrn = makeLearner("classif.neuralnet",hidden=7) + # task = 
makeClassifTask(data = binaryclass.df, target = binaryclass.target) + # m2 = try(train(lrn, task, subset = binaryclass.train.inds)) + # p2 = predictLearner(.learner=lrn,.model=m2, + # .newdata = binaryclass.test[,-ncol(binaryclass.test)]) + # expect_equal(p,p2,tol=1e-4) }) diff --git a/tests/testthat/test_classif_nnTrain.R b/tests/testthat/test_classif_nnTrain.R index 3895e8a9c3..258f0d493b 100644 --- a/tests/testthat/test_classif_nnTrain.R +++ b/tests/testthat/test_classif_nnTrain.R @@ -25,7 +25,7 @@ test_that("classif_nnTrain", { set.seed(getOption("mlr.debug.seed")) testSimple("classif.nnTrain", binaryclass.df, binaryclass.target, binaryclass.train.inds, p, - parset = list()) + parset = list()) # test with params passed diff --git a/tests/testthat/test_classif_nnet.R b/tests/testthat/test_classif_nnet.R index 5da3bebbdc..e7bdb0c931 100644 --- a/tests/testthat/test_classif_nnet.R +++ b/tests/testthat/test_classif_nnet.R @@ -24,6 +24,7 @@ test_that("classif_nnet", { binaryclass.train.inds, p3, parset = list()) tt = function(formula, data, subset = 1:150, ...) { + nnet::nnet(formula, data = data[subset, ], size = 7, maxit = 50) } tp = function(model, newdata) as.factor(predict(model, newdata, type = "class")) @@ -43,4 +44,3 @@ test_that("classif_nnet", { pred2 = predict(mod, task = task) expect_equal(pred1$data$response, pred2$data$response) }) - diff --git a/tests/testthat/test_classif_nodeHarvest.R b/tests/testthat/test_classif_nodeHarvest.R index f0749a9852..53ef30c57f 100644 --- a/tests/testthat/test_classif_nodeHarvest.R +++ b/tests/testthat/test_classif_nodeHarvest.R @@ -27,5 +27,5 @@ test_that("classif_nodeHarvest", { testSimpleParsets("classif.nodeHarvest", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.nodeHarvest", binaryclass.df, binaryclass.target, - binaryclass.train.inds, old.probs.list, parset.list) + binaryclass.train.inds, old.probs.list, parset.list) }) diff --git a/tests/testthat/test_classif_pamr.R b/tests/testthat/test_classif_pamr.R index 643867e267..91abc8ab8c 100644 --- a/tests/testthat/test_classif_pamr.R +++ b/tests/testthat/test_classif_pamr.R @@ -21,7 +21,9 @@ test_that("classif_pamr", { } else { threshold.predict = 1 } - capture.output({m = do.call(pamr::pamr.train, parset)}) + capture.output({ + m = do.call(pamr::pamr.train, parset) + }) newdata = t(binaryclass.test[, -binaryclass.class.col]) old.predicts.list[[i]] = pamr::pamr.predict(m, newdata, threshold = threshold.predict) old.probs.list[[i]] = pamr::pamr.predict(m, newdata, type = "posterior", threshold = threshold.predict)[, 1L] @@ -30,5 +32,5 @@ test_that("classif_pamr", { testSimpleParsets("classif.pamr", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.predicts.list, parset.list) testProbParsets("classif.pamr", binaryclass.df, binaryclass.target, binaryclass.train.inds, - old.probs.list, parset.list) + old.probs.list, parset.list) }) diff --git a/tests/testthat/test_classif_penalized.R b/tests/testthat/test_classif_penalized.R index c9a0bf353e..1bd2a79b36 100644 --- a/tests/testthat/test_classif_penalized.R +++ b/tests/testthat/test_classif_penalized.R @@ -36,10 +36,12 @@ test_that("classif_penalized", { ) tt = function(formula, data, subset = seq_len(nrow(data)), ...) { + penalized::penalized(formula, data = data[subset, ], ...) } tp = function(model, newdata, ...) { + pred = penalized::predict(model, data = newdata, ...) 
ifelse(pred > 0.5, binaryclass.class.levs[2L], binaryclass.class.levs[1L]) } diff --git a/tests/testthat/test_classif_plsdaCaret.R b/tests/testthat/test_classif_plsdaCaret.R index 0a1be6be9c..44bd3578c8 100644 --- a/tests/testthat/test_classif_plsdaCaret.R +++ b/tests/testthat/test_classif_plsdaCaret.R @@ -35,5 +35,4 @@ test_that("classif_plsdaCaret", { old.predicts.list, parset.list) testProbParsets("classif.plsdaCaret", binaryclass.df, binaryclass.target, binaryclass.train.inds, old.probs.list, parset.list) - }) diff --git a/tests/testthat/test_classif_probit.R b/tests/testthat/test_classif_probit.R index 80b9fdf4c4..a6e6514fe2 100644 --- a/tests/testthat/test_classif_probit.R +++ b/tests/testthat/test_classif_probit.R @@ -10,8 +10,12 @@ test_that("classif_probit", { testSimple("classif.probit", binaryclass.df, binaryclass.target, binaryclass.train.inds, p.class) testProb("classif.probit", binaryclass.df, binaryclass.target, binaryclass.train.inds, p.prob) - tt = function(formula, data) {glm(formula, data = data, family = binomial(link = "probit"))} + tt = function(formula, data) { + + glm(formula, data = data, family = binomial(link = "probit")) + } tp = function(model, newdata) { + p = predict(model, newdata, type = "response") as.factor(binaryclass.class.levs[ifelse(p > 0.5, 2, 1)]) } diff --git a/tests/testthat/test_classif_qda.R b/tests/testthat/test_classif_qda.R index d1a58e109d..83ea2e96bf 100644 --- a/tests/testthat/test_classif_qda.R +++ b/tests/testthat/test_classif_qda.R @@ -4,9 +4,9 @@ test_that("classif_qda", { requirePackagesOrSkip("MASS", default.method = "load") m = try(MASS::qda(formula = multiclass.formula, data = multiclass.train)) if (class(m) != "try-error") { - p = predict(m, newdata = multiclass.test) + p = predict(m, newdata = multiclass.test) } else { - p = m + p = m } testSimple("classif.qda", multiclass.df, multiclass.target, multiclass.train.inds, p$class) diff --git a/tests/testthat/test_classif_quaDA.R b/tests/testthat/test_classif_quaDA.R index b7aadb2882..8f74e78d8c 100644 --- a/tests/testthat/test_classif_quaDA.R +++ b/tests/testthat/test_classif_quaDA.R @@ -4,18 +4,20 @@ test_that("classif_quaDA", { requirePackagesOrSkip("DiscriMiner", default.method = "load") set.seed(getOption("mlr.debug.seed")) m = DiscriMiner::quaDA(multiclass.train[, -multiclass.class.col], group = multiclass.train[, multiclass.class.col]) - #m2 = DiscriMiner::quaDA(binaryclass.train[,1:10], group = binaryclass.train[,binaryclass.class.col], prob = TRUE) - p = DiscriMiner::classify(m, newdata = multiclass.test[, -multiclass.class.col]) - #p2 = DiscriMiner::classify(m2, newdata = binaryclass.test[,1:10]) + # m2 = DiscriMiner::quaDA(binaryclass.train[,1:10], group = binaryclass.train[,binaryclass.class.col], prob = TRUE) + p = DiscriMiner::classify(m, newdata = multiclass.test[, -multiclass.class.col]) + # p2 = DiscriMiner::classify(m2, newdata = binaryclass.test[,1:10]) testSimple("classif.quaDA", multiclass.df, multiclass.target, multiclass.train.inds, p$pred_class) tt = function(formula, data, subset, ...) 
{ + j = which(colnames(data) == as.character(formula)[2]) m = DiscriMiner::quaDA(variables = data[subset, -j], group = data[subset, j]) list(model = m, target = j) } tp = function(model, newdata) { + DiscriMiner::classify(model$model, newdata = newdata[, -model$target])$pred_class } diff --git a/tests/testthat/test_classif_randomForest.R b/tests/testthat/test_classif_randomForest.R index 64471f0a75..4e5f3f4935 100644 --- a/tests/testthat/test_classif_randomForest.R +++ b/tests/testthat/test_classif_randomForest.R @@ -4,7 +4,7 @@ test_that("classif_randomForest", { requirePackagesOrSkip("randomForest", default.method = "load") parset.list = list( list(), - list(ntree = 50, mtry = 2), + list(ntree = 50, mtry = 2), list(ntree = 50, mtry = 4), list(ntree = 200, mtry = 2), list(ntree = 2000, mtry = 4, proximity = TRUE, oob.prox = TRUE) @@ -37,12 +37,12 @@ test_that("classif_randomForest", { testCVParsets("classif.randomForest", multiclass.df, multiclass.target, tune.train = tt, parset.list = parset.list) # FIXME test RF with one constant feature - #data = multiclass.df - #data = data[, c(1,5)] - #data[, 1] = 1 - #task = makeClassifTask(data=data, target=multiclass.target) - #m = train(makeLearner("classif.randomForest"), task) - #p = predict(m, task=task) + # data = multiclass.df + # data = data[, c(1,5)] + # data[, 1] = 1 + # task = makeClassifTask(data=data, target=multiclass.target) + # m = train(makeLearner("classif.randomForest"), task) + # p = predict(m, task=task) }) test_that("fix factors work", { diff --git a/tests/testthat/test_classif_ranger.R b/tests/testthat/test_classif_ranger.R index 191e4e8554..a0c0615b94 100644 --- a/tests/testthat/test_classif_ranger.R +++ b/tests/testthat/test_classif_ranger.R @@ -17,7 +17,7 @@ test_that("classif_ranger", { parset = c(parset, list(data = binaryclass.train, formula = binaryclass.formula, write.forest = TRUE, probability = TRUE, respect.unordered.factors = TRUE)) set.seed(getOption("mlr.debug.seed")) m = do.call(ranger::ranger, parset) - p = predict(m, data = binaryclass.test) + p = predict(m, data = binaryclass.test) old.probs.list[[i]] = p$predictions[, 1] } diff --git a/tests/testthat/test_classif_rda.R b/tests/testthat/test_classif_rda.R index 5c1f89a947..1c41144e8a 100644 --- a/tests/testthat/test_classif_rda.R +++ b/tests/testthat/test_classif_rda.R @@ -17,7 +17,7 @@ test_that("classif_rda", { list(gamma = 0.1, lambda = 0.1), list(gamma = 0.5, lambda = 1), list(gamma = 1, lambda = 0) - ) + ) old.predicts.list = list() old.probs.list = list() diff --git a/tests/testthat/test_classif_rknn.R b/tests/testthat/test_classif_rknn.R index 7d623f663d..1651f6cefd 100644 --- a/tests/testthat/test_classif_rknn.R +++ b/tests/testthat/test_classif_rknn.R @@ -8,7 +8,7 @@ test_that("classif_rknn", { mtry = c(2L, 3L) parset.grid = expand.grid(k = k, r = r, mtry = mtry) parset.list = apply(parset.grid, MARGIN = 1L, as.list) - #rknn needs integer seed for reproducibility + # rknn needs integer seed for reproducibility parset.list = lapply(parset.list, function(x) c(x, seed = 2015L)) parset.list = c(parset.list, list(list(seed = 2015L))) @@ -28,24 +28,26 @@ test_that("classif_rknn", { } testSimpleParsets("classif.rknn", multiclass.df, multiclass.target, multiclass.train.inds, - old.predicts.list, parset.list) + old.predicts.list, parset.list) tt = function(formula, data, k = 1L, r = 500L, mtry = 2L, seed = 2015L, cluster = NULL) { + return(list(formula = formula, data = data, k = k, r = r, mtry = mtry, - seed = seed, cluster = cluster)) + seed = seed, 
cluster = cluster)) } tp = function(model, newdata) { + target = as.character(model$formula)[2L] train = model$data y = train[, target] train[, target] = NULL newdata[, target] = NULL rknn::rknn(data = train, y = y, newdata = newdata, k = model$k, r = model$r, - mtry = model$mtry, seed = model$seed, cluster = model$cluster)$pred + mtry = model$mtry, seed = model$seed, cluster = model$cluster)$pred } - testCVParsets(t.name = "classif.rknn", df = multiclass.df, - target = multiclass.target, tune.train = tt, tune.predict = tp, - parset.list = parset.list) + testCVParsets(t.name = "classif.rknn", df = multiclass.df, + target = multiclass.target, tune.train = tt, tune.predict = tp, + parset.list = parset.list) }) diff --git a/tests/testthat/test_classif_rotationForest.R b/tests/testthat/test_classif_rotationForest.R index bb45104689..bda0fd0ae7 100644 --- a/tests/testthat/test_classif_rotationForest.R +++ b/tests/testthat/test_classif_rotationForest.R @@ -5,7 +5,7 @@ test_that("classif_rotationForest", { parset.list = list( list(), - list(L = 5L, K = 2L), + list(L = 5L, K = 2L), list(L = 10L, K = 2L) ) diff --git a/tests/testthat/test_classif_rpart.R b/tests/testthat/test_classif_rpart.R index 0b35d6b1e9..f42b19ca4c 100644 --- a/tests/testthat/test_classif_rpart.R +++ b/tests/testthat/test_classif_rpart.R @@ -20,7 +20,7 @@ test_that("classif_rpart", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) m = do.call(rpart::rpart, pars) - p = predict(m, newdata = multiclass.test, type = "class") + p = predict(m, newdata = multiclass.test, type = "class") p2 = predict(m, newdata = multiclass.test, type = "prob") old.predicts.list[[i]] = p old.probs.list[[i]] = p2 @@ -33,5 +33,4 @@ test_that("classif_rpart", { tp = function(model, newdata) predict(model, newdata, type = "class") testCVParsets("classif.rpart", multiclass.df, multiclass.target, tune.train = tt, tune.predict = tp, parset.list = parset.list) - }) diff --git a/tests/testthat/test_classif_rrlda.R b/tests/testthat/test_classif_rrlda.R index 0283a7f3fb..933d12b4e5 100644 --- a/tests/testthat/test_classif_rrlda.R +++ b/tests/testthat/test_classif_rrlda.R @@ -7,5 +7,4 @@ test_that("classif_rrlda", { p = predict(m, x = multiclass.test[, -multiclass.class.col])$class testSimple("classif.rrlda", multiclass.df, multiclass.target, multiclass.train.inds, p) - }) diff --git a/tests/testthat/test_classif_saeDNN.R b/tests/testthat/test_classif_saeDNN.R index dc85013939..378e001662 100644 --- a/tests/testthat/test_classif_saeDNN.R +++ b/tests/testthat/test_classif_saeDNN.R @@ -49,5 +49,5 @@ test_that("classif_saeDNN", { set.seed(getOption("mlr.debug.seed")) testSimple("classif.saeDNN", binaryclass.df, binaryclass.target, binaryclass.train.inds, p, - parset = list(hidden = 7)) + parset = list(hidden = 7)) }) diff --git a/tests/testthat/test_classif_svm.R b/tests/testthat/test_classif_svm.R index 3c1362e7c1..b3b250a6e3 100644 --- a/tests/testthat/test_classif_svm.R +++ b/tests/testthat/test_classif_svm.R @@ -1,12 +1,12 @@ context("classif_svm") # we cannot do a prob test, as set.seed sems not to work on e1071 svm for the prob parameters! 
-#requirePackagesOrSkip("e1071", default.method = "load") -#set.seed(1) -#m1=svm(Species~., data=iris, probability=T) -#set.seed(1) -#m2=svm(Species~., data=iris, probability=T) -#all.equal(m1, m2) +# requirePackagesOrSkip("e1071", default.method = "load") +# set.seed(1) +# m1=svm(Species~., data=iris, probability=T) +# set.seed(1) +# m2=svm(Species~., data=iris, probability=T) +# all.equal(m1, m2) test_that("classif_svm", { requirePackagesOrSkip("e1071", default.method = "load") @@ -34,11 +34,12 @@ test_that("classif_svm", { } testSimpleParsets("classif.svm", multiclass.df, multiclass.target, - multiclass.train.inds, old.predicts.list, parset.list) - #testProbParsets("classif.svm", multiclass.df, multiclass.target, + multiclass.train.inds, old.predicts.list, parset.list) + # testProbParsets("classif.svm", multiclass.df, multiclass.target, # multiclass.train.inds, old.probs.list, parset.list) tt = function(formula, data, subset = 1:150, ...) { + e1071::svm(formula, data = data[subset, ], kernel = "polynomial", degree = 3, coef0 = 2, gamma = 1.5) } diff --git a/tests/testthat/test_classif_xgboost.R b/tests/testthat/test_classif_xgboost.R index d5f38ddf35..00e85d0611 100644 --- a/tests/testthat/test_classif_xgboost.R +++ b/tests/testthat/test_classif_xgboost.R @@ -10,7 +10,7 @@ test_that("classif_xgboost", { parset.probs.list = list( list(), - list(objective = "multi:softprob") #We had a bug here that 'multi:softprob' didn't work with binaryclass + list(objective = "multi:softprob") # We had a bug here that 'multi:softprob' didn't work with binaryclass ) old.predicts.list = list() @@ -37,8 +37,9 @@ test_that("classif_xgboost", { if (is.null(parset$objective)) parset$objective = "binary:logistic" if (is.null(parset$verbose)) parset$verbose = 0L if (is.null(parset$nround)) parset$nrounds = 1L - if (parset$objective == "multi:softprob") + if (parset$objective == "multi:softprob") { parset$num_class = length(binaryclass.class.levs) + } pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) model = do.call(xgboost::xgboost, pars) diff --git a/tests/testthat/test_cluster_kkmeans.R b/tests/testthat/test_cluster_kkmeans.R index 75cc8cd522..03652891a8 100644 --- a/tests/testthat/test_cluster_kkmeans.R +++ b/tests/testthat/test_cluster_kkmeans.R @@ -19,7 +19,7 @@ test_that("cluster_kkmeans", { d.xc = kernlab::kernelMatrix(K, as.matrix(noclass.test), c) d.xx = matrix(rep(diag(kernlab::kernelMatrix(K, as.matrix(noclass.test))), each = ncol(d.xc)), ncol = ncol(d.xc), byrow = TRUE) d.cc = matrix(rep(diag(kernlab::kernelMatrix(K, as.matrix(c))), each = nrow(d.xc)), nrow = nrow(d.xc)) - d2 = d.xx + d.cc - 2 * d.xc #this is the squared kernel distance to the centers + d2 = d.xx + d.cc - 2 * d.xc # this is the squared kernel distance to the centers p = apply(d2, 1, function(x) BBmisc::getMinIndex(x, ties.method = "random")) old.predicts.list[[i]] = p } diff --git a/tests/testthat/test_featsel_FeatSelWrapper.R b/tests/testthat/test_featsel_FeatSelWrapper.R index ea32b5a2a2..0eafb46448 100644 --- a/tests/testthat/test_featsel_FeatSelWrapper.R +++ b/tests/testthat/test_featsel_FeatSelWrapper.R @@ -9,6 +9,7 @@ test_that("FeatSelWrapper", { lrn2 = makeFeatSelWrapper(lrn1, resampling = inner, control = ctrl) r = resample(lrn2, multiclass.task, outer, extract = function(model) { + getFeatSelResult(model) }) expect_true(!is.na(r$aggr[[1]])) @@ -21,15 +22,17 @@ test_that("FeatSelWrapper", { test_that("FeatSelWrapper works with custom bits", { bns = c("b1", "b2") btf = function(x, task) { + fns = 
getTaskFeatureNames(task) Reduce(c, list(fns[1:2], fns[3:4])[as.logical(x)], init = character(0)) } lrn1 = makeLearner("classif.rpart") ctrl = makeFeatSelControlRandom(maxit = 3) - lrn2 = makeFeatSelWrapper(lrn1, resampling = makeResampleDesc("Holdout"), control = ctrl, bit.names = bns, bits.to.features = btf) + lrn2 = makeFeatSelWrapper(lrn1, resampling = makeResampleDesc("Holdout"), control = ctrl, bit.names = bns, bits.to.features = btf) r = resample(lrn2, multiclass.task, cv2, extract = function(model) { + getFeatSelResult(model) }) expect_true(!is.na(r$aggr[[1]])) @@ -38,4 +41,3 @@ test_that("FeatSelWrapper works with custom bits", { bit.names = extractSubList(r$extract, "x.bit.names", simplify = FALSE) expect_true(is.list(bit.names) && length(bit.names) == 2L && all(sapply(feats, is.character))) }) - diff --git a/tests/testthat/test_featsel_FilterWrapper.R b/tests/testthat/test_featsel_FilterWrapper.R index adab45bd64..dceef8aba9 100644 --- a/tests/testthat/test_featsel_FilterWrapper.R +++ b/tests/testthat/test_featsel_FilterWrapper.R @@ -34,4 +34,3 @@ test_that("Filterwrapper permutation.importance (issue #814)", { expect_true(!any(is.na(r$aggr))) expect_subset(r$extract[[1]][[1]], getTaskFeatureNames(binaryclass.task)) }) - diff --git a/tests/testthat/test_featsel_fselectorrcpp.R b/tests/testthat/test_featsel_fselectorrcpp.R index 713fac9852..106449e248 100644 --- a/tests/testthat/test_featsel_fselectorrcpp.R +++ b/tests/testthat/test_featsel_fselectorrcpp.R @@ -1,26 +1,26 @@ context("filterFeatures_fselectorrcpp") - test_that("filterFeatures_fselectorrcpp", { - a = c(1, 2, 5.3, 6, -2, 4, 8.3, 9.2, 10.1) # numeric vector - b = c("one", "two", "three") # character vector - c = c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE) # logical vector - d = c(1L, 3L, 5L, 7L, 9L, 17L) - f = rep(c("c1", "c2"), 9) - df = data.frame(a = a, b = b, c = c, d = d, f = f) - df = convertDataFrameCols(df, logicals.as.factor = TRUE) - task = makeClassifTask(data = df, target = "f") +test_that("filterFeatures_fselectorrcpp", { + a = c(1, 2, 5.3, 6, -2, 4, 8.3, 9.2, 10.1) # numeric vector + b = c("one", "two", "three") # character vector + c = c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE) # logical vector + d = c(1L, 3L, 5L, 7L, 9L, 17L) + f = rep(c("c1", "c2"), 9) + df = data.frame(a = a, b = b, c = c, d = d, f = f) + df = convertDataFrameCols(df, logicals.as.factor = TRUE) + task = makeClassifTask(data = df, target = "f") - candidates = as.character(listFilterMethods()$id) - candidates = candidates[startsWith(candidates, "FSelectorRcpp")] - for (candidate in candidates) { - fv = generateFilterValuesData(task, method = candidate, nselect = 2L) - expect_class(fv, "FilterValues") - expect_data_frame(fv$data, nrow = getTaskNFeats(task)) - expect_set_equal(fv$data$name, getTaskFeatureNames(task)) - expect_numeric(fv$data[[candidate]], any.missing = FALSE, lower = 0, finite = TRUE) - } + candidates = as.character(listFilterMethods()$id) + candidates = candidates[startsWith(candidates, "FSelectorRcpp")] + for (candidate in candidates) { + fv = generateFilterValuesData(task, method = candidate, nselect = 2L) + expect_class(fv, "FilterValues") + expect_data_frame(fv$data, nrow = getTaskNFeats(task)) + expect_set_equal(fv$data$name, getTaskFeatureNames(task)) + expect_numeric(fv$data[[candidate]], any.missing = FALSE, lower = 0, finite = TRUE) + } - lrn = makeLearner("classif.rpart") - lrn = makeFilterWrapper(learner = lrn, fw.method = "FSelectorRcpp_information.gain", fw.perc = 0.1) - res = resample(learner = lrn, task = 
binaryclass.task, resampling = cv3, measures = list(mmce, timetrain), extract = getFilteredFeatures, show.info = FALSE) - expect_length(res$extract[[1L]], 6) + lrn = makeLearner("classif.rpart") + lrn = makeFilterWrapper(learner = lrn, fw.method = "FSelectorRcpp_information.gain", fw.perc = 0.1) + res = resample(learner = lrn, task = binaryclass.task, resampling = cv3, measures = list(mmce, timetrain), extract = getFilteredFeatures, show.info = FALSE) + expect_length(res$extract[[1L]], 6) }) diff --git a/tests/testthat/test_featsel_praznik.R b/tests/testthat/test_featsel_praznik.R index e8149aa6e9..b986f14dd2 100644 --- a/tests/testthat/test_featsel_praznik.R +++ b/tests/testthat/test_featsel_praznik.R @@ -1,9 +1,9 @@ context("filterFeatures_praznik") test_that("filterFeatures_praznik", { - a = c(1, 2, 5.3, 6, -2, 4, 8.3, 9.2, 10.1) # numeric vector - b = c("one", "two", "three") # character vector - c = c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE) # logical vector + a = c(1, 2, 5.3, 6, -2, 4, 8.3, 9.2, 10.1) # numeric vector + b = c("one", "two", "three") # character vector + c = c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE) # logical vector d = c(1L, 3L, 5L, 7L, 9L, 17L) f = rep(c("c1", "c2"), 9) df = data.frame(a = a, b = b, c = c, d = d, f = f, const1 = f, const2 = a) @@ -31,6 +31,7 @@ test_that("FilterWrapper with praznik mutual information, resample", { candidates = as.character(listFilterMethods()$id) candidates = candidates[startsWith(candidates, "praznik_")] lapply(candidates, function(x) { + lrn1 = makeLearner("classif.lda") lrn2 = makeFilterWrapper(lrn1, fw.method = x, fw.perc = 0.5) m = train(lrn2, binaryclass.task) @@ -49,7 +50,7 @@ test_that("FilterWrapper with praznik mutual information, resample", { }) test_that("FilterWrapper with praznik mutual information, resample", { - #wrapped learner with praznik on binaryclass.task + # wrapped learner with praznik on binaryclass.task lrn = makeFilterWrapper(makeLearner("classif.randomForest"), fw.method = "praznik_MIM", fw.abs = 2) mod = train(lrn, binaryclass.task) feat.imp = getFeatureImportance(mod)$res diff --git a/tests/testthat/test_featsel_selectFeatures.R b/tests/testthat/test_featsel_selectFeatures.R index 9ea8dd6d71..258fde01f3 100644 --- a/tests/testthat/test_featsel_selectFeatures.R +++ b/tests/testthat/test_featsel_selectFeatures.R @@ -55,6 +55,7 @@ test_that("selectFeatures", { bns = c("b1", "b2") btf = function(x, task) { + fns = getTaskFeatureNames(task) Reduce(c, list(fns[1:2], fns[3:4])[as.logical(x)], init = character(0)) } diff --git a/tests/testthat/test_featsel_selectFeaturesSequential.R b/tests/testthat/test_featsel_selectFeaturesSequential.R index a059bd7a0e..9293e5ce0b 100644 --- a/tests/testthat/test_featsel_selectFeaturesSequential.R +++ b/tests/testthat/test_featsel_selectFeaturesSequential.R @@ -4,15 +4,15 @@ context("selectFeaturesSequential") test_that("no crash with sffs", { p = mlbench::mlbench.waveform(1000) dataset = as.data.frame(p) - dataset = droplevels(subset(dataset, classes != 3)) + dataset = droplevels(subset(dataset, classes != 3)) mCT = makeClassifTask(data = dataset, target = "classes") - ctrl = makeFeatSelControlSequential(method = "sffs", maxit = NA,alpha = 0.001) + ctrl = makeFeatSelControlSequential(method = "sffs", maxit = NA, alpha = 0.001) mL = makeLearner("classif.logreg", predict.type = "prob") - inner = makeResampleDesc("Holdout",stratify = TRUE) + inner = makeResampleDesc("Holdout", stratify = TRUE) lrn = makeFeatSelWrapper(mL, resampling = inner, control = ctrl) outer = 
makeResampleDesc("CV", iters = 2, stratify = TRUE) # No error occurs - expect_error(resample(lrn, mCT, outer, extract = getFeatSelResult, measures = list(mlr::auc, mlr::acc, mlr::brier), models=TRUE), - NA) + expect_error(resample(lrn, mCT, outer, extract = getFeatSelResult, measures = list(mlr::auc, mlr::acc, mlr::brier), models = TRUE), + NA) }) diff --git a/tests/testthat/test_learners_all_classif.R b/tests/testthat/test_learners_all_classif.R index f51e2190ae..a43e661e2b 100644 --- a/tests/testthat/test_learners_all_classif.R +++ b/tests/testthat/test_learners_all_classif.R @@ -66,9 +66,10 @@ test_that("learners work: classif", { }) -test_that("weightedClassWrapper on all binary learners", { +test_that("weightedClassWrapper on all binary learners", { pos = getTaskDesc(binaryclass.task)$positive f = function(lrn, w) { + lrn1 = makeLearner(lrn) lrn2 = makeWeightedClassesWrapper(lrn1, wcw.weight = w) m = train(lrn2, binaryclass.task) @@ -78,6 +79,7 @@ test_that("weightedClassWrapper on all binary learners", { learners = listLearners(binaryclass.task, "class.weights") x = lapply(learners$class, function(lrn) { + cm1 = f(lrn, 0.001) cm2 = f(lrn, 1) cm3 = f(lrn, 1000) @@ -87,9 +89,10 @@ test_that("weightedClassWrapper on all binary learners", { }) -test_that("WeightedClassWrapper on all multiclass learners", { +test_that("WeightedClassWrapper on all multiclass learners", { levs = getTaskClassLevels(multiclass.task) f = function(lrn, w) { + lrn1 = makeLearner(lrn) param = lrn1$class.weights.param lrn2 = makeWeightedClassesWrapper(lrn1, wcw.weight = w) @@ -100,6 +103,7 @@ test_that("WeightedClassWrapper on all multiclass learners", { learners = listLearners(multiclass.task, "class.weights") x = lapply(learners$class, function(lrn) { + classes = getTaskFactorLevels(multiclass.task)[[multiclass.target]] n = length(classes) cm1 = f(lrn, setNames(object = c(10000, 1, 1), classes)) diff --git a/tests/testthat/test_learners_all_multilabel.R b/tests/testthat/test_learners_all_multilabel.R index f4ca1ce9f2..a7fb1b9fe0 100644 --- a/tests/testthat/test_learners_all_multilabel.R +++ b/tests/testthat/test_learners_all_multilabel.R @@ -33,5 +33,4 @@ test_that("learners work: multilabel", { task = multilabel.task, train.inds = multilabel.train.inds, multilabel.test.inds, weights = rep(c(10000L, 1L), c(10L, length(multilabel.train.inds) - 10L)), pred.type = "prob", get.pred.fun = getPredictionProbabilities) - }) diff --git a/tests/testthat/test_learners_classiflabelswitch.R b/tests/testthat/test_learners_classiflabelswitch.R index 76a51183ed..e652ee3943 100644 --- a/tests/testthat/test_learners_classiflabelswitch.R +++ b/tests/testthat/test_learners_classiflabelswitch.R @@ -4,16 +4,16 @@ n = 50L p = 2L mydata1 = matrix(runif(2 * n * p), nrow = 2 * n, ncol = p) mydata1 = as.data.frame(mydata1) -mydata1[1:n, ] = mydata1[1:n, ] + 10L -mydata1[(n + 1):(2 * n), ] = mydata1[(n + 1):(2 * n), ] - 10L +mydata1[1:n, ] = mydata1[1:n, ] + 10L +mydata1[(n + 1):(2 * n), ] = mydata1[(n + 1):(2 * n), ] - 10L mydata1$y = factor(rep(c("a", "b"), each = c(n))) mydata2 = mydata1 mydata2$y = factor(rep(c("a", "b"), each = c(n)), levels = c("b", "a")) mydata3 = matrix(runif(3 * n * p), nrow = 3 * n, ncol = p) mydata3 = as.data.frame(mydata3) -mydata3[1:n, ] = mydata3[1:n, ] + 10L -mydata3[(n + 1):(2 * n), ] = mydata3[(n + 1):(2 * n), ] - 10L +mydata3[1:n, ] = mydata3[1:n, ] + 10L +mydata3[(n + 1):(2 * n), ] = mydata3[(n + 1):(2 * n), ] - 10L mydata3$y = factor(rep(c("a", "b", "c"), each = c(n))) mydata4 = mydata3 mydata4$y = 
factor(rep(c("a", "b", "c"), each = c(n)), levels = c("c", "b", "a")) @@ -22,8 +22,8 @@ mytask1a = makeClassifTask(id = "t1a", data = mydata1, target = "y", positive = mytask1b = makeClassifTask(id = "t1b", data = mydata1, target = "y", positive = "b") mytask2a = makeClassifTask(id = "t2a", data = mydata2, target = "y", positive = "a") mytask2b = makeClassifTask(id = "t2b", data = mydata2, target = "y", positive = "b") -mytask3 = makeClassifTask(id = "t3", data = mydata3, target = "y") -mytask4 = makeClassifTask(id = "t4", data = mydata4, target = "y") +mytask3 = makeClassifTask(id = "t3", data = mydata3, target = "y") +mytask4 = makeClassifTask(id = "t4", data = mydata4, target = "y") hpars = list( classif.bartMachine = list(verbose = FALSE, run_in_sample = FALSE, @@ -46,6 +46,7 @@ test_that("no labels are switched", { checkErrsForTask = function(task, predtype) { + props = if (predtype == "response") character(0L) else "prob" lrns = listLearners(task, create = TRUE, properties = props) lids = extractSubList(lrns, "id") @@ -57,13 +58,15 @@ test_that("no labels are switched", { lrns = lrns[!toremove] vnapply(lrns, function(lrn) { + lrn = setPredictType(lrn, predtype) id = lrn$id hps = hpars[[id]] - if (!is.null(hps)) + if (!is.null(hps)) { lrn = setHyperPars(lrn, par.vals = hps) + } tmp = holdout(lrn, task, split = 0.5, stratify = TRUE) - #print(as.data.frame(getRRPredictions(tmp))) + # print(as.data.frame(getRRPredictions(tmp))) err = tmp$aggr[[1L]] expect_true(!is.na(err) & err <= 1 / 3, info = paste(getTaskDesc(task)$id, id, err, sep = ", ")) err diff --git a/tests/testthat/test_lint.R b/tests/testthat/test_lint.R index dee46e6a07..adf10bccde 100644 --- a/tests/testthat/test_lint.R +++ b/tests/testthat/test_lint.R @@ -11,4 +11,3 @@ if (isLintrVersionOk(identical(Sys.getenv("TRAVIS"), "true"))) { "To run lintr test, please install the github version of lintr by running", "> devtools::install_github(\"jimhester/lintr\")", sep = "\n")) } - diff --git a/tests/testthat/test_multilabel_cforest.R b/tests/testthat/test_multilabel_cforest.R index b19ad87fd6..2be73e4ad4 100644 --- a/tests/testthat/test_multilabel_cforest.R +++ b/tests/testthat/test_multilabel_cforest.R @@ -29,6 +29,4 @@ test_that("multilabel_cforest", { testProbParsets("multilabel.cforest", multilabel.df, multilabel.target, multilabel.train.inds, old.probs.list, parset.list2) - }) - diff --git a/tests/testthat/test_parallel_all.R b/tests/testthat/test_parallel_all.R index afafd2e411..5c7a11329a 100644 --- a/tests/testthat/test_parallel_all.R +++ b/tests/testthat/test_parallel_all.R @@ -2,6 +2,7 @@ context("parallel_all") test_that("parallel resampling", { doit = function(mode, level) { + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("CV", iters = 2L) on.exit(parallelStop()) @@ -24,6 +25,7 @@ test_that("parallel resampling", { test_that("parallel tuning", { doit = function(mode, level) { + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("CV", iters = 2L) ps = makeParamSet(makeDiscreteParam("cp", values = c(0.01, 0.05))) @@ -48,6 +50,7 @@ test_that("parallel tuning", { test_that("parallel featsel", { doit = function(mode, level) { + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("CV", iters = 2L) ctrl = makeFeatSelControlRandom(maxit = 2L) @@ -71,6 +74,7 @@ test_that("parallel featsel", { test_that("parallel exporting of options works", { doit = function(mode, level) { + data = iris data[, 1] = 1 # this is going to crash lda task = makeClassifTask(data = data, target = "Species") @@ -90,6 
+94,7 @@ test_that("parallel exporting of options works", { test_that("parallel partial dependence", { doit = function(mode) { + lrn = makeLearner("regr.rpart") fit = train(lrn, regr.task) on.exit(parallelStop()) @@ -106,6 +111,7 @@ test_that("parallel partial dependence", { test_that("parallel ensembles", { doit = function(mode, level) { + on.exit(parallelStop()) parallelStart(mode = mode, cpus = 2L, show.info = FALSE) @@ -148,7 +154,7 @@ test_that("parallel ensembles", { p = predict(fit, multilabel.task) } - ## CostSensWeightedPairsWrapper + ## CostSensWeightedPairsWrapper if (Sys.info()["sysname"] != "Windows") { doit("multicore", "mlr.ensemble") doit("mpi", "mlr.ensemble") diff --git a/tests/testthat/test_regr_FDboost.R b/tests/testthat/test_regr_FDboost.R index 6d05e6461c..2feb70d52c 100644 --- a/tests/testthat/test_regr_FDboost.R +++ b/tests/testthat/test_regr_FDboost.R @@ -23,7 +23,7 @@ test_that("regr_FDboost is equal to reference", { ctrl = learnerArgsToControl(mboost::boost_control, mstop = 100L, nu = 0.1) set.seed(getOption("mlr.debug.seed")) true.mod = FDboost::FDboost(frm, data = mat.list, - timeformula = ~bols(1), control = ctrl, family = mboost::Gaussian()) + timeformula = ~ bols(1), control = ctrl, family = mboost::Gaussian()) prd = predict(mlr.mod, newdata = getTaskData(fda.regr.fs.task, @@ -31,6 +31,4 @@ test_that("regr_FDboost is equal to reference", { prd2 = predict(true.mod, as.list(getTaskData(fda.regr.fs.task, functionals.as = "matrix"))) expect_equal(prd$data$response, prd2) - - }) diff --git a/tests/testthat/test_regr_GPfit.R b/tests/testthat/test_regr_GPfit.R index f94b734f0f..ab01f30280 100644 --- a/tests/testthat/test_regr_GPfit.R +++ b/tests/testthat/test_regr_GPfit.R @@ -2,6 +2,7 @@ context("regr_GPfit") test_that("regr_GPfit", { testFun = function(x) { + return(4 * x[, 1]^2 - 2 * x[, 2]) } set.seed(getOption("mlr.debug.seed")) diff --git a/tests/testthat/test_regr_IBk.R b/tests/testthat/test_regr_IBk.R index 632b560523..c6a71ce25e 100644 --- a/tests/testthat/test_regr_IBk.R +++ b/tests/testthat/test_regr_IBk.R @@ -16,7 +16,7 @@ test_that("regr_IBk", { pars = list(regr.formula, data = regr.train) pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) - m = RWeka::IBk(regr.formula, regr.train, control = ctrl) + m = RWeka::IBk(regr.formula, regr.train, control = ctrl) set.seed(getOption("mlr.debug.seed")) p = predict(m, newdata = regr.test) old.predicts.list[[i]] = p diff --git a/tests/testthat/test_regr_RRF.R b/tests/testthat/test_regr_RRF.R index aa0f71a7a5..02304b6814 100644 --- a/tests/testthat/test_regr_RRF.R +++ b/tests/testthat/test_regr_RRF.R @@ -22,5 +22,5 @@ test_that("regr_RRF", { } testSimpleParsets("regr.RRF", regr.df, regr.target, - regr.train.inds, old.predicts.list, parset.list) + regr.train.inds, old.predicts.list, parset.list) }) diff --git a/tests/testthat/test_regr_bartMachine.R b/tests/testthat/test_regr_bartMachine.R index 765df868aa..25158a8ba5 100644 --- a/tests/testthat/test_regr_bartMachine.R +++ b/tests/testthat/test_regr_bartMachine.R @@ -22,10 +22,9 @@ test_that("regr_bartMachine", { } testSimpleParsets("regr.bartMachine", regr.df, regr.target, regr.train.inds, - old.predicts.list, parset.list) + old.predicts.list, parset.list) - for (i in seq_along(parset.list)){ + for (i in seq_along(parset.list)) { expect_true(length(old.predicts.list[[i]]) == nrow(regr.test)) } }) - diff --git a/tests/testthat/test_regr_bcart.R b/tests/testthat/test_regr_bcart.R index 343c3d1919..a2a179eb24 100644 --- a/tests/testthat/test_regr_bcart.R +++ 
b/tests/testthat/test_regr_bcart.R @@ -21,7 +21,7 @@ test_that("regr_bcart", { df.factor = createDummyFeatures(df.factor, method = "reference") df = cbind(df.num, df.factor) train = df[regr.train.inds, ] - test = df[regr.test.inds, ] + test = df[regr.test.inds, ] old.predicts.list = list() for (i in seq_along(parset.list)) { diff --git a/tests/testthat/test_regr_brnn.R b/tests/testthat/test_regr_brnn.R index fe4863e708..81a5811661 100644 --- a/tests/testthat/test_regr_brnn.R +++ b/tests/testthat/test_regr_brnn.R @@ -15,7 +15,9 @@ test_that("regr_brnn", { pars = list(formula = regr.formula, data = regr.train) pars = c(pars, parset.list[[i]]) set.seed(getOption("mlr.debug.seed")) - capture.output({m = do.call(brnn::brnn, pars)}) + capture.output({ + m = do.call(brnn::brnn, pars) + }) p = predict(m, newdata = regr.test) old.predicts.list[[i]] = p } diff --git a/tests/testthat/test_regr_bst.R b/tests/testthat/test_regr_bst.R index 569e0484c4..1100707799 100644 --- a/tests/testthat/test_regr_bst.R +++ b/tests/testthat/test_regr_bst.R @@ -30,5 +30,5 @@ test_that("classif_bst", { old.predicts.list[[i]] = predict(m, regr.num.test[, xind]) } testSimpleParsets("regr.bst", regr.num.df, regr.num.target, regr.num.train.inds, - old.predicts.list, parset.list2) + old.predicts.list, parset.list2) }) diff --git a/tests/testthat/test_regr_btgp.R b/tests/testthat/test_regr_btgp.R index 856f0f8836..0b96f8bc9c 100644 --- a/tests/testthat/test_regr_btgp.R +++ b/tests/testthat/test_regr_btgp.R @@ -18,7 +18,7 @@ test_that("regr_btgp", { df = cbind(df.num, df.factor) inds = 1:10 train = df[inds, ] - test = df[-inds, ] + test = df[-inds, ] y = regr.df[inds, regr.target] old.predicts.list = list() diff --git a/tests/testthat/test_regr_btgpllm.R b/tests/testthat/test_regr_btgpllm.R index a64d42ca71..4548f9af22 100644 --- a/tests/testthat/test_regr_btgpllm.R +++ b/tests/testthat/test_regr_btgpllm.R @@ -18,7 +18,7 @@ test_that("regr_btgpllm", { df = cbind(df.num, df.factor) inds = 1:10 train = df[inds, ] - test = df[-inds, ] + test = df[-inds, ] y = regr.df[inds, regr.target] old.predicts.list = list() diff --git a/tests/testthat/test_regr_btlm.R b/tests/testthat/test_regr_btlm.R index 3e9199147c..5b9674795f 100644 --- a/tests/testthat/test_regr_btlm.R +++ b/tests/testthat/test_regr_btlm.R @@ -21,7 +21,7 @@ test_that("regr_btlm", { df.factor = createDummyFeatures(df.factor, method = "reference") df = cbind(df.num, df.factor) train = df[regr.train.inds, ] - test = df[regr.test.inds, ] + test = df[regr.test.inds, ] old.predicts.list = list() for (i in seq_along(parset.list)) { diff --git a/tests/testthat/test_regr_crs.R b/tests/testthat/test_regr_crs.R index 2e83e46330..fdf565d484 100644 --- a/tests/testthat/test_regr_crs.R +++ b/tests/testthat/test_regr_crs.R @@ -17,7 +17,9 @@ test_that("regr_crs", { pars = list(regr.formula, data = regr.train) pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) - suppressWarnings({m = do.call(crs::crs, pars)}) + suppressWarnings({ + m = do.call(crs::crs, pars) + }) set.seed(getOption("mlr.debug.seed")) pred = predict(m, newdata = regr.test) attr(pred, "lwr") = NULL diff --git a/tests/testthat/test_regr_ctree.R b/tests/testthat/test_regr_ctree.R index 4b76d96bd3..15e7e9c6f1 100644 --- a/tests/testthat/test_regr_ctree.R +++ b/tests/testthat/test_regr_ctree.R @@ -18,7 +18,7 @@ test_that("regr_ctree", { ctrl = do.call(party::ctree_control, parset) set.seed(getOption("mlr.debug.seed")) m = party::ctree(formula = regr.formula, data = regr.train, control = ctrl) - p = predict(m, 
newdata = regr.test, type = "response")[, 1L] + p = predict(m, newdata = regr.test, type = "response")[, 1L] old.predicts.list[[i]] = p } diff --git a/tests/testthat/test_regr_cubist.R b/tests/testthat/test_regr_cubist.R index bc7c84c893..56a22e54f4 100644 --- a/tests/testthat/test_regr_cubist.R +++ b/tests/testthat/test_regr_cubist.R @@ -23,7 +23,7 @@ test_that("regr_cubist", { parset = c(list(x = X, y = y), parset) set.seed(getOption("mlr.debug.seed")) m = do.call(Cubist::cubist, parset) - p = predict(m, newdata = regr.test) + p = predict(m, newdata = regr.test) old.predicts.list[[i]] = p } diff --git a/tests/testthat/test_regr_earth.R b/tests/testthat/test_regr_earth.R index 221b169c7d..40f0c7118f 100644 --- a/tests/testthat/test_regr_earth.R +++ b/tests/testthat/test_regr_earth.R @@ -7,7 +7,7 @@ test_that("regr_earth", { list(), list(degree = 2), list(penalty = 4) - ) + ) old.predicts.list = list() diff --git a/tests/testthat/test_regr_evtree.R b/tests/testthat/test_regr_evtree.R index 13acdf71dd..bc39ddb6a2 100644 --- a/tests/testthat/test_regr_evtree.R +++ b/tests/testthat/test_regr_evtree.R @@ -22,5 +22,4 @@ test_that("regr_evtree", { } testSimpleParsets("regr.evtree", regr.df, regr.target, regr.train.inds, old.predicts.list, parset.list) - }) diff --git a/tests/testthat/test_regr_fnn.R b/tests/testthat/test_regr_fnn.R index dfcc500840..18d02931f6 100644 --- a/tests/testthat/test_regr_fnn.R +++ b/tests/testthat/test_regr_fnn.R @@ -29,10 +29,12 @@ test_that("regr_fnn", { testSimpleParsets("regr.fnn", rdf, regr.target, regr.train.inds, old.predicts.list1, parset.list) tt = function(formula, data, k = 3) { + j = which(colnames(data) == as.character(formula)[2]) list(train = data[, -j], y = data[, j], k = k, target = j) } tp = function(model, newdata) { + newdata = newdata[, -model$target] FNN::knn.reg(train = model$train, test = newdata, y = model$y, k = model$k)$pred } diff --git a/tests/testthat/test_regr_gamboost.R b/tests/testthat/test_regr_gamboost.R index 8aa67ba59f..a78707f3ed 100644 --- a/tests/testthat/test_regr_gamboost.R +++ b/tests/testthat/test_regr_gamboost.R @@ -7,7 +7,7 @@ test_that("regr_gamboost", { list(family = mboost::Gaussian(), baselearner = "bols", dfbase = 4, control = mboost::boost_control(nu = 0.03, mstop = 200)), list(family = mboost::GammaReg(nuirange = c(0, 50)), baselearner = "btree", - control = mboost::boost_control(mstop = 100)), + control = mboost::boost_control(mstop = 100)), list(family = mboost::Family(ngradient = function(y, f, w = 1) y - f, loss = function(y, f) (y - f)^2, name = "My Gauss Variant")) @@ -16,7 +16,7 @@ test_that("regr_gamboost", { list(), list(family = "Gaussian", baselearner = "bols", dfbase = 4, nu = 0.03, mstop = 200), list(family = "GammaReg", baselearner = "btree", nuirange = c(0, 50), mstop = 100), - list(family = "custom.family", custom.family.definition = mboost::Family(ngradient = function(y, f, w = 1) y - f, + list(family = "custom.family", custom.family.definition = mboost::Family(ngradient = function(y, f, w = 1) y - f, loss = function(y, f) (y - f)^2, name = "My Gauss Variant")) ) diff --git a/tests/testthat/test_regr_gausspr.R b/tests/testthat/test_regr_gausspr.R index 8369d69dea..059f041142 100644 --- a/tests/testthat/test_regr_gausspr.R +++ b/tests/testthat/test_regr_gausspr.R @@ -1,7 +1,6 @@ context("regr_gausspr") test_that("regr_gausspr", { - requirePackages("kernlab", default.method = "load") parset.list = list( diff --git a/tests/testthat/test_regr_gbm.R b/tests/testthat/test_regr_gbm.R index 
57b4cf2cdf..ab9f72f530 100644 --- a/tests/testthat/test_regr_gbm.R +++ b/tests/testthat/test_regr_gbm.R @@ -7,7 +7,7 @@ test_that("regr_gbm", { list(), list(n.trees = 600), list(interaction.depth = 2) - ) + ) old.predicts.list = list() diff --git a/tests/testthat/test_regr_glm.R b/tests/testthat/test_regr_glm.R index b6fb79ac63..539656b5b7 100644 --- a/tests/testthat/test_regr_glm.R +++ b/tests/testthat/test_regr_glm.R @@ -33,5 +33,3 @@ test_that("regr_glm", { testSimpleParsets("regr.glm", regr.df, regr.target, regr.train.inds, old.predicts.list, parset.list) }) - - diff --git a/tests/testthat/test_regr_glmboost.R b/tests/testthat/test_regr_glmboost.R index 2cbe46b69c..b9d4c0bfe5 100644 --- a/tests/testthat/test_regr_glmboost.R +++ b/tests/testthat/test_regr_glmboost.R @@ -14,9 +14,9 @@ test_that("regr_glmboost", { list(), list(family = "Gaussian", nu = 0.03), list(family = "GammaReg", nuirange = c(0, 50), mstop = 600, center = TRUE), - list(family = "custom.family", custom.family.definition = mboost::Family(ngradient = function(y, f, w = 1) y - f, - loss = function(y, f) (y - f)^2, - name = "My Gauss Variant")) + list(family = "custom.family", custom.family.definition = mboost::Family(ngradient = function(y, f, w = 1) y - f, + loss = function(y, f) (y - f)^2, + name = "My Gauss Variant")) ) old.predicts.list = list() for (i in seq_along(parset.list1)) { diff --git a/tests/testthat/test_regr_h2odeeplearning.R b/tests/testthat/test_regr_h2odeeplearning.R index 82442b2f14..674a1904b9 100644 --- a/tests/testthat/test_regr_h2odeeplearning.R +++ b/tests/testthat/test_regr_h2odeeplearning.R @@ -11,7 +11,7 @@ test_that("regr_h2odeeplearning", { list(distribution = "quantile", quantile_alpha = 0.2), list(distribution = "tweedie", tweedie_power = 1.2) ) - #h20deeplearning needs seed in function call to be reproducible + # h20deeplearning needs seed in function call to be reproducible debug.seed = getOption("mlr.debug.seed") parset.list = lapply(parset.list, function(x) c(x, seed = debug.seed, reproducible = TRUE)) old.predicts.list = list() @@ -23,7 +23,7 @@ test_that("regr_h2odeeplearning", { training_frame = h2o::as.h2o(regr.train))) set.seed(getOption("mlr.debug.seed")) m = do.call(h2o::h2o.deeplearning, parset) - p = predict(m, newdata = h2o::as.h2o(regr.test)) + p = predict(m, newdata = h2o::as.h2o(regr.test)) old.predicts.list[[i]] = as.data.frame(p)[, 1L] } diff --git a/tests/testthat/test_regr_h2ogbm.R b/tests/testthat/test_regr_h2ogbm.R index 1f81f0ace9..10d3466c51 100644 --- a/tests/testthat/test_regr_h2ogbm.R +++ b/tests/testthat/test_regr_h2ogbm.R @@ -43,7 +43,7 @@ test_that("regr_h2ogbm", { training_frame = h2o::as.h2o(regr.train))) set.seed(getOption("mlr.debug.seed")) m = do.call(h2o::h2o.gbm, parset) - p = predict(m, newdata = h2o::as.h2o(regr.test)) + p = predict(m, newdata = h2o::as.h2o(regr.test)) old.predicts.list[[i]] = as.data.frame(p)[, 1L] } diff --git a/tests/testthat/test_regr_h2oglm.R b/tests/testthat/test_regr_h2oglm.R index cbe3d7bdc1..8199def5b3 100644 --- a/tests/testthat/test_regr_h2oglm.R +++ b/tests/testthat/test_regr_h2oglm.R @@ -19,7 +19,7 @@ test_that("regr_h2oglm", { training_frame = h2o::as.h2o(regr.train))) set.seed(getOption("mlr.debug.seed")) m = do.call(h2o::h2o.glm, parset) - p = predict(m, newdata = h2o::as.h2o(regr.test)) + p = predict(m, newdata = h2o::as.h2o(regr.test)) old.predicts.list[[i]] = as.data.frame(p)[, 1L] } diff --git a/tests/testthat/test_regr_h2orandomForest.R b/tests/testthat/test_regr_h2orandomForest.R index 9b950aefec..cf5e0939a4 
100644 --- a/tests/testthat/test_regr_h2orandomForest.R +++ b/tests/testthat/test_regr_h2orandomForest.R @@ -10,7 +10,7 @@ test_that("regr_h2orandomForest", { list(ntrees = 4), list(ntrees = 4, mtries = 2) ) - #h2o.randomForest needs seed in function call to be reproducible + # h2o.randomForest needs seed in function call to be reproducible debug.seed = getOption("mlr.debug.seed") parset.list = lapply(parset.list, function(x) c(x, seed = debug.seed)) old.predicts.list = list() @@ -21,7 +21,7 @@ test_that("regr_h2orandomForest", { y = regr.target, training_frame = h2o::as.h2o(regr.train))) m = do.call(h2o::h2o.randomForest, parset) - p = predict(m, newdata = h2o::as.h2o(regr.test)) + p = predict(m, newdata = h2o::as.h2o(regr.test)) old.predicts.list[[i]] = as.data.frame(p)[, 1L] } testSimpleParsets("regr.h2o.randomForest", regr.df, regr.target, regr.train.inds, diff --git a/tests/testthat/test_regr_kknn.R b/tests/testthat/test_regr_kknn.R index 4ce4c607b6..214f41c90b 100644 --- a/tests/testthat/test_regr_kknn.R +++ b/tests/testthat/test_regr_kknn.R @@ -26,9 +26,11 @@ test_that("regr_kknn", { testSimpleParsets("regr.kknn", regr.df, regr.target, regr.train.inds, old.predicts.list, parset.list) tt = function(formula, data, k = 7) { + return(list(formula = formula, data = data, k = k)) } tp = function(model, newdata) { + kknn::kknn(model$formula, train = model$data, test = newdata, k = model$k)$fitted } diff --git a/tests/testthat/test_regr_km.R b/tests/testthat/test_regr_km.R index 800b1ebc9d..61f963039b 100644 --- a/tests/testthat/test_regr_km.R +++ b/tests/testthat/test_regr_km.R @@ -5,7 +5,7 @@ test_that("regr_km", { parset.list = list( list(), - #list(covtype="gauss"), + # list(covtype="gauss"), list(covtype = "matern5_2") ) dd = regr.num.df[1:50, ] diff --git a/tests/testthat/test_regr_laGP.R b/tests/testthat/test_regr_laGP.R index ae88c6b1f5..1221d711c0 100644 --- a/tests/testthat/test_regr_laGP.R +++ b/tests/testthat/test_regr_laGP.R @@ -15,7 +15,7 @@ test_that("regr_laGP", { for (i in seq_along(parset.list)) { parset = parset.list[[i]] pars = list(X = des1[, -regr.num.class.col], Z = y, XX = des2[, -regr.num.class.col], verb = 0, - Xi.ret = FALSE) + Xi.ret = FALSE) pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) old.predicts.list[[i]] = do.call(laGP::aGP, pars)$mean diff --git a/tests/testthat/test_regr_liquidSVM.R b/tests/testthat/test_regr_liquidSVM.R index d0b69e8320..0263121d35 100644 --- a/tests/testthat/test_regr_liquidSVM.R +++ b/tests/testthat/test_regr_liquidSVM.R @@ -14,7 +14,7 @@ test_that("regr_liquidSVM", { list(clipping = 0), list(gamma_steps = 5, min_gamma = 0.1, max_gamma = 25, lambda_steps = 5, min_lambda = 0.1, max_lambda = 25), list(useCells = TRUE), - list(gammas = c(0.1,1,10), lambdas = c(0.1,1,10), c_values = c(0.1,1,10)) + list(gammas = c(0.1, 1, 10), lambdas = c(0.1, 1, 10), c_values = c(0.1, 1, 10)) ) old.predicts.list = list() @@ -25,10 +25,9 @@ test_that("regr_liquidSVM", { set.seed(getOption("mlr.debug.seed")) m = do.call(liquidSVM::svm, pars) set.seed(getOption("mlr.debug.seed")) - p = predict(m, newdata = regr.test) + p = predict(m, newdata = regr.test) old.predicts.list[[i]] = p } testSimpleParsets("regr.liquidSVM", regr.df, regr.target, regr.train.inds, old.predicts.list, parset.list) }) - diff --git a/tests/testthat/test_regr_mob.R b/tests/testthat/test_regr_mob.R index 90200d41d6..ea0d1a6953 100644 --- a/tests/testthat/test_regr_mob.R +++ b/tests/testthat/test_regr_mob.R @@ -43,8 +43,8 @@ test_that("regr_mob", { 
testSimpleParsets("regr.mob", regr.df, regr.target, regr.train.inds, old.predicts.list, parset.list) # FIXME: Does not work with the extenden formula for mob! - #tt = "mob" - #tp = function(model, newdata) predict(model, newdata) + # tt = "mob" + # tp = function(model, newdata) predict(model, newdata) # # testCVParsets("regr.rpart", regr.df, regr.target, tune.train = tt, tune.predict = tp, parset.list = parset.list) }) diff --git a/tests/testthat/test_regr_nnet.R b/tests/testthat/test_regr_nnet.R index 6dbf46466f..5d5eb3400b 100644 --- a/tests/testthat/test_regr_nnet.R +++ b/tests/testthat/test_regr_nnet.R @@ -28,10 +28,10 @@ test_that("regr_nnet", { parset = list(size = 7L)) # tt = function (formula, data, subset = 1:150, ...) { - # nnet::nnet(formula, data = data[subset,], size = 3L, maxit = 50L) + # nnet::nnet(formula, data = data[subset,], size = 3L, maxit = 50L) # } # tp = function(model, newdata) as.factor(predict(model, newdata, type = "class")) # testCV("regr.nnet", regr.df, regr.target, tune.train = tt, tune.predict = tp, - # parset = list(size = 3L, maxit = 50L)) + # parset = list(size = 3L, maxit = 50L)) }) diff --git a/tests/testthat/test_regr_penalized.R b/tests/testthat/test_regr_penalized.R index 0749ebd84c..9ac4d5ece1 100644 --- a/tests/testthat/test_regr_penalized.R +++ b/tests/testthat/test_regr_penalized.R @@ -15,9 +15,9 @@ test_that("regr_penalized", { # to make test of empty list feasable (in terms of time), number of obs need to be reduced regr.train.inds = sample(seq(1, 506), size = 150) - regr.test.inds = setdiff(seq_len(nrow(regr.df)), regr.train.inds) + regr.test.inds = setdiff(seq_len(nrow(regr.df)), regr.train.inds) regr.train = regr.df[regr.train.inds, ] - regr.test = regr.df[regr.test.inds, ] + regr.test = regr.df[regr.test.inds, ] old.predicts.list = list() old.probs.list = list() @@ -47,10 +47,12 @@ test_that("regr_penalized", { ) tt = function(formula, data, subset = seq_len(nrow(data)), ...) { + penalized::penalized(formula, data = data[subset, ], ...) } tp = function(model, newdata, ...) 
{ + penalized::predict(model, data = newdata, ...)[, "mu"] } diff --git a/tests/testthat/test_regr_randomForest.R b/tests/testthat/test_regr_randomForest.R index 8ec8458aa4..f562341895 100644 --- a/tests/testthat/test_regr_randomForest.R +++ b/tests/testthat/test_regr_randomForest.R @@ -74,7 +74,6 @@ test_that("different se.methods work", { # mean prediction should be unaffected from the se.method expect_equal(preds$bootstrap$data$response, preds$sd$data$response) expect_equal(preds$sd$data$response, preds$jackknife$data$response) - }) diff --git a/tests/testthat/test_regr_ranger.R b/tests/testthat/test_regr_ranger.R index 8eac344915..45f083d97d 100644 --- a/tests/testthat/test_regr_ranger.R +++ b/tests/testthat/test_regr_ranger.R @@ -16,7 +16,7 @@ test_that("regr_ranger", { parset = c(parset, list(data = regr.train, formula = regr.formula, respect.unordered.factors = "order")) set.seed(getOption("mlr.debug.seed")) m = do.call(ranger::ranger, parset) - p = predict(m, data = regr.test) + p = predict(m, data = regr.test) old.predicts.list[[i]] = p$predictions } diff --git a/tests/testthat/test_regr_rknn.R b/tests/testthat/test_regr_rknn.R index ed98435d97..159ddf56c0 100644 --- a/tests/testthat/test_regr_rknn.R +++ b/tests/testthat/test_regr_rknn.R @@ -8,7 +8,7 @@ test_that("regr_rknn", { mtry = c(2L, 3L) parset.grid = expand.grid(k = k, r = r, mtry = mtry) parset.list = apply(parset.grid, MARGIN = 1L, as.list) - #rknn needs integer seed for reproducibility + # rknn needs integer seed for reproducibility parset.list = lapply(parset.list, function(x) c(x, seed = 2015L)) parset.list = c(parset.list, list(list(seed = 2015L))) # to test empty paramset old.predicts.list = list() @@ -27,25 +27,27 @@ test_that("regr_rknn", { } testSimpleParsets("regr.rknn", regr.num.df, regr.num.target, regr.num.train.inds, - old.predicts.list, parset.list) + old.predicts.list, parset.list) parset.list[[9]] = NULL tt = function(formula, data, k = 1L, r = 500L, mtry = 2L, seed = 2015L, cluster = NULL) { + return(list(formula = formula, data = data, k = k, r = r, mtry = mtry, - seed = seed, cluster = cluster)) + seed = seed, cluster = cluster)) } tp = function(model, newdata) { + target = as.character(model$formula)[2L] train = model$data y = train[, target] train[, target] = NULL newdata[, target] = NULL rknn::rknnReg(data = train, y = y, newdata = newdata, k = model$k, r = model$r, - mtry = model$mtry, seed = model$seed, cluster = model$cluster)$pred + mtry = model$mtry, seed = model$seed, cluster = model$cluster)$pred } - testCVParsets(t.name = "regr.rknn", df = regr.num.df, - target = regr.num.target, tune.train = tt, tune.predict = tp, - parset.list = parset.list) + testCVParsets(t.name = "regr.rknn", df = regr.num.df, + target = regr.num.target, tune.train = tt, tune.predict = tp, + parset.list = parset.list) }) diff --git a/tests/testthat/test_regr_rpart.R b/tests/testthat/test_regr_rpart.R index 21174d917d..6c3ace5ad7 100644 --- a/tests/testthat/test_regr_rpart.R +++ b/tests/testthat/test_regr_rpart.R @@ -20,7 +20,7 @@ test_that("regr_rpart", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) m = do.call(rpart::rpart, pars) - p = predict(m, newdata = regr.test) + p = predict(m, newdata = regr.test) old.predicts.list[[i]] = p } diff --git a/tests/testthat/test_regr_slim.R b/tests/testthat/test_regr_slim.R index d8febbbc4e..9a0abcac78 100644 --- a/tests/testthat/test_regr_slim.R +++ b/tests/testthat/test_regr_slim.R @@ -17,7 +17,7 @@ test_that("regr_slim", { for (i in seq_along(parset.list)) { pars 
= list(X = as.matrix(X), Y = y) pars = c(pars, parset.list[[i]]) - if ("lambda.idx" %in% names(pars)) { + if ("lambda.idx" %in% names(pars)) { idx = pars$lambda.idx pars$lambda.idx = NULL } else { diff --git a/tests/testthat/test_regr_svm.R b/tests/testthat/test_regr_svm.R index a5e8f0abaf..f410e01f26 100644 --- a/tests/testthat/test_regr_svm.R +++ b/tests/testthat/test_regr_svm.R @@ -19,7 +19,7 @@ test_that("regr_svm", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) m = do.call(e1071::svm, pars) - p = predict(m, newdata = regr.test) + p = predict(m, newdata = regr.test) old.predicts.list[[i]] = p } diff --git a/tests/testthat/test_regr_xgboost.R b/tests/testthat/test_regr_xgboost.R index aa5bf715a4..39f1de2e28 100644 --- a/tests/testthat/test_regr_xgboost.R +++ b/tests/testthat/test_regr_xgboost.R @@ -19,13 +19,13 @@ test_that("regr_xgboost", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) model = do.call(xgboost::xgboost, pars) - #model = xgboost::xgboost(data = data.matrix(regr.num.train[,-regr.num.class.col]), verbose = 0L, - #label = as.numeric(regr.num.train[,regr.num.class.col]), - #nrounds = 20, objective = "reg:linear", missing = NULL) + # model = xgboost::xgboost(data = data.matrix(regr.num.train[,-regr.num.class.col]), verbose = 0L, + # label = as.numeric(regr.num.train[,regr.num.class.col]), + # nrounds = 20, objective = "reg:linear", missing = NULL) old.predicts.list[[i]] = predict(model, data.matrix(regr.num.test[, -regr.num.class.col])) } - #set.seed(getOption("mlr.debug.seed")) + # set.seed(getOption("mlr.debug.seed")) testSimpleParsets("regr.xgboost", regr.num.df, regr.num.target, regr.num.train.inds, old.predicts.list, parset.list) }) @@ -35,5 +35,3 @@ test_that("xgboost works with different 'missing' arg vals", { lrn = makeLearner("regr.xgboost", missing = NA) lrn = makeLearner("regr.xgboost", missing = NULL) }) - - diff --git a/tests/testthat/test_stack.R b/tests/testthat/test_stack.R index 2300f2de3a..2aa471c4a0 100644 --- a/tests/testthat/test_stack.R +++ b/tests/testthat/test_stack.R @@ -1,6 +1,7 @@ context("stack") checkStack = function(task, method, base, super, bms.pt, sm.pt, use.feat) { + base = lapply(base, makeLearner, predict.type = bms.pt) if (method %in% c("average", "hill.climb")) { super = NULL @@ -74,6 +75,7 @@ test_that("Parameters for hill climb works", { expect_equal(sum(tmp$learner.model$weights), 1) metric = function(pred, true) { + pred = colnames(pred)[max.col(pred)] tb = table(pred, true) return(1 - sum(diag(tb)) / sum(tb)) @@ -85,7 +87,6 @@ test_that("Parameters for hill climb works", { res = predict(tmp, tsk) expect_equal(sum(tmp$learner.model$weights), 1) - }) test_that("Parameters for compress model", { @@ -94,7 +95,7 @@ test_that("Parameters for compress model", { lrns = lapply(base, makeLearner) lrns = lapply(lrns, setPredictType, "prob") m = makeStackedLearner(base.learners = lrns, predict.type = "prob", method = "compress", - parset = list(k = 5, prob = 0.3)) + parset = list(k = 5, prob = 0.3)) tmp = train(m, tsk) res = predict(tmp, tsk) @@ -104,7 +105,7 @@ test_that("Parameters for compress model", { lrns = lapply(base, makeLearner) lrns = lapply(lrns, setPredictType, "response") m = makeStackedLearner(base.learners = lrns, predict.type = "response", method = "compress", - parset = list(k = 5, prob = 0.3)) + parset = list(k = 5, prob = 0.3)) tmp = train(m, tsk) res = predict(tmp, tsk) }) diff --git a/tests/testthat/test_surv_CoxBoost.R b/tests/testthat/test_surv_CoxBoost.R index 15aa3b9b30..a552ce3ee6 100644 
--- a/tests/testthat/test_surv_CoxBoost.R +++ b/tests/testthat/test_surv_CoxBoost.R @@ -18,10 +18,10 @@ test_that("surv_CoxBoost", { penalty = 9 * sum(y[, "status"]) info = getFixDataInfo(surv.train, factors.to.dummies = TRUE, ordered.to.int = TRUE) pars = c(list(time = unname(y[, "time"]), status = unname(y[, "status"]), return.score = FALSE, penalty = penalty, - x = as.matrix(fixDataForLearner(x, info))), parset) + x = as.matrix(fixDataForLearner(x, info))), parset) set.seed(getOption("mlr.debug.seed")) m = do.call(CoxBoost::CoxBoost, pars) - p = as.numeric(predict(m, newdata = as.matrix(fixDataForLearner(dropNamed(surv.test, surv.target), info)), type = "lp")) + p = as.numeric(predict(m, newdata = as.matrix(fixDataForLearner(dropNamed(surv.test, surv.target), info)), type = "lp")) old.predicts.list[[i]] = as.numeric(p) } diff --git a/tests/testthat/test_surv_coxph.R b/tests/testthat/test_surv_coxph.R index e550eae55f..5faa8ecaff 100644 --- a/tests/testthat/test_surv_coxph.R +++ b/tests/testthat/test_surv_coxph.R @@ -18,7 +18,7 @@ test_that("surv_coxph", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) m = do.call(survival::coxph, pars) - p = predict(m, newdata = surv.test, type = "lp") + p = predict(m, newdata = surv.test, type = "lp") old.predicts.list[[i]] = p } diff --git a/tests/testthat/test_surv_cv.CoxBoost.R b/tests/testthat/test_surv_cv.CoxBoost.R index 686bdf6dd4..0a2779f898 100644 --- a/tests/testthat/test_surv_cv.CoxBoost.R +++ b/tests/testthat/test_surv_cv.CoxBoost.R @@ -9,7 +9,7 @@ test_that("surv_cv.CoxBoost", { ) old.predicts.list = list() - #i = 1 + # i = 1 for (i in seq_along(parset.list)) { parset = parset.list[[i]] y = as.matrix(surv.train[, surv.target]) @@ -40,4 +40,3 @@ test_that("surv_cv.CoxBoost", { p = predict(m, task = surv.task) } }) - diff --git a/tests/testthat/test_surv_cvglmnet.R b/tests/testthat/test_surv_cvglmnet.R index 7498dc38bb..138426ce85 100644 --- a/tests/testthat/test_surv_cvglmnet.R +++ b/tests/testthat/test_surv_cvglmnet.R @@ -28,7 +28,7 @@ test_that("surv_cvglmnet", { } else { m = do.call(glmnet::cv.glmnet, pars) } - p = predict(m, newx = as.matrix(surv.test[, -c(1, 2, 7)]), type = "link") + p = predict(m, newx = as.matrix(surv.test[, -c(1, 2, 7)]), type = "link") old.predicts.list[[i]] = as.numeric(p) } diff --git a/tests/testthat/test_surv_gamboost.R b/tests/testthat/test_surv_gamboost.R index ad742e6cb3..1c4b7a1c32 100644 --- a/tests/testthat/test_surv_gamboost.R +++ b/tests/testthat/test_surv_gamboost.R @@ -26,10 +26,9 @@ test_that("surv_gamboost", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) m = do.call(mboost::gamboost, pars) - p = predict(m, newdata = surv.test, type = "link") + p = predict(m, newdata = surv.test, type = "link") old.predicts.list[[i]] = drop(p) } testSimpleParsets("surv.gamboost", surv.df, surv.target, surv.train.inds, old.predicts.list, parset.list2) }) - diff --git a/tests/testthat/test_surv_gbm.R b/tests/testthat/test_surv_gbm.R index a426f1eb43..4f905e0b5a 100644 --- a/tests/testthat/test_surv_gbm.R +++ b/tests/testthat/test_surv_gbm.R @@ -7,7 +7,7 @@ test_that("surv_gbm", { list(), list(n.trees = 100L), list(interaction.depth = 2L) - ) + ) old.predicts.list = list() diff --git a/tests/testthat/test_surv_glmboost.R b/tests/testthat/test_surv_glmboost.R index 02e6a20634..48d13d13f7 100644 --- a/tests/testthat/test_surv_glmboost.R +++ b/tests/testthat/test_surv_glmboost.R @@ -26,7 +26,7 @@ test_that("surv_glmboost", { pars = c(pars, parset) set.seed(getOption("mlr.debug.seed")) m = 
do.call(mboost::glmboost, pars) - p = predict(m, newdata = surv.test, type = "link") + p = predict(m, newdata = surv.test, type = "link") old.predicts.list[[i]] = drop(p) } diff --git a/tests/testthat/test_surv_glmnet.R b/tests/testthat/test_surv_glmnet.R index 2c9bf37c94..b3009fd8f4 100644 --- a/tests/testthat/test_surv_glmnet.R +++ b/tests/testthat/test_surv_glmnet.R @@ -29,7 +29,7 @@ test_that("surv_glmnet", { } else { m = do.call(glmnet::glmnet, pars) } - p = predict(m, newx = as.matrix(surv.test[, -c(1, 2, 7)]), type = "link", s = 0.01) + p = predict(m, newx = as.matrix(surv.test[, -c(1, 2, 7)]), type = "link", s = 0.01) old.predicts.list[[i]] = as.numeric(p) } diff --git a/tests/testthat/test_surv_measures.R b/tests/testthat/test_surv_measures.R index 24867742c5..7e2341f13b 100644 --- a/tests/testthat/test_surv_measures.R +++ b/tests/testthat/test_surv_measures.R @@ -19,8 +19,9 @@ test_that("survival measures do not do stupid things", { r = range(measure$worst, measure$best) x = aggr[[sprintf("%s.test.mean", measure$id)]] expect_number(x, lower = r[1], upper = r[2], label = sprintf("%s/%s", lrn$id, measure$id)) - if (!anyInfinite(r)) + if (!anyInfinite(r)) { expect_true(abs(x - measure$worst) >= abs(x - measure$best), label = sprintf("%s/%s", lrn$id, measure$id)) + } } } }) diff --git a/tests/testthat/test_tuneParams.R b/tests/testthat/test_tuneParams.R index cf10ed3c26..a145f58ec3 100644 --- a/tests/testthat/test_tuneParams.R +++ b/tests/testthat/test_tuneParams.R @@ -41,7 +41,7 @@ test_that("tuneParams with resample.fun", { ctrl = suppressWarnings({ # this currently is a warning because printHead is in mlr and BBmisc - makeTuneControlMBO(budget = 10, learner = "regr.lm") + makeTuneControlMBO(budget = 10, learner = "regr.lm") }) tr = tuneParams(lrn, multiclass.task, rdesc, par.set = ps, control = ctrl, resample.fun = constant05Resample) expect_true(all(getOptPathY(tr$opt.path) == 0.5)) diff --git a/tests/testthat/test_tune_ModelMultiplexer.R b/tests/testthat/test_tune_ModelMultiplexer.R index 7b06c13918..88d1c27158 100644 --- a/tests/testthat/test_tune_ModelMultiplexer.R +++ b/tests/testthat/test_tune_ModelMultiplexer.R @@ -68,14 +68,18 @@ test_that("FailureModel works", { expect_false(isFailureModel(mod)) lrn = setHyperPars(lrn, classif.__mlrmocklearners__2.alpha = 0) - expect_warning({mod = train(lrn, task = iris.task)}, "foo") + expect_warning({ + mod = train(lrn, task = iris.task) + }, "foo") expect_true(isFailureModel(mod)) tmp = getMlrOptions()$on.learner.error configureMlr(on.learner.error = "warn") lrn = setHyperPars(lrn, classif.__mlrmocklearners__2.alpha = 1) lrn = removeHyperPars(lrn, "selected.learner") - expect_warning({mod = train(lrn, task = iris.task)}) + expect_warning({ + mod = train(lrn, task = iris.task) + }) expect_true(isFailureModel(mod)) configureMlr(on.learner.error = tmp) }) @@ -152,23 +156,25 @@ test_that("ModelMultiplexer handles tasks with no features", { # issue #760 test_that("ModelMultiplexer passes on hyper pars in predict with both", { test.ps = makeRLearnerClassif("test.ps", character(0), - makeParamSet(makeIntegerLearnerParam("tpTRAIN", when = "train"), - makeIntegerLearnerParam("tpPREDICT", when = "predict"), - makeIntegerLearnerParam("tpBOTH", when = "both")), - properties = c("numerics", "twoclass")) + makeParamSet(makeIntegerLearnerParam("tpTRAIN", when = "train"), + makeIntegerLearnerParam("tpPREDICT", when = "predict"), + makeIntegerLearnerParam("tpBOTH", when = "both")), + properties = c("numerics", "twoclass")) 
test.ps$fix.factors.prediction = TRUE opts = NULL trainLearner.test.ps = function(.learner, .task, .subset, .weights = NULL, ...) { - opts <<- list(...) # nolint + + opts <<- list(...) # nolint # the following to make the type checking happy list(dummy = getTaskData(.task, .subset)[[getTaskTargetNames(.task)[1]]][1]) } registerS3method("trainLearner", "test.ps", trainLearner.test.ps) predictLearner.test.ps = function(.learner, .model, .newdata, ...) { - opts <<- list(...) # nolint - rep(.model$learner.model$dummy, nrow(.newdata)) # just do something + + opts <<- list(...) # nolint + rep(.model$learner.model$dummy, nrow(.newdata)) # just do something } registerS3method("predictLearner", "test.ps", predictLearner.test.ps) diff --git a/tests/testthat/test_tune_getTuneResultOptPath.R b/tests/testthat/test_tune_getTuneResultOptPath.R index ee217da929..308f85fbae 100644 --- a/tests/testthat/test_tune_getTuneResultOptPath.R +++ b/tests/testthat/test_tune_getTuneResultOptPath.R @@ -1,16 +1,15 @@ context("getTuneResultOptPath") test_that("getTuneResultOptPath", { - ctrl = makeTuneControlRandom(maxit = 10L) rdesc = makeResampleDesc("CV", iters = 3L) ps = makeParamSet( makeDiscreteParam("C", values = seq(1:1000) - )) + )) rdesc = makeResampleDesc("CV", iters = 3L) res = tuneParams("classif.ksvm", task = iris.task, resampling = rdesc, - par.set = ps, control = ctrl) + par.set = ps, control = ctrl) expect_equal(res$opt.path, getTuneResultOptPath(res, as.df = FALSE)) expect_equal(as.data.frame(res$opt.path), getTuneResultOptPath(res)) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test_tune_tuneGrid.R b/tests/testthat/test_tune_tuneGrid.R index e4273e9493..984bf0a455 100644 --- a/tests/testthat/test_tune_tuneGrid.R +++ b/tests/testthat/test_tune_tuneGrid.R @@ -45,7 +45,6 @@ test_that("tuneGrid", { op2.2 = as.data.frame(trafoOptPath(tr2.2$opt.path)) op1.2$exec.time = op2.2$exec.time = NULL expect_equal(sortByCol(op1.2, c("C", "sigma")), sortByCol(op2.2, c("C", "sigma"))) - }) test_that("tuneGrid works with dependent params", { diff --git a/tests/testthat/test_tune_tuneIrace.R b/tests/testthat/test_tune_tuneIrace.R index 41f0e0ef7b..8471689146 100644 --- a/tests/testthat/test_tune_tuneIrace.R +++ b/tests/testthat/test_tune_tuneIrace.R @@ -50,7 +50,6 @@ test_that("tuneIrace works with dependent params", { ctrl = makeTuneControlRandom(maxit = 5L) rdesc = makeResampleDesc("Holdout") res = tuneParams("classif.ksvm", sonar.task, rdesc, par.set = ps, control = ctrl) - }) # we had a bug here @@ -158,10 +157,9 @@ test_that("irace handles parameters with unsatisfiable requirement gracefully", lrn = makeLearner("classif.J48") ctrl = makeTuneControlIrace(maxExperiments = 20L, nbIterations = 1L, minNbSurvival = 1L) - ps = makeParamSet(makeNumericParam("C", 0.1, 0.3, requires = quote(R != R)), makeLogicalParam("R")) # C never feasible + ps = makeParamSet(makeNumericParam("C", 0.1, 0.3, requires = quote(R != R)), makeLogicalParam("R")) # C never feasible res = tuneParams(lrn, pid.task, hout, par.set = ps, control = ctrl) - ps = makeParamSet(makeNumericParam("C", 0.1, 0.3), makeLogicalParam("R", requires = quote(C > 1))) # R never feasible + ps = makeParamSet(makeNumericParam("C", 0.1, 0.3), makeLogicalParam("R", requires = quote(C > 1))) # R never feasible res = tuneParams(lrn, sonar.task, hout, par.set = ps, control = ctrl) }) - diff --git a/tests/testthat/test_tune_tuneParamsMultiCrit.R b/tests/testthat/test_tune_tuneParamsMultiCrit.R index b0001d2bd4..ea7de1c504 100644 --- 
a/tests/testthat/test_tune_tuneParamsMultiCrit.R +++ b/tests/testthat/test_tune_tuneParamsMultiCrit.R @@ -1,7 +1,7 @@ context("tuneParamsMultiCrit") test_that("tuneParamsMultiCrit", { - lrn = makeLearner("classif.rpart") + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( makeIntegerParam("minsplit", lower = 1, upper = 50) @@ -9,9 +9,10 @@ test_that("tuneParamsMultiCrit", { ctrl = makeTuneMultiCritControlRandom(maxit = 2) expect_error(tuneParamsMultiCrit(lrn, binaryclass.task, rdesc, par.set = ps, measures = mmce, control = ctrl), - ".* May only contain the following types: Measure.") + ".* May only contain the following types: Measure.") mycheck = function(res, k) { + expect_output(print(res), "Points on front") expect_true(is.integer(res$ind)) expect_true(is.list(res$x)) @@ -105,7 +106,7 @@ test_that("y imputing works", { }) test_that("tuneParamsMultiCrit with budget", { - lrn = makeLearner("classif.rpart") + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( makeNumericParam("cp", lower = 0.001, upper = 1), @@ -113,9 +114,11 @@ test_that("tuneParamsMultiCrit with budget", { ) mycheck = function(ctrl, expected.budget) { + if ("TuneMultiCritControlGrid" %in% class(ctrl)) { - if (!is.null(ctrl$budget)) + if (!is.null(ctrl$budget)) { expect_equal(ctrl$budget, expected.budget) + } } else { expect_equal(ctrl$budget, expected.budget) } @@ -140,7 +143,7 @@ test_that("tuneParamsMultiCrit with budget", { ctrl = makeTuneMultiCritControlGrid(resolution = 3, budget = 10L) expect_error(tuneParamsMultiCrit(lrn, binaryclass.task, rdesc, par.set = ps, measures = list(tpr, fpr), control = ctrl), - ".* does not fit to the size of the grid .*") + ".* does not fit to the size of the grid .*") # nsga2 ctrl = makeTuneMultiCritControlNSGA2(popsize = 4L, generations = 1L) @@ -158,7 +161,7 @@ test_that("plotTuneMultiCritResult works with pretty.names", { lrn = makeLearner("classif.rpart") ps = makeParamSet( makeDiscreteParam("minsplit", values = c(5, 10)) - ) + ) ctrl.grid = makeTuneMultiCritControlGrid() opt.multi.crit = tuneParamsMultiCrit(lrn, multiclass.task, hout, list(mmce, acc), par.set = ps, control = ctrl.grid) @@ -167,7 +170,7 @@ test_that("plotTuneMultiCritResult works with pretty.names", { }) test_that("tuneParamsMultiCrit with resample.fun", { - lrn = makeLearner("classif.rpart") + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( makeIntegerParam("minsplit", lower = 1, upper = 50) @@ -199,7 +202,7 @@ test_that("tuneParamsMultiCrit with resample.fun", { }) test_that("check n.objectives for MBO multi crit", { - lrn = makeLearner("classif.rpart") + lrn = makeLearner("classif.rpart") rdesc = makeResampleDesc("Holdout") ps = makeParamSet( makeIntegerParam("minsplit", lower = 1, upper = 50) @@ -213,8 +216,8 @@ test_that("check n.objectives for MBO multi crit", { expect_error(tuneParamsMultiCrit(lrn, binaryclass.task, rdesc, measures = list(mmce), par.set = ps, control = ctrl), - ".* Must have length >= 2, but has length 1.") + ".* Must have length >= 2, but has length 1.") expect_error(tuneParamsMultiCrit(lrn, binaryclass.task, rdesc, measures = list(mmce, tpr, fpr), par.set = ps, control = ctrl), - ".* Must have length 2, but has length 3.") + ".* Must have length 2, but has length 3.") }) diff --git a/tests/testthat/test_tune_tuneThreshold.R b/tests/testthat/test_tune_tuneThreshold.R index ce81f98221..e54662f463 100644 --- a/tests/testthat/test_tune_tuneThreshold.R +++ 
b/tests/testthat/test_tune_tuneThreshold.R @@ -24,7 +24,7 @@ test_that("tuheThreshold works with all tuning methods", { ctrls = list( gensa = makeTuneControlGenSA(start = list(nu = 2.5), maxit = 1, tune.threshold = TRUE), cmaes = makeTuneControlCMAES(start = list(nu = 2.5), maxit = 1, tune.threshold = TRUE), - design = makeTuneControlDesign(design = generateDesign(n = 2, par.set = ps), tune.threshold = TRUE), + design = makeTuneControlDesign(design = generateDesign(n = 2, par.set = ps), tune.threshold = TRUE), grid = makeTuneControlGrid(resolution = 2L, tune.threshold = TRUE), irace = makeTuneControlIrace(maxExperiments = 12, nbIterations = 1L, minNbSurvival = 1, tune.threshold = TRUE) ) @@ -33,5 +33,4 @@ test_that("tuheThreshold works with all tuning methods", { res = resample(lrn.tuned, binaryclass.task, resampling = makeResampleDesc("Holdout"), extract = getTuneResult) expect_number(res$extract[[1]]$threshold) } - }) From b60617ac5b14b93eafa5225b43f662f63d9e84fd Mon Sep 17 00:00:00 2001 From: GitHub Date: Sun, 14 Apr 2019 23:26:53 +0000 Subject: [PATCH 2/4] Deploy from Travis build TRUE [ci skip] Build URL: https://travis-ci.org/mlr-org/mlr/builds/TRUE Commit: ed3dda1b68daeb99ca255ea33706d447e72d7cd6 --- docs/articles/tutorial/create_imputation.html | 21 ++++++++++--------- docs/articles/tutorial/create_learner.html | 9 +++++--- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/docs/articles/tutorial/create_imputation.html b/docs/articles/tutorial/create_imputation.html index 6e0658bd28..2cb3b765fc 100644 --- a/docs/articles/tutorial/create_imputation.html +++ b/docs/articles/tutorial/create_imputation.html @@ -307,18 +307,19 @@

imputeMean (imputations()) calls the unexported mlr function simpleImpute, which is defined as follows.

+  }
+  x = data[[col]]
+  if (is.logical(x) && !is.logical(const)) {
+    x = as.factor(x)
+  }
+  if (is.factor(x) && const %nin% levels(x)) {
+    levels(x) = c(levels(x), as.character(const))
+  }
+  replace(x, is.na(x), const)
+}

The learn function calculates the mean of the non-missing observations in column col. The mean is passed via the argument const to the impute function, which replaces all missing values in feature col.
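To make the plumbing concrete, here is a minimal sketch of a user-defined imputation method built the same way, assuming (as described above) that the value returned by learn is forwarded to impute as const. mlr already ships imputeMedian(), so the method below is purely illustrative.

library(mlr)

# Illustrative only: a median-based method following the imputeMean() pattern.
# learn() computes the constant on the training data; impute() fills the
# missing cells of the column with that constant.
imputeMedianSketch = function() {
  makeImputeMethod(
    learn = function(data, target, col) median(data[[col]], na.rm = TRUE),
    impute = function(data, target, col, const) {
      replace(data[[col]], is.na(data[[col]]), const)
    }
  )
}

# Usage sketch:
# imp = impute(airquality, cols = list(Ozone = imputeMedianSketch()))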

    diff --git a/docs/articles/tutorial/create_learner.html b/docs/articles/tutorial/create_learner.html index af35d9b564..e13d55be13 100644 --- a/docs/articles/tutorial/create_learner.html +++ b/docs/articles/tutorial/create_learner.html @@ -411,10 +411,13 @@

{
  p = predict(.model$learner.model, newdata = .newdata, method = predict.method, ...)
- if (.learner$predict.type == "response")
+ if (.learner$predict.type == "response") {
    return(p$class)
- else return(p$posterior)
-}
+ }
+ else {
+   return(p$posterior)
+ }
+}
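For context, the hunk above reformats a predictLearner method shown in the create_learner tutorial: it dispatches on the learner's predict.type and returns either the class labels or the posterior matrix. Written out, the new version has this shape (the method name classif.mylda and the signature are illustrative, patterned on the tutorial's LDA example, not taken from the diff):

predictLearner.classif.mylda = function(.learner, .model, .newdata, predict.method = "plug-in", ...) {
  # predict() on an lda-style model returns $class and $posterior
  p = predict(.model$learner.model, newdata = .newdata, method = predict.method, ...)
  if (.learner$predict.type == "response") {
    return(p$class)
  } else {
    return(p$posterior)
  }
}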
    From 5b3b76f3a69545a9b5d2944b47235e7f97040a23 Mon Sep 17 00:00:00 2001 From: pat-s Date: Sun, 21 Apr 2019 23:11:16 +0200 Subject: [PATCH 3/4] new version --- R/aggregations.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/aggregations.R b/R/aggregations.R index 3bb86b20be..bda4ffb0f6 100644 --- a/R/aggregations.R +++ b/R/aggregations.R @@ -206,9 +206,11 @@ b632plus = makeAggregation( y1 = df2$truth y2 = df2$response grid = expand.grid(y1, y2, KEEP.OUT.ATTRS = FALSE) - pred2 = makePrediction(task.desc = pred$task.desc, row.names = rownames(grid), + pred2 = makePrediction( + task.desc = pred$task.desc, row.names = rownames(grid), id = NULL, truth = grid[, 1L], predict.type = "response", y = grid[, 2L], - time = NA_real_) + time = NA_real_ + ) gamma = performance(pred2, measures = measure) R = (perf.test[i] - perf.train[i]) / (gamma - perf.train[i]) w = 0.632 / (1 - 0.368 * R) @@ -256,9 +258,11 @@ test.join = makeAggregation( y = df[, stri_startswith_fixed(colnames(df), "prob."), drop = FALSE] colnames(y) = stri_sub(colnames(y), 6L) } - npred = makePrediction(task.desc = pred$task.desc, row.names = rownames(df), + npred = makePrediction( + task.desc = pred$task.desc, row.names = rownames(df), id = NULL, truth = df$truth, predict.type = pred$predict.type, y = y, - time = NA_real_) + time = NA_real_ + ) performance(npred, measure) })) }) From 2119093883fcc2488e7ecaa7c261fd066dd2d0d9 Mon Sep 17 00:00:00 2001 From: pat-s Date: Sun, 21 Apr 2019 22:05:35 +0000 Subject: [PATCH 4/4] Deploy from Travis build TRUE [ci skip] Build URL: https://travis-ci.org/mlr-org/mlr/builds/TRUE Commit: 5b3b76f3a69545a9b5d2944b47235e7f97040a23 --- docs/ISSUE_TEMPLATE.html | 2 +- docs/articles/tutorial/create_learner.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ISSUE_TEMPLATE.html b/docs/ISSUE_TEMPLATE.html index 01a3cf99ba..ed410ceadd 100644 --- a/docs/ISSUE_TEMPLATE.html +++ b/docs/ISSUE_TEMPLATE.html @@ -308,7 +308,7 @@

    Bug report

    • [ ] Start a new R session
• [ ] Install the latest version of mlr: update.packages(oldPkgs="mlr", ask=FALSE) or if you use a GitHub install of mlr: devtools::install_github(c("BBmisc", "ParamHelpers", "mlr"))
    • [ ] run sessionInfo()
diff --git a/docs/articles/tutorial/create_learner.html index 77d8590c28..c7befd985c 100644 --- a/docs/articles/tutorial/create_learner.html +++ b/docs/articles/tutorial/create_learner.html @@ -699,7 +699,7 @@

All tested parameter configurations are collected in the parset.list. To make sure that the default parameter configuration is tested as well, the first element of the parset.list is an empty list (base::list()). Then we simply loop over all parameter settings and store the resulting predictions in old.predicts.list. Again, the helper function testSimpleParsets does the same via the mlr interface and compares the outcomes.
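Concretely, the pattern looks like this (sketched for rpart; the regr.* fixtures come from the test helpers, and testSimpleParsets is the internal comparison helper named above):

parset.list = list(
  list(),               # the empty list exercises the learner's defaults
  list(minsplit = 10L),
  list(cp = 0.05)
)
old.predicts.list = list()
for (i in seq_along(parset.list)) {
  pars = c(list(regr.formula, data = regr.train), parset.list[[i]])
  set.seed(getOption("mlr.debug.seed"))
  m = do.call(rpart::rpart, pars)
  old.predicts.list[[i]] = predict(m, newdata = regr.test)
}
testSimpleParsets("regr.rpart", regr.df, regr.target, regr.train.inds,
  old.predicts.list, parset.list)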

In addition to tests for individual learners, we also have general tests that loop through all integrated learners and, for example, make sure that learners have the correct properties (e.g. a learner with property "factors" can cope with factor (base::factor()) features, a learner with property "weights" takes observation weights into account properly, etc.). For example, https://github.com/mlr-org/mlr/blob/master/tests/testthat/test_learners_all_classif.R runs through all classification learners. Similar tests exist for all types of learning methods, such as regression, cluster and survival analysis, as well as multilabel classification.
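A compressed sketch of that idea (heavily simplified; the real all-learner tests are far more thorough, listLearners(create = TRUE) assumes the learners' packages are installed, and bc.task is used here on the assumption that its features are factors):

library(mlr)

# Loop over all integrated classification learners and spot-check one
# declared property: learners advertising "factors" must handle factor features.
lrns = listLearners("classif", create = TRUE)
for (lrn in lrns) {
  if ("factors" %in% getLearnerProperties(lrn)) {
    mod = try(train(lrn, bc.task), silent = TRUE)
    if (!inherits(mod, "try-error")) predict(mod, bc.task)
  }
}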

To run all tests for, e.g., classification learners on your machine, you can invoke the tests from within R by

devtools::test("mlr", filter = "classif")

      or from the command line using Michel’s rt tool

      rtest --filter=classif