diff --git a/mjpc/planners/cross_entropy/planner.cc b/mjpc/planners/cross_entropy/planner.cc index 2177dbf7e..25adc6708 100644 --- a/mjpc/planners/cross_entropy/planner.cc +++ b/mjpc/planners/cross_entropy/planner.cc @@ -55,8 +55,11 @@ void CrossEntropyPlanner::Initialize(mjModel* model, const Task& task) { // sampling noise std_initial_ = GetNumberOrDefault(0.1, model, - "sampling_exploration"); // initial variance - std_min_ = GetNumberOrDefault(0.1, model, "std_min"); // minimum variance + "sampling_exploration"); // initial variance + std_min_ = GetNumberOrDefault(0.01, model, "std_min"); // minimum variance + // fraction of the trajectories that will use full exploration noise + explore_fraction_ = + GetNumberOrDefault(0.0, model, "explore_fraction"); // set number of trajectories to rollout num_trajectory_ = GetNumberOrDefault(10, model, "sampling_trajectories"); @@ -389,14 +392,21 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon, // lock std_min double std_min = std_min_; + double std_initial = std_initial_; // random search int count_before = pool.GetCount(); for (int i = 0; i < num_trajectory; i++) { + double std; + if (i < num_trajectory * explore_fraction_) { + std = std_initial; + } else { + std = std_min; + } pool.Schedule([&s = *this, &model = this->model, &task = this->task, &state = this->state, &time = this->time, &mocap = this->mocap, &userdata = this->userdata, horizon, - std_min, i]() { + std, i]() { // copy nominal policy and sample noise { const std::shared_lock lock(s.mtx_); @@ -406,7 +416,7 @@ void CrossEntropyPlanner::Rollouts(int num_trajectory, int horizon, s.resampled_policy.plan.Interpolation()); // sample noise - s.AddNoiseToPolicy(i, std_min); + s.AddNoiseToPolicy(i, std); } // ----- rollout sample policy ----- // @@ -491,6 +501,7 @@ void CrossEntropyPlanner::GUI(mjUI& ui) { {mjITEM_SLIDERINT, "Spline Pts", 2, &policy.num_spline_points, "0 1"}, {mjITEM_SLIDERNUM, "Init. Std", 2, &std_initial_, "0 1"}, {mjITEM_SLIDERNUM, "Min. Std", 2, &std_min_, "0.01 0.5"}, + {mjITEM_SLIDERNUM, "Explore", 2, &explore_fraction_, "0.0 1.0"}, {mjITEM_SLIDERINT, "Elite", 2, &n_elite_, "2 128"}, {mjITEM_END}}; diff --git a/mjpc/planners/cross_entropy/planner.h b/mjpc/planners/cross_entropy/planner.h index 0f32f4bf9..88d5ca9d1 100644 --- a/mjpc/planners/cross_entropy/planner.h +++ b/mjpc/planners/cross_entropy/planner.h @@ -122,6 +122,8 @@ class CrossEntropyPlanner : public Planner { double std_initial_; // standard deviation for sampling normal: N(0, // std) double std_min_; // the minimum allowable std + double explore_fraction_ = 0; // fraction of trajectories that will use + // std_initial instead of the variance from CEM std::vector noise; std::vector variance; diff --git a/mjpc/tasks/shadow_reorient/task.xml b/mjpc/tasks/shadow_reorient/task.xml index d3802238e..f630a0cbc 100644 --- a/mjpc/tasks/shadow_reorient/task.xml +++ b/mjpc/tasks/shadow_reorient/task.xml @@ -4,13 +4,17 @@ - + - + + + + +