Merge pull request #309 from google-deepmind/deepmind

Merge deepmind branch into main.
google-deepmind · Mar 27, 2024 · 6d65ce4 · 6d65ce4
2 parents 4700f4a + e95fffb
commit 6d65ce4
Show file tree

Hide file tree

Showing 12 changed files with 258 additions and 228 deletions.
diff --git a/README.md b/README.md
@@ -96,8 +96,8 @@ sudo apt-get install libgl1-mesa-dev libxinerama-dev libxcursor-dev libxrandr-de
 
 ### Build Issues
 If you encounter build issues, please see the
-[Github Actions configuration](https://github.com/google-deepmind/mujoco_mpc/blob/main/.github/workflows/build.yml). 
-This provides the exact setup we use for building MJPC for testing. 
+[Github Actions configuration](https://github.com/google-deepmind/mujoco_mpc/blob/main/.github/workflows/build.yml).
+This provides the exact setup we use for building MJPC for testing.
 
 We recommend building with `clang` and not `gcc`.
 

diff --git a/docs/ESTIMATORS.md b/docs/ESTIMATORS.md
@@ -177,4 +177,4 @@ P_{t+1:T | 0:t} = P_{[t+1:T, t+1:T]} - P_{[t+1:T, 0:t]} P_{[0:t, 0:t]}^{-1} P_{[
 [Physically-Consistent Sensor Fusion in Contact-Rich Behaviors](https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=31c0842aa1d4808541a64014c24928123e1d949e).
 Kendall Lowrey, Svetoslav Kolev, Yuval Tassa, Tom Erez, Emo Todorov. 2014.
 
-[Batch API](../mjpc/estimators/batch.h)
+[Batch API](../mjpc/estimators/batch.h)
diff --git a/mjpc/app.cc b/mjpc/app.cc
@@ -29,7 +29,6 @@
 #include <vector>
 
 #include <absl/flags/flag.h>
-#include <absl/strings/match.h>
 #include <mujoco/mujoco.h>
 #include <glfw_adapter.h>
 #include "mjpc/array_safety.h"
@@ -40,8 +39,6 @@
 #include "mjpc/threadpool.h"
 #include "mjpc/utilities.h"
 
-ABSL_FLAG(std::string, task, "Quadruped Flat",
-          "Which model to load on startup.");
 ABSL_FLAG(bool, planner_enabled, false,
           "If true, the planner will run on startup");
 ABSL_FLAG(float, sim_percent_realtime, 100,
@@ -393,7 +390,7 @@ void PhysicsLoop(mj::Simulate& sim) {
 
 namespace mjpc {
 
-MjpcApp::MjpcApp(std::vector<std::shared_ptr<mjpc::Task>> tasks) {
+MjpcApp::MjpcApp(std::vector<std::shared_ptr<mjpc::Task>> tasks, int task_id) {
   // MJPC
   printf("MuJoCo MPC (MJPC)\n");
 
@@ -415,18 +412,7 @@ MjpcApp::MjpcApp(std::vector<std::shared_ptr<mjpc::Task>> tasks) {
       std::make_shared<Agent>());
 
   sim->agent->SetTaskList(std::move(tasks));
-  std::string task_name = absl::GetFlag(FLAGS_task);
-  if (task_name.empty()) {  // shouldn't happen, flag has a default value
-    sim->agent->gui_task_id = 0;
-  } else {
-    sim->agent->gui_task_id = sim->agent->GetTaskIdByName(task_name);
-    if (sim->agent->gui_task_id == -1) {
-      std::cerr << "Invalid --task flag: '" << task_name
-                << "'. Valid values:\n";
-      std::cerr << sim->agent->GetTaskNames();
-      mju_error("Invalid --task flag.");
-    }
-  }
+  sim->agent->gui_task_id = task_id;
 
   sim->filename = sim->agent->GetTaskXmlPath(sim->agent->gui_task_id);
   m = LoadModel(sim->agent.get(), *sim);
@@ -520,8 +506,8 @@ mj::Simulate* MjpcApp::Sim() {
   return sim.get();
 }
 
-void StartApp(std::vector<std::shared_ptr<mjpc::Task>> tasks) {
-  MjpcApp app(std::move(tasks));
+void StartApp(std::vector<std::shared_ptr<mjpc::Task>> tasks, int task_id) {
+  MjpcApp app(std::move(tasks), task_id);
   app.Start();
 }
 

diff --git a/mjpc/app.h b/mjpc/app.h
@@ -24,7 +24,7 @@
 namespace mjpc {
 class MjpcApp {
  public:
-  explicit MjpcApp(std::vector<std::shared_ptr<mjpc::Task>> tasks);
+  MjpcApp(std::vector<std::shared_ptr<mjpc::Task>> tasks, int task_id = 0);
   MjpcApp(const MjpcApp&) = delete;
   MjpcApp& operator=(const MjpcApp&) = delete;
   ~MjpcApp();
@@ -34,7 +34,7 @@ class MjpcApp {
   ::mujoco::Simulate* Sim();
 };
 
-void StartApp(std::vector<std::shared_ptr<mjpc::Task>> tasks);
+void StartApp(std::vector<std::shared_ptr<mjpc::Task>> tasks, int task_id = 0);
 
 }  // namespace mjpc
 

diff --git a/mjpc/direct/direct.h b/mjpc/direct/direct.h
@@ -20,7 +20,6 @@
 #include <vector>
 
 #include <mujoco/mujoco.h>
-
 #include "mjpc/direct/model_parameters.h"
 #include "mjpc/direct/trajectory.h"
 #include "mjpc/norm.h"
@@ -292,19 +291,19 @@ class Direct {
   void ParameterJacobian(int index);
 
   // dimensions
-  int nstate_;
-  int ndstate_;
-  int nsensordata_;
-  int nsensor_;
+  int nstate_ = 0;
+  int ndstate_ = 0;
+  int nsensordata_ = 0;
+  int nsensor_ = 0;
 
-  int ntotal_;  // total number of decision variable
-  int nvel_;    // number of configuration (derivatives) variables
-  int nparam_;  // number of parameter variable (ndense)
-  int nband_;   // cost Hessian band dimension
+  int ntotal_ = 0;  // total number of decision variable
+  int nvel_ = 0;    // number of configuration (derivatives) variables
+  int nparam_ = 0;  // number of parameter variable (ndense)
+  int nband_ = 0;   // cost Hessian band dimension
 
   // sensor indexing
-  int sensor_start_;
-  int sensor_start_index_;
+  int sensor_start_ = 0;
+  int sensor_start_index_ = 0;
 
   // perturbed models (for parameter estimation)
   std::vector<UniqueMjModel> model_perturb_;
@@ -313,15 +312,15 @@ class Direct {
   std::vector<UniqueMjData> data_;
 
   // cost
-  double cost_sensor_;
-  double cost_force_;
-  double cost_parameter_;
-  double cost_;
-  double cost_initial_;
-  double cost_previous_;
+  double cost_sensor_ = 0.0;
+  double cost_force_ = 0.0;
+  double cost_parameter_ = 0.0;
+  double cost_ = 0.0;
+  double cost_initial_ = 0.0;
+  double cost_previous_ = 0.0;
 
   // lengths
-  int configuration_length_;  // T
+  int configuration_length_ = 0;  // T
 
   // configuration copy
   DirectTrajectory<double> configuration_copy_;  // nq x max_history_
@@ -443,58 +442,58 @@ class Direct {
   // dense cost Hessian rows (for parameter derivatives)
   std::vector<double> dense_force_parameter_;   // nparam x ntotal
   std::vector<double> dense_sensor_parameter_;  // nparam x ntotal
-  std::vector<double> dense_parameter_;   // nparam x ntotal
+  std::vector<double> dense_parameter_;         // nparam x ntotal
 
   // model parameters
   std::vector<std::unique_ptr<mjpc::ModelParameters>> model_parameters_;
-  int model_parameters_id_;
+  int model_parameters_id_ = 0;
 
   // status (internal)
-  int cost_count_;          // number of cost evaluations
+  int cost_count_ = 0;      // number of cost evaluations
   bool cost_skip_ = false;  // flag for only evaluating cost derivatives
 
   // status (external)
-  int iterations_smoother_;       // total smoother iterations after Optimize
-  int iterations_search_;         // total line search iterations
-  double gradient_norm_;          // norm of cost gradient
-  double regularization_;         // regularization
-  double step_size_;              // step size for line search
-  double search_direction_norm_;  // search direction norm
-  DirectStatus solve_status_;     // solve status
-  double cost_difference_;        // cost difference: abs(cost - cost_previous)
-  double improvement_;            // cost improvement
-  double expected_;               // expected cost improvement
-  double reduction_ratio_;  // reduction ratio: cost_improvement / expected cost
-                            // improvement
+  int iterations_smoother_ = 0;  // total smoother iterations after Optimize
+  int iterations_search_ = 0;    // total line search iterations
+  double gradient_norm_ = 0.0;   // norm of cost gradient
+  double regularization_ = 0.0;  // regularization
+  double step_size_ = 0.0;       // step size for line search
+  double search_direction_norm_ = 0.0;     // search direction norm
+  DirectStatus solve_status_ = kUnsolved;  // solve status
+  double cost_difference_ = 0.0;  // cost difference: abs(cost - cost_previous)
+  double improvement_ = 0.0;      // cost improvement
+  double expected_ = 0.0;         // expected cost improvement
+  double reduction_ratio_ = 0.0;  // reduction ratio: cost_improvement /
+                                  // expected cost improvement
 
   // timers
   struct DirectTimers {
-    double inverse_dynamics_derivatives;
-    double velacc_derivatives;
-    double jacobian_sensor;
-    double jacobian_force;
-    double jacobian_total;
-    double cost_sensor_derivatives;
-    double cost_force_derivatives;
-    double cost_total_derivatives;
-    double cost_gradient;
-    double cost_hessian;
-    double cost_derivatives;
-    double cost;
-    double cost_sensor;
-    double cost_force;
-    double cost_config_to_velacc;
-    double cost_prediction;
-    double residual_sensor;
-    double residual_force;
-    double search_direction;
-    double search;
-    double configuration_update;
-    double optimize;
-    double update_trajectory;
+    double inverse_dynamics_derivatives = 0.0;
+    double velacc_derivatives = 0.0;
+    double jacobian_sensor = 0.0;
+    double jacobian_force = 0.0;
+    double jacobian_total = 0.0;
+    double cost_sensor_derivatives = 0.0;
+    double cost_force_derivatives = 0.0;
+    double cost_total_derivatives = 0.0;
+    double cost_gradient = 0.0;
+    double cost_hessian = 0.0;
+    double cost_derivatives = 0.0;
+    double cost = 0.0;
+    double cost_sensor = 0.0;
+    double cost_force = 0.0;
+    double cost_config_to_velacc = 0.0;
+    double cost_prediction = 0.0;
+    double residual_sensor = 0.0;
+    double residual_force = 0.0;
+    double search_direction = 0.0;
+    double search = 0.0;
+    double configuration_update = 0.0;
+    double optimize = 0.0;
+    double update_trajectory = 0.0;
     std::vector<double> sensor_step;
     std::vector<double> force_step;
-    double update;
+    double update = 0.0;
   } timer_;
 
   // max history

diff --git a/mjpc/main.cc b/mjpc/main.cc
@@ -12,18 +12,19 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include <cstdlib>
-#include <cstring>
 #include <iostream>
-#include <ostream>
+#include <string>
 #include <vector>
 #include <absl/flags/parse.h>
 
+#include <absl/flags/flag.h>
+#include <absl/strings/match.h>
+#include <mujoco/mujoco.h>
 #include "mjpc/app.h"
-#include "mjpc/task.h"
-#include "mjpc/utilities.h"
 #include "mjpc/tasks/tasks.h"
 
+ABSL_FLAG(std::string, task, "Quadruped Flat",
+          "Which model to load on startup.");
 
 // machinery for replacing command line error by a macOS dialog box
 // when running under Rosetta
@@ -47,6 +48,24 @@ int main(int argc, char** argv) {
 #endif
   absl::ParseCommandLine(argc, argv);
 
-  mjpc::StartApp(mjpc::GetTasks());
+  std::string task_name = absl::GetFlag(FLAGS_task);
+  auto tasks = mjpc::GetTasks();
+  int task_id = -1;
+  for (int i = 0; i < tasks.size(); i++) {
+    if (absl::EqualsIgnoreCase(task_name, tasks[i]->Name())) {
+      task_id = i;
+      break;
+    }
+  }
+  if (task_id == -1) {
+    std::cerr << "Invalid --task flag: '" << task_name
+              << "'. Valid values:\n";
+    for (int i = 0; i < tasks.size(); i++) {
+      std::cerr << "  " << tasks[i]->Name() << "\n";
+    }
+    mju_error("Invalid --task flag.");
+  }
+
+  mjpc::StartApp(tasks, 11);  // start with quadruped flat
   return 0;
 }
diff --git a/mjpc/tasks/bimanual/reorient/task.xml b/mjpc/tasks/bimanual/reorient/task.xml
@@ -32,7 +32,7 @@
   <sensor>
     <user name="Reach L" dim="3" user="2 0.1 0 .5 0.005"/>
     <user name="Reach R" dim="3" user="2 0.1 0 .5 0.005"/>
-    <user name="Bring" dim="3" user="2 1 0 1 0.003"/>
+    <user name="Bring" dim="3" user="2 1 0 5 0.003"/>
     <user name="Cube Orientation" dim="3" user="0 0.02 0 .2" />
     <user name="Cube Velocity" dim="3" user="0 .01 0 .2" />
     <framequat name="cube_goal_orientation" objtype="body" objname="goal"/>

diff --git a/python/mujoco_mpc/demos/agent/cartpole_gui.py b/python/mujoco_mpc/demos/agent/cartpole_gui.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pathlib
 import mujoco
 from mujoco_mpc import agent as agent_lib
+import pathlib
 
 # Cartpole model
 model_path = (
@@ -32,4 +32,4 @@
     model=model,
 ) as agent:
   while True:
-    None
+    pass
diff --git a/python/mujoco_mpc/demos/predictive_sampling/cube_orientation.py b/python/mujoco_mpc/demos/predictive_sampling/cube_orientation.py
@@ -17,16 +17,15 @@
 import mujoco
 import numpy as np
 import pathlib
-import predictive_sampling
 
+import predictive_sampling
 # %%
 # path to hand task
+
 model_path = (
     pathlib.Path(__file__).parent.parent.parent
-    / "../../build/mjpc/tasks/hand/task.xml"
-
+    / "../../build/mjpc/tasks/shadow_reorient/task.xml"
 )
-
 # create simulation model + data
 model = mujoco.MjModel.from_xml_path(str(model_path))
 data = mujoco.MjData(model)
@@ -36,7 +35,6 @@
 # %%
 # reward
 
-
 def reward(model: mujoco.MjModel, data: mujoco.MjData) -> float:
   # cube position - palm position (L22 norm)
   pos_error = (