Quadruped Config and Envs for Standing #88

Open
wants to merge 11 commits into master
1 change: 1 addition & 0 deletions .gitignore
@@ -5,6 +5,7 @@
# Local files
.env
*.DS_Store
isaacgym
# Python
*.py[oc]
__pycache__/
Binary file modified policy_1.pt
Binary file not shown.
2 changes: 1 addition & 1 deletion sim/algo/ppo/on_policy_runner.py
@@ -96,7 +96,7 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = False):
# initialize writer
if self.log_dir is not None and self.writer is None:
wandb.init(
project="XBot",
project="Quadruped Try",
Collaborator
remove

sync_tensorboard=True,
name=self.wandb_run_name,
config=self.all_cfg,
Expand Down
10 changes: 9 additions & 1 deletion sim/envs/__init__.py
@@ -14,11 +14,17 @@
from sim.envs.humanoids.g1_env import G1FreeEnv
from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.quadruped_config import QuadrupedCfg, QuadrupedCfgPPO
from sim.envs.humanoids.quadruped_env import QuadrupedFreeEnv
from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
from sim.envs.humanoids.stompymini_env import MiniFreeEnv
from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProStandingCfg, StompyProCfgPPO
from sim.envs.humanoids.stompypro_config import (
StompyProCfg,
StompyProCfgPPO,
StompyProStandingCfg,
)
from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
from sim.envs.humanoids.xbot_env import XBotLFreeEnv
@@ -31,5 +37,7 @@
task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
task_registry.register("xbot", XBotLFreeEnv, XBotCfg(), XBotCfgPPO())
task_registry.register("XBotL_free", XBotLFreeEnv, XBotCfg(), XBotCfgPPO())
task_registry.register("stompymicro", StompyMicroEnv, StompyMicroCfg(), StompyMicroCfgPPO())
task_registry.register("quadruped", QuadrupedFreeEnv, QuadrupedCfg(), QuadrupedCfgPPO())
886 changes: 886 additions & 0 deletions sim/envs/base/quadruped_robot.py

Large diffs are not rendered by default.

261 changes: 261 additions & 0 deletions sim/envs/base/quadruped_robot_config.py
@@ -0,0 +1,261 @@
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.

from sim.envs.base.base_config import BaseConfig


class LeggedRobotCfg(BaseConfig):
class env:
num_envs = 4096
num_observations = 235
num_privileged_obs = None # if not None, a privileged_obs_buf is returned by step() (critic obs for asymmetric training); otherwise None is returned
num_actions = 12
env_spacing = 3.0 # not used with heightfields/trimeshes
send_timeouts = True # send time out information to the algorithm
episode_length_s = 20 # episode length in seconds

class terrain:
mesh_type = "trimesh" # "heightfield" # none, plane, heightfield or trimesh
horizontal_scale = 0.1 # [m]
vertical_scale = 0.005 # [m]
border_size = 25 # [m]
curriculum = True
static_friction = 1.0
dynamic_friction = 1.0
restitution = 0.0
# rough terrain only:
measure_heights = True
measured_points_x = [
-0.8,
-0.7,
-0.6,
-0.5,
-0.4,
-0.3,
-0.2,
-0.1,
0.0,
0.1,
0.2,
0.3,
0.4,
0.5,
0.6,
0.7,
0.8,
] # 1mx1.6m rectangle (without center line)
measured_points_y = [-0.5, -0.4, -0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
selected = False # select a unique terrain type and pass all arguments
terrain_kwargs = None # Dict of arguments for selected terrain
max_init_terrain_level = 5 # starting curriculum state
terrain_length = 8.0
terrain_width = 8.0
num_rows = 10 # number of terrain rows (levels)
num_cols = 20 # number of terrain cols (types)
# terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete]
terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2]
# trimesh only:
slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces

class commands:
curriculum = False
max_curriculum = 1.0
num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
resampling_time = 10.0 # time before commands are changed [s]
heading_command = True # if true: compute ang vel command from heading error

class ranges:
lin_vel_x = [0.5, 10.0] # [-1.0, 1.0] # min max [m/s]
lin_vel_y = [0.3, 10.0] # min max [m/s]
ang_vel_pitch = [-1, 1] # min max [rad/s]
heading = [-3.14, 3.14]
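# Illustrative note (added for clarity, not part of the original file): with
# heading_command = True, legged_gym-style envs typically recompute the yaw-rate
# command from the heading error each step, roughly:
#   forward = quat_apply(base_quat, forward_vec)
#   heading = torch.atan2(forward[:, 1], forward[:, 0])
#   commands[:, 2] = torch.clip(0.5 * wrap_to_pi(commands[:, 3] - heading), -1.0, 1.0)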

class init_state:
pos = [0.0, 0.0, 1.0] # x,y,z [m]
rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat]
lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s]
ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s]
default_joint_angles = {"joint_a": 0.0, "joint_b": 0.0} # target angles when action = 0.0

class control:
# PD Drive parameters:
stiffness = {"joint_a": 10.0, "joint_b": 15.0} # [N*m/rad]
damping = {"joint_a": 1.0, "joint_b": 1.5} # [N*m*s/rad]
# action scale: target angle = actionScale * action + defaultAngle
action_scale = 0.5
# decimation: Number of control action updates @ sim DT per policy DT
decimation = 4
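# Illustrative note (added for clarity, not part of the original file): the comments
# above describe the usual legged_gym-style PD loop, roughly:
#   target = action_scale * action + default_joint_angle
#   torque = stiffness * (target - dof_pos) - damping * dof_vel
# applied `decimation` times per policy step, so with sim.dt = 0.005 below and
# decimation = 4 the policy acts every 0.005 * 4 = 0.02 s (50 Hz).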

class asset:
file = ""
name = "legged_robot" # actor name
foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors
penalize_contacts_on = []
terminate_after_contacts_on = []
disable_gravity = False
collapse_fixed_joints = True # merge bodies connected by fixed joints. Specific fixed joints can be kept by adding <... dont_collapse="true">
fix_base_link = False # fix the base of the robot
default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort)
self_collisions = 0 # 1 to disable, 0 to enable (bitwise filter)
replace_cylinder_with_capsule = (
True # replace collision cylinders with capsules, leads to faster/more stable simulation
)
flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up

density = 0.001
angular_damping = 0.0
linear_damping = 0.0
max_angular_velocity = 1000.0
max_linear_velocity = 1000.0
armature = 0.0
thickness = 0.01

class domain_rand:
start_pos_noise = 0.1
randomize_friction = True
friction_range = [0.5, 1.25]
randomize_base_mass = False
added_mass_range = [-1.0, 1.0]
push_robots = True
push_interval_s = 15
max_push_vel_xy = 1.0

class rewards:
class scales:
termination = -0.0
tracking_lin_vel = 1.0
tracking_ang_vel = 0.5
lin_vel_z = -2.0
ang_vel_xy = -0.05
orientation = -0.0
torques = -0.00001
dof_vel = -0.0
dof_acc = -2.5e-7
base_height = -0.0
feet_air_time = 1.0
collision = -1.0
feet_stumble = -0.0
action_rate = -0.0
stand_still = -0.0

only_positive_rewards = (
True # if true negative total rewards are clipped at zero (avoids early termination problems)
)

class normalization:
class obs_scales:
lin_vel = 2.0
ang_vel = 0.25
dof_pos = 1.0
dof_vel = 0.05
height_measurements = 5.0

clip_observations = 100.0
clip_actions = 100.0

class noise:
add_noise = True
noise_level = 1.0 # scales other values

class noise_scales:
dof_pos = 0.01
dof_vel = 1.5
lin_vel = 0.1
ang_vel = 0.2
gravity = 0.05
height_measurements = 0.1

# viewer camera:
class viewer:
ref_env = 0
pos = [10, 0, 6] # [m]
lookat = [11.0, 5, 3.0] # [m]

class sim:
dt = 0.005
substeps = 1
gravity = [0.0, 0.0, -9.81] # [m/s^2]
up_axis = 1 # 0 is y, 1 is z

class physx:
num_threads = 10
solver_type = 1 # 0: pgs, 1: tgs
num_position_iterations = 4
num_velocity_iterations = 0
contact_offset = 0.01 # [m]
rest_offset = 0.0 # [m]
bounce_threshold_velocity = 0.5 # 0.5 [m/s]
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more
default_buffer_size_multiplier = 5
contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2)


class LeggedRobotCfgPPO(BaseConfig):
seed = 1
runner_class_name = "OnPolicyRunner"

class policy:
init_noise_std = 1.0
actor_hidden_dims = [512, 256, 128]
critic_hidden_dims = [512, 256, 128]

class algorithm:
# training params
value_loss_coef = 1.0
use_clipped_value_loss = True
clip_param = 0.2
entropy_coef = 0.01
num_learning_epochs = 5
num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches
learning_rate = 1.0e-3 # 5.e-4
schedule = "adaptive" # could be adaptive, fixed
gamma = 0.99
lam = 0.95
desired_kl = 0.01
max_grad_norm = 1.0
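# Worked example of the mini batch comment above, using the defaults in this file:
# 4096 envs * 24 steps per env / 4 mini batches = 24576 samples per mini batch.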

class runner:
policy_class_name = "ActorCritic"
algorithm_class_name = "PPO"
num_steps_per_env = 24 # per iteration
max_iterations = 1500 # number of policy updates

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "test"
run_name = ""
# load and resume
resume = False
load_run = -1 # -1 = last run
checkpoint = -1 # -1 = last saved model
resume_path = None # updated from load_run and checkpoint
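
For orientation, the robot-specific QuadrupedCfg / QuadrupedCfgPPO imported in sim/envs/__init__.py would extend these base classes in the usual pattern. A minimal hedged sketch of that pattern follows; the import path, field values, asset path, and link name are placeholders, not the actual contents of quadruped_config.py in this PR:

# hypothetical sketch -- all values below are placeholders
from sim.envs.base.quadruped_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO  # path assumed


class QuadrupedCfg(LeggedRobotCfg):
    class env(LeggedRobotCfg.env):
        num_actions = 12  # placeholder

    class asset(LeggedRobotCfg.asset):
        file = "resources/robots/quadruped/quadruped.urdf"  # placeholder path
        foot_name = "foot"  # placeholder link name


class QuadrupedCfgPPO(LeggedRobotCfgPPO):
    class runner(LeggedRobotCfgPPO.runner):
        experiment_name = "quadruped"  # placeholder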