Quadruped Config and Envs for Standing #88

Open
wants to merge 11 commits into master
1 change: 1 addition & 0 deletions .gitignore
@@ -5,6 +5,7 @@
# Local files
.env
*.DS_Store
isaacgym
# Python
*.py[oc]
__pycache__/
Binary file modified policy_1.pt
Binary file not shown.
2 changes: 1 addition & 1 deletion sim/algo/ppo/on_policy_runner.py
@@ -96,7 +96,7 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = False):
# initialize writer
if self.log_dir is not None and self.writer is None:
wandb.init(
project="XBot",
project="Quadruped Try",
Collaborator
remove

sync_tensorboard=True,
name=self.wandb_run_name,
config=self.all_cfg,
Expand Down
10 changes: 9 additions & 1 deletion sim/envs/__init__.py
@@ -14,11 +14,17 @@
from sim.envs.humanoids.g1_env import G1FreeEnv
from sim.envs.humanoids.h1_config import H1Cfg, H1CfgPPO
from sim.envs.humanoids.h1_env import H1FreeEnv
from sim.envs.humanoids.quadruped_config import QuadrupedCfg, QuadrupedCfgPPO
from sim.envs.humanoids.quadruped_env import QuadrupedFreeEnv
from sim.envs.humanoids.stompymicro_config import StompyMicroCfg, StompyMicroCfgPPO
from sim.envs.humanoids.stompymicro_env import StompyMicroEnv
from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
from sim.envs.humanoids.stompymini_env import MiniFreeEnv
from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProStandingCfg, StompyProCfgPPO
from sim.envs.humanoids.stompypro_config import (
StompyProCfg,
StompyProCfgPPO,
StompyProStandingCfg,
)
from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
from sim.envs.humanoids.xbot_env import XBotLFreeEnv
@@ -31,5 +37,7 @@
task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
task_registry.register("xbot", XBotLFreeEnv, XBotCfg(), XBotCfgPPO())
task_registry.register("XBotL_free", XBotLFreeEnv, XBotCfg(), XBotCfgPPO())
task_registry.register("stompymicro", StompyMicroEnv, StompyMicroCfg(), StompyMicroCfgPPO())
task_registry.register("quadruped", QuadrupedFreeEnv, QuadrupedCfg(), QuadrupedCfgPPO())
886 changes: 886 additions & 0 deletions sim/envs/base/quadruped_robot.py

Large diffs are not rendered by default.

261 changes: 261 additions & 0 deletions sim/envs/base/quadruped_robot_config.py
@@ -0,0 +1,261 @@
# SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2021 ETH Zurich, Nikita Rudin
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.

from sim.envs.base.base_config import BaseConfig


class LeggedRobotCfg(BaseConfig):
class env:
num_envs = 4096
num_observations = 235
num_privileged_obs = None # if not None, a privileged_obs_buf is returned by step() (critic obs for asymmetric training); otherwise None is returned
num_actions = 12
env_spacing = 3.0 # not used with heightfields/trimeshes
send_timeouts = True # send time out information to the algorithm
episode_length_s = 20 # episode length in seconds

class terrain:
mesh_type = "trimesh" # "heightfield" # none, plane, heightfield or trimesh
horizontal_scale = 0.1 # [m]
vertical_scale = 0.005 # [m]
border_size = 25 # [m]
curriculum = True
static_friction = 1.0
dynamic_friction = 1.0
restitution = 0.0
# rough terrain only:
measure_heights = True
measured_points_x = [
-0.8,
-0.7,
-0.6,
-0.5,
-0.4,
-0.3,
-0.2,
-0.1,
0.0,
0.1,
0.2,
0.3,
0.4,
0.5,
0.6,
0.7,
0.8,
] # 1mx1.6m rectangle (without center line)
measured_points_y = [-0.5, -0.4, -0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
selected = False # select a unique terrain type and pass all arguments
terrain_kwargs = None # Dict of arguments for selected terrain
max_init_terrain_level = 5 # starting curriculum state
terrain_length = 8.0
terrain_width = 8.0
num_rows = 10 # number of terrain rows (levels)
num_cols = 20 # number of terrain cols (types)
# terrain types: [smooth slope, rough slope, stairs up, stairs down, discrete]
terrain_proportions = [0.1, 0.1, 0.35, 0.25, 0.2]
# trimesh only:
slope_treshold = 0.75 # slopes above this threshold will be corrected to vertical surfaces

class commands:
curriculum = False
max_curriculum = 1.0
num_commands = 4 # default: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
resampling_time = 10.0 # time before commands are changed [s]
heading_command = True # if true: compute ang vel command from heading error

class ranges:
lin_vel_x = [0.5, 10.0] # [-1.0, 1.0] # min max [m/s]
lin_vel_y = [0.3, 10.0] # min max [m/s]
ang_vel_pitch = [-1, 1] # min max [rad/s]
heading = [-3.14, 3.14]
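# Illustrative note (added for clarity, not part of the original file): with
# heading_command = True, legged_gym-style envs typically recompute the yaw-rate
# command from the heading error each step, roughly:
#   forward = quat_apply(base_quat, forward_vec)
#   heading = torch.atan2(forward[:, 1], forward[:, 0])
#   commands[:, 2] = torch.clip(0.5 * wrap_to_pi(commands[:, 3] - heading), -1.0, 1.0)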

class init_state:
pos = [0.0, 0.0, 1.0] # x,y,z [m]
rot = [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat]
lin_vel = [0.0, 0.0, 0.0] # x,y,z [m/s]
ang_vel = [0.0, 0.0, 0.0] # x,y,z [rad/s]
default_joint_angles = {"joint_a": 0.0, "joint_b": 0.0} # target angles when action = 0.0

class control:
# PD Drive parameters:
stiffness = {"joint_a": 10.0, "joint_b": 15.0} # [N*m/rad]
damping = {"joint_a": 1.0, "joint_b": 1.5} # [N*m*s/rad]
# action scale: target angle = actionScale * action + defaultAngle
action_scale = 0.5
# decimation: Number of control action updates @ sim DT per policy DT
decimation = 4
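# Illustrative note (added for clarity, not part of the original file): the comments
# above describe the usual legged_gym-style PD loop, roughly:
#   target = action_scale * action + default_joint_angle
#   torque = stiffness * (target - dof_pos) - damping * dof_vel
# applied `decimation` times per policy step, so with sim.dt = 0.005 below and
# decimation = 4 the policy acts every 0.005 * 4 = 0.02 s (50 Hz).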

class asset:
file = ""
name = "legged_robot" # actor name
foot_name = "None" # name of the feet bodies, used to index body state and contact force tensors
penalize_contacts_on = []
terminate_after_contacts_on = []
disable_gravity = False
collapse_fixed_joints = True # merge bodies connected by fixed joints. Specific fixed joints can be kept by adding <... dont_collapse="true">
fix_base_link = False # fix the base of the robot
default_dof_drive_mode = 3 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 3 effort)
self_collisions = 0 # 1 to disable, 0 to enable (bitwise filter)
replace_cylinder_with_capsule = (
True # replace collision cylinders with capsules, leads to faster/more stable simulation
)
flip_visual_attachments = True # Some .obj meshes must be flipped from y-up to z-up

density = 0.001
angular_damping = 0.0
linear_damping = 0.0
max_angular_velocity = 1000.0
max_linear_velocity = 1000.0
armature = 0.0
thickness = 0.01

class domain_rand:
start_pos_noise = 0.1
randomize_friction = True
friction_range = [0.5, 1.25]
randomize_base_mass = False
added_mass_range = [-1.0, 1.0]
push_robots = True
push_interval_s = 15
max_push_vel_xy = 1.0

class rewards:
class scales:
termination = -0.0
tracking_lin_vel = 1.0
tracking_ang_vel = 0.5
lin_vel_z = -2.0
ang_vel_xy = -0.05
orientation = -0.0
torques = -0.00001
dof_vel = -0.0
dof_acc = -2.5e-7
base_height = -0.0
feet_air_time = 1.0
collision = -1.0
feet_stumble = -0.0
action_rate = -0.0
stand_still = -0.0

only_positive_rewards = (
True # if true negative total rewards are clipped at zero (avoids early termination problems)
)

class normalization:
class obs_scales:
lin_vel = 2.0
ang_vel = 0.25
dof_pos = 1.0
dof_vel = 0.05
height_measurements = 5.0

clip_observations = 100.0
clip_actions = 100.0

class noise:
add_noise = True
noise_level = 1.0 # scales other values

class noise_scales:
dof_pos = 0.01
dof_vel = 1.5
lin_vel = 0.1
ang_vel = 0.2
gravity = 0.05
height_measurements = 0.1

# viewer camera:
class viewer:
ref_env = 0
pos = [10, 0, 6] # [m]
lookat = [11.0, 5, 3.0] # [m]

class sim:
dt = 0.005
substeps = 1
gravity = [0.0, 0.0, -9.81] # [m/s^2]
up_axis = 1 # 0 is y, 1 is z

class physx:
num_threads = 10
solver_type = 1 # 0: pgs, 1: tgs
num_position_iterations = 4
num_velocity_iterations = 0
contact_offset = 0.01 # [m]
rest_offset = 0.0 # [m]
bounce_threshold_velocity = 0.5 # 0.5 [m/s]
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**23 # 2**24 -> needed for 8000 envs and more
default_buffer_size_multiplier = 5
contact_collection = 2 # 0: never, 1: last sub-step, 2: all sub-steps (default=2)


class LeggedRobotCfgPPO(BaseConfig):
seed = 1
runner_class_name = "OnPolicyRunner"

class policy:
init_noise_std = 1.0
actor_hidden_dims = [512, 256, 128]
critic_hidden_dims = [512, 256, 128]

class algorithm:
# training params
value_loss_coef = 1.0
use_clipped_value_loss = True
clip_param = 0.2
entropy_coef = 0.01
num_learning_epochs = 5
num_mini_batches = 4 # mini batch size = num_envs*nsteps / nminibatches
learning_rate = 1.0e-3 # 5.e-4
schedule = "adaptive" # could be adaptive, fixed
gamma = 0.99
lam = 0.95
desired_kl = 0.01
max_grad_norm = 1.0
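# Worked example of the mini batch comment above, using the defaults in this file:
# 4096 envs * 24 steps per env / 4 mini batches = 24576 samples per mini batch.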

class runner:
policy_class_name = "ActorCritic"
algorithm_class_name = "PPO"
num_steps_per_env = 24 # per iteration
max_iterations = 1500 # number of policy updates

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "test"
run_name = ""
# load and resume
resume = False
load_run = -1 # -1 = last run
checkpoint = -1 # -1 = last saved model
resume_path = None # updated from load_run and checkpoint
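
For orientation, the robot-specific QuadrupedCfg / QuadrupedCfgPPO imported in sim/envs/__init__.py would extend these base classes in the usual pattern. A minimal hedged sketch of that pattern follows; the import path, field values, asset path, and link name are placeholders, not the actual contents of quadruped_config.py in this PR:

# hypothetical sketch -- all values below are placeholders
from sim.envs.base.quadruped_robot_config import LeggedRobotCfg, LeggedRobotCfgPPO  # path assumed


class QuadrupedCfg(LeggedRobotCfg):
    class env(LeggedRobotCfg.env):
        num_actions = 12  # placeholder

    class asset(LeggedRobotCfg.asset):
        file = "resources/robots/quadruped/quadruped.urdf"  # placeholder path
        foot_name = "foot"  # placeholder link name


class QuadrupedCfgPPO(LeggedRobotCfgPPO):
    class runner(LeggedRobotCfgPPO.runner):
        experiment_name = "quadruped"  # placeholder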