You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I am trying to run the training script as is, without using the replay buffer.
I am getting the following errors.
Start training ...
Rank [0], Epoch [0]: Training on train dataset
Rank [0], Epoch [0]: Training on train dataset
Rank [0], Epoch [0]: Training on train dataset
Rank [0], Epoch [0]: Training on train dataset
Traceback (most recent call last):
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 300, in <module>
mp.spawn(experiment, args=(cmd_args, devices, port), nprocs=len(devices), join=True)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 260, in experiment
out = train(agent, train_dataset, TRAINING_ITERATIONS, rank)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 54, in train
raw_batch = next(data_iter)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1376, in _next_data
return self._process_data(data)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1402, in _process_data
data.reraise()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/_utils.py", line 461, in reraise
raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 39, in fetch
data = next(self.dataset_iter)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/wrappers/pytorch_replay_buffer.py", line 41, in _generator
yield self._replay_buffer.sample_transition_batch(pack_in_dict=True, distribution_mode = self._sample_distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 772, in sample_transition_batch
indices = self.sample_index_batch(batch_size, distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 706, in sample_index_batch
state_index = np.random.randint(low = self._task_replay_start_index[task_index],
File "mtrand.pyx", line 765, in numpy.random.mtrand.RandomState.randint
File "_bounded_integers.pyx", line 1247, in numpy.random._bounded_integers._rand_int64
ValueError: high <= 0
Traceback (most recent call last):
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 300, in <module>
mp.spawn(experiment, args=(cmd_args, devices, port), nprocs=len(devices), join=True)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 1 terminated with the following error:
Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 260, in experiment
out = train(agent, train_dataset, TRAINING_ITERATIONS, rank)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 54, in train
raw_batch = next(data_iter)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1376, in _next_data
return self._process_data(data)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1402, in _process_data
data.reraise()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/_utils.py", line 461, in reraise
raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 39, in fetch
data = next(self.dataset_iter)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/wrappers/pytorch_replay_buffer.py", line 41, in _generator
yield self._replay_buffer.sample_transition_batch(pack_in_dict=True, distribution_mode = self._sample_distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 772, in sample_transition_batch
indices = self.sample_index_batch(batch_size, distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 706, in sample_index_batch
state_index = np.random.randint(low = self._task_replay_start_index[task_index],
File "mtrand.pyx", line 765, in numpy.random.mtrand.RandomState.randint
File "_bounded_integers.pyx", line 1247, in numpy.random._bounded_integers._rand_int64
ValueError: high <= 0
Traceback (most recent call last):
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 300, in <module>
mp.spawn(experiment, args=(cmd_args, devices, port), nprocs=len(devices), join=True)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 260, in experiment
out = train(agent, train_dataset, TRAINING_ITERATIONS, rank)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 54, in train
raw_batch = next(data_iter)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1376, in _next_data
return self._process_data(data)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1402, in _process_data
data.reraise()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/_utils.py", line 461, in reraise
raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 39, in fetch
data = next(self.dataset_iter)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/wrappers/pytorch_replay_buffer.py", line 41, in _generator
yield self._replay_buffer.sample_transition_batch(pack_in_dict=True, distribution_mode = self._sample_distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 772, in sample_transition_batch
indices = self.sample_index_batch(batch_size, distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 706, in sample_index_batch
state_index = np.random.randint(low = self._task_replay_start_index[task_index],
File "mtrand.pyx", line 765, in numpy.random.mtrand.RandomState.randint
File "_bounded_integers.pyx", line 1247, in numpy.random._bounded_integers._rand_int64
ValueError: high <= 0
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
Traceback (most recent call last):
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 300, in <module>
mp.spawn(experiment, args=(cmd_args, devices, port), nprocs=len(devices), join=True)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 260, in experiment
out = train(agent, train_dataset, TRAINING_ITERATIONS, rank)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 54, in train
raw_batch = next(data_iter)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1376, in _next_data
return self._process_data(data)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1402, in _process_data
data.reraise()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/_utils.py", line 461, in reraise
raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 39, in fetch
data = next(self.dataset_iter)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/wrappers/pytorch_replay_buffer.py", line 41, in _generator
yield self._replay_buffer.sample_transition_batch(pack_in_dict=True, distribution_mode = self._sample_distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 772, in sample_transition_batch
indices = self.sample_index_batch(batch_size, distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 706, in sample_index_batch
state_index = np.random.randint(low = self._task_replay_start_index[task_index],
File "mtrand.pyx", line 765, in numpy.random.mtrand.RandomState.randint
File "_bounded_integers.pyx", line 1247, in numpy.random._bounded_integers._rand_int64
ValueError: low >= high
srun: error: gpu-11: task 2: Exited with exit code 1
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
srun: error: gpu-11: tasks 0-1: Exited with exit code 1
srun: error: gpu-11: task 3: Exited with exit code 1
The text was updated successfully, but these errors were encountered:
Thanks for your interest in our work. People have had success using the replay buffer. I recommend trying that variant and seeing if it works for you. Could you let me know if you have an issue with that variant?
If other people also have issues with the variation without a replay buffer, we might deprecate that version.
Hi, Thanks for your great work!!
I am trying to run the training script as is, without using the replay buffer.
I am getting the following errors.
Start training ...
Rank [0], Epoch [0]: Training on train dataset
Rank [0], Epoch [0]: Training on train dataset
Rank [0], Epoch [0]: Training on train dataset
Rank [0], Epoch [0]: Training on train dataset
Traceback (most recent call last):
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 300, in <module>
mp.spawn(experiment, args=(cmd_args, devices, port), nprocs=len(devices), join=True)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 260, in experiment
out = train(agent, train_dataset, TRAINING_ITERATIONS, rank)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 54, in train
raw_batch = next(data_iter)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1376, in _next_data
return self._process_data(data)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1402, in _process_data
data.reraise()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/_utils.py", line 461, in reraise
raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 39, in fetch
data = next(self.dataset_iter)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/wrappers/pytorch_replay_buffer.py", line 41, in _generator
yield self._replay_buffer.sample_transition_batch(pack_in_dict=True, distribution_mode = self._sample_distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 772, in sample_transition_batch
indices = self.sample_index_batch(batch_size, distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 706, in sample_index_batch
state_index = np.random.randint(low = self._task_replay_start_index[task_index],
File "mtrand.pyx", line 765, in numpy.random.mtrand.RandomState.randint
File "_bounded_integers.pyx", line 1247, in numpy.random._bounded_integers._rand_int64
ValueError: high <= 0
Traceback (most recent call last):
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 300, in <module>
mp.spawn(experiment, args=(cmd_args, devices, port), nprocs=len(devices), join=True)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 1 terminated with the following error:
Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 260, in experiment
out = train(agent, train_dataset, TRAINING_ITERATIONS, rank)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 54, in train
raw_batch = next(data_iter)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1376, in _next_data
return self._process_data(data)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1402, in _process_data
data.reraise()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/_utils.py", line 461, in reraise
raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 39, in fetch
data = next(self.dataset_iter)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/wrappers/pytorch_replay_buffer.py", line 41, in _generator
yield self._replay_buffer.sample_transition_batch(pack_in_dict=True, distribution_mode = self._sample_distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 772, in sample_transition_batch
indices = self.sample_index_batch(batch_size, distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 706, in sample_index_batch
state_index = np.random.randint(low = self._task_replay_start_index[task_index],
File "mtrand.pyx", line 765, in numpy.random.mtrand.RandomState.randint
File "_bounded_integers.pyx", line 1247, in numpy.random._bounded_integers._rand_int64
ValueError: high <= 0
Traceback (most recent call last):
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 300, in <module>
mp.spawn(experiment, args=(cmd_args, devices, port), nprocs=len(devices), join=True)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 260, in experiment
out = train(agent, train_dataset, TRAINING_ITERATIONS, rank)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 54, in train
raw_batch = next(data_iter)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1376, in _next_data
return self._process_data(data)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1402, in _process_data
data.reraise()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/_utils.py", line 461, in reraise
raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 39, in fetch
data = next(self.dataset_iter)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/wrappers/pytorch_replay_buffer.py", line 41, in _generator
yield self._replay_buffer.sample_transition_batch(pack_in_dict=True, distribution_mode = self._sample_distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 772, in sample_transition_batch
indices = self.sample_index_batch(batch_size, distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 706, in sample_index_batch
state_index = np.random.randint(low = self._task_replay_start_index[task_index],
File "mtrand.pyx", line 765, in numpy.random.mtrand.RandomState.randint
File "_bounded_integers.pyx", line 1247, in numpy.random._bounded_integers._rand_int64
ValueError: high <= 0
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
Traceback (most recent call last):
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 300, in <module>
mp.spawn(experiment, args=(cmd_args, devices, port), nprocs=len(devices), join=True)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 198, in start_processes
while not context.join():
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 160, in join
raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:
-- Process 3 terminated with the following error:
Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 69, in _wrap
fn(i, *args)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 260, in experiment
out = train(agent, train_dataset, TRAINING_ITERATIONS, rank)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/train.py", line 54, in train
raw_batch = next(data_iter)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 681, in __next__
data = self._next_data()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1376, in _next_data
return self._process_data(data)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1402, in _process_data
data.reraise()
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/_utils.py", line 461, in reraise
raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
data = fetcher.fetch(index)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 39, in fetch
data = next(self.dataset_iter)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/wrappers/pytorch_replay_buffer.py", line 41, in _generator
yield self._replay_buffer.sample_transition_batch(pack_in_dict=True, distribution_mode = self._sample_distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 772, in sample_transition_batch
indices = self.sample_index_batch(batch_size, distribution_mode)
File "/home/kiyogi/harsh/RVT_related_stuff/RVT/RVT/rvt/libs/YARR/yarr/replay_buffer/uniform_replay_buffer.py", line 706, in sample_index_batch
state_index = np.random.randint(low = self._task_replay_start_index[task_index],
File "mtrand.pyx", line 765, in numpy.random.mtrand.RandomState.randint
File "_bounded_integers.pyx", line 1247, in numpy.random._bounded_integers._rand_int64
ValueError: low >= high
srun: error: gpu-11: task 2: Exited with exit code 1
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "/home/kiyogi/miniconda3/envs/rvt/lib/python3.8/multiprocessing/spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated
srun: error: gpu-11: tasks 0-1: Exited with exit code 1
srun: error: gpu-11: task 3: Exited with exit code 1
The text was updated successfully, but these errors were encountered: