Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DIPU]fix cuda generator #927

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dipu/torch_dipu/csrc_dipu/diopirt/diopirt_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,8 @@ DIOPI_RT_API diopiError_t diopiGeneratorSetSeedAndOffset(
diopiGeneratorHandle_t th, uint64_t seed, uint64_t offset) {
auto generator = reinterpret_cast<at::Generator*>(th);
auto gen_impl = at::check_generator<dipu::DIPUGeneratorImpl>(*generator);
gen_impl->set_offset(offset);
gen_impl->set_current_seed(seed);
gen_impl->set_offset(offset);
return diopiSuccess;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ at::Generator createDIPUGenerator(at::DeviceIndex device_index) {
*/
DIPUGeneratorImpl::DIPUGeneratorImpl(at::DeviceIndex device_index)
: c10::GeneratorImpl{at::Device(dipu::DIPU_DEVICE_TYPE, device_index),
at::DispatchKeySet(dipu::DIPU_DISPATCH_KEY)},
offset_(0),
state_need_reset_(true) {}
at::DispatchKeySet(dipu::DIPU_DISPATCH_KEY)} {}

/**
* Sets the seed to be used by MTGP
Expand All @@ -91,6 +89,7 @@ DIPUGeneratorImpl::DIPUGeneratorImpl(at::DeviceIndex device_index)
*/
void DIPUGeneratorImpl::set_current_seed(uint64_t seed) {
// Record the new seed value.
seed_ = seed;
// Re-seeding rewinds the offset to 0, so a caller that also wants a
// non-zero offset has to call set_offset() after this function, not before.
offset_ = 0;
// Flag the cached state as needing a reset.
// NOTE(review): presumably the next state refresh rebuilds state_ from
// seed_/offset_ and clears this flag — confirm against update_state().
state_need_reset_ = true;
}

Expand Down Expand Up @@ -137,6 +136,7 @@ DIPUGeneratorImpl* DIPUGeneratorImpl::clone_impl() const {
createDIPUGenerator(this->device().index()).unsafeReleaseGeneratorImpl());
TORCH_CHECK(gen != nullptr);
gen->set_current_seed(this->seed_);
gen->set_offset(offset_);
auto state = this->state_;
const auto& state_clone = state.clone();
gen->set_state(*state_clone.getIntrusivePtr());
Expand Down
14 changes: 10 additions & 4 deletions dipu/torch_dipu/csrc_dipu/runtime/core/DIPUGeneratorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,17 @@ class DIPUGeneratorImpl : public c10::GeneratorImpl {
// not match the order elsewhere. We will change to keep the order from
// oldest-compatible to latest version.
#if DIPU_TORCH_VERSION == 20100 || DIPU_TORCH_VERSION == 20101
void set_offset(uint64_t offset) override { offset_ = offset; }
void set_offset(uint64_t offset) override {
offset_ = offset;
state_need_reset_ = true;
}
uint64_t get_offset() const override { return offset_; }

#else // # temp solution, default use torch2.0.0
virtual void set_offset(uint64_t offset) { offset_ = offset; }
virtual void set_offset(uint64_t offset) {
offset_ = offset;
state_need_reset_ = true;
}
virtual uint64_t get_offset() const { return offset_; }

#endif
Expand All @@ -40,10 +46,10 @@ class DIPUGeneratorImpl : public c10::GeneratorImpl {
virtual void update_state() const = 0;

DIPUGeneratorImpl* clone_impl() const override;
volatile uint64_t offset_;
volatile uint64_t offset_ = 0;
uint64_t seed_ = c10::default_rng_seed_val;
mutable at::Tensor state_;
mutable bool state_need_reset_;
mutable bool state_need_reset_ = true;
};

at::Generator& getDefaultDIPUGenerator(at::DeviceIndex device_index = -1);
Expand Down
23 changes: 22 additions & 1 deletion dipu/torch_dipu/csrc_dipu/vendor/cuda/CudaGeneratorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,27 @@ class CUDAGeneratorImpl : public dipu::DIPUGeneratorImpl {
state_size == total_size || state_size == total_size - offset_size,
"RNG state is wrong size");

// 1. set seed and offset
bool no_philox_seed = false;
if (state_size == total_size - offset_size) {
no_philox_seed = true;
}

uint64_t input_seed = 0;
#if DIPU_TORCH_VERSION == 20000
auto new_rng_state = state.data<uint8_t>();
#else
auto new_rng_state = state.data_dtype_initialized<uint8_t>();
#endif
memcpy(&input_seed, new_rng_state, seed_size);
this->set_current_seed(input_seed);
int64_t philox_offset = 0;
if (!no_philox_seed) {
memcpy(&philox_offset, new_rng_state + seed_size, offset_size);
}
this->set_offset(static_cast<uint64_t>(philox_offset));

// 2. set state
at::Tensor state_tmp(
state.shallow_copy_and_detach(state.version_counter(), true));
state_ = state_tmp;
Expand All @@ -44,7 +65,7 @@ class CUDAGeneratorImpl : public dipu::DIPUGeneratorImpl {
// THCGenerator struct was an array of curandStateMtgp32s.
memset(rng_state, -1, states_size);
uint64_t current_seed = this->current_seed();
int64_t offset = 0;
int64_t offset = this->get_offset();
memcpy(rng_state + states_size, &current_seed, seed_size);
memcpy(rng_state + states_size + seed_size, &offset, offset_size);
state_need_reset_ = false;
Expand Down
Loading