-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add ParameterServerController for parameter server python api #1051
Changes from 5 commits
f3c61cb
f9a65b0
95f20b9
cfbb4c4
7783982
93e74f8
3f6c2b3
5aaaef4
b1eeb2e
d32c7a6
aa9f516
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include "PServerUtil.h" | ||
|
||
namespace paddle { | ||
|
||
/**
 * @brief Create and initialize one ParameterServer2 per (port, device) pair.
 *
 * The number of ports is ports_num + ports_num_for_sparse, counted upwards
 * from config.port(). When config.nics() is empty a single server with an
 * empty address is created per port; otherwise nics is split on ',' and one
 * server is created per listed device per port, using getIpAddr(device) as
 * the address. Servers are stored port-major (all devices of the first port,
 * then all devices of the second port, ...).
 *
 * Initialization failure of any server trips a CHECK.
 *
 * @param config parameter server configuration; only read during construction.
 */
PServerUtil::PServerUtil(const ParameterServerConfig& config) {
  const bool useRdma = config.rdma_tcp() == "rdma";
  const int onlineCpus = rdma::numCpus();
  const int numPorts = config.ports_num() + config.ports_num_for_sparse();

  // Without a NIC list every port still gets exactly one server, modelled
  // here as a single placeholder device whose address is the empty string.
  const bool hasNics = !config.nics().empty();
  std::vector<std::string> devices;
  if (hasNics) {
    str::split(config.nics(), ',', &devices);
  } else {
    devices.push_back(std::string());
  }

  // round robin to load balance RDMA server ENGINE
  int rdmaCpu = 0;
  pservers_.reserve(devices.size() * numPorts);
  for (int i = 0; i < numPorts; ++i) {
    const int port = config.port() + i;
    for (const std::string& device : devices) {
      const std::string addr = hasNics ? getIpAddr(device) : std::string();

      std::shared_ptr<ParameterServer2> server;
      if (useRdma) {
        server.reset(new ParameterServer2(addr, port, rdmaCpu));
        rdmaCpu = (rdmaCpu + 1) % onlineCpus;
      } else {
        server.reset(new ParameterServer2(addr, port));
      }

      // Separate device and port in the message so a failure is readable.
      CHECK(server->init())
          << "Fail to initialize parameter server"
          << (hasNics ? " on device " + device : std::string()) << " on port "
          << port;
      pservers_.push_back(server);
    }
  }
}
|
||
/**
 * @brief Dtor. Joins every pserver before the object goes away.
 */
PServerUtil::~PServerUtil() {
  join();
}
|
||
/**
 * @brief Build a ParameterServerConfig from the command-line gflags.
 *
 * Copies the nics, port, ports_num, ports_num_for_sparse and rdma_tcp flags
 * into a heap-allocated config.
 *
 * @return a newly allocated config. The caller owns it and must delete it.
 */
ParameterServerConfig* PServerUtil::initConfig() {
  ParameterServerConfig* config = new ParameterServerConfig();
  config->set_nics(FLAGS_nics);
  config->set_port(FLAGS_port);
  config->set_ports_num(FLAGS_ports_num);
  // The constructor sizes the server pool as ports_num + ports_num_for_sparse,
  // so the sparse port count must be forwarded too or those ports are dropped.
  // NOTE(review): assumes FLAGS_ports_num_for_sparse is declared alongside the
  // other flags used here - confirm.
  config->set_ports_num_for_sparse(FLAGS_ports_num_for_sparse);
  config->set_rdma_tcp(FLAGS_rdma_tcp);
  return config;
}
|
||
/**
 * @brief Create a PServerUtil configured from the command-line gflags.
 *
 * @return a newly allocated PServerUtil. The caller owns it.
 */
PServerUtil* PServerUtil::createWithGflags() {
  // initConfig() hands back an owning raw pointer. The config is only read
  // while the PServerUtil is being constructed, so hold it in a unique_ptr
  // and let it be released on return instead of leaking it.
  const std::unique_ptr<ParameterServerConfig> config(initConfig());
  return create(*config);
}
|
||
/**
 * @brief Allocate a PServerUtil built from the given config.
 *
 * @param config parameter server configuration passed to the constructor.
 * @return a newly allocated PServerUtil. The caller owns it.
 */
PServerUtil* PServerUtil::create(const ParameterServerConfig& config) {
  PServerUtil* instance = new PServerUtil(config);
  return instance;
}
|
||
/**
 * @brief Call start() on every pserver, in creation order, logging the
 * index of each one just before it is started.
 */
void PServerUtil::start() {
  LOG(INFO) << "pserver sizes : " << pservers_.size();
  for (size_t idx = 0; idx < pservers_.size(); ++idx) {
    LOG(INFO) << "pserver started : " << idx;
    pservers_[idx]->start();
  }
}
|
||
/**
 * @brief Call join() on every pserver, in creation order, logging the
 * index of each one just before it is joined.
 */
void PServerUtil::join() {
  LOG(INFO) << "pserver sizes : " << pservers_.size();
  for (size_t idx = 0; idx < pservers_.size(); ++idx) {
    LOG(INFO) << "pserver join : " << idx;
    pservers_[idx]->join();
  }
}
|
||
} // namespace paddle |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#pragma once | ||
|
||
#include "ParameterServer2.h" | ||
#include "ParameterServerConfig.pb.h" | ||
#include "RDMANetwork.h" | ||
#include "paddle/utils/StringUtil.h" | ||
|
||
namespace paddle { | ||
|
||
class PServerUtil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
public: | ||
DISABLE_COPY(PServerUtil); | ||
|
||
/** | ||
* @brief Ctor, Create a PServerUtil from ParameterServerConfig. | ||
*/ | ||
explicit PServerUtil(const ParameterServerConfig& config); | ||
|
||
/** | ||
* @brief Dtor. | ||
*/ | ||
~PServerUtil(); | ||
|
||
/** | ||
* @brief Create a PServerUtil from gflags. This is kept for | ||
* compatibility with the old gflags-based configuration. | ||
*/ | ||
static PServerUtil* createWithGflags(); | ||
|
||
/** | ||
* @brief Create a PServerUtil from a ParameterServerConfig, removing the gflags | ||
* dependency from ParameterServer. Initializes all pserver threads according to the config. | ||
*/ | ||
static PServerUtil* create(const ParameterServerConfig& config); | ||
|
||
/** | ||
* @brief Start all pserver threads in this PServerUtil. | ||
*/ | ||
void start(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 我们的naming convention指定的怎么样了? @reyoung 在这里constructor是camel形式,但是methods都是小写。显然不一致呀。 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里因为构造函数和类名必须一致。类名必须是UpperCamelCase。比如 "SomeClass".而函数名是"lowerCamelCase",比如"someMethod"。 这样做的好处是,我们可以通过判断出一个东西是不是类型了。 比如
|
||
|
||
/** | ||
* @brief Join and wait for all pserver threads in this PServerUtil. | ||
*/ | ||
void join(); | ||
|
||
private: | ||
std::vector<std::shared_ptr<ParameterServer2>> pservers_; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
|
||
/** | ||
* @brief Create a ParameterServerConfig from gflags. This is kept for | ||
* compatibility with the old gflags-based configuration. | ||
*/ | ||
static ParameterServerConfig* initConfig(); | ||
}; | ||
|
||
} // namespace paddle |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
我现在对util这样的命名特别紧张——我发现一般都是大家懒得想明白应该叫什么的时候就叫util了。
在这里,看上去是想叫 PServerController 什么的。
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cool,同意。
PServerController是个好名字。
感觉程序员起名字是一个非常痛苦的事情。一痛苦就会用一些比较常用的名字,比如Utils
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done