Skip to content

Commit

Permalink
Merge pull request #116 from XinYao1994/main
Browse files Browse the repository at this point in the history
[Enhancement Request] Integrate Plato into Sedna as a backend for supporting federated learning - Phase one
  • Loading branch information
kubeedge-bot authored Sep 8, 2021
2 parents e61f6f8 + 2488b3f commit e110ac7
Show file tree
Hide file tree
Showing 22 changed files with 1,017 additions and 14 deletions.
87 changes: 87 additions & 0 deletions build/crd-samples/sedna/federatedlearningjob_yolo_v1alpha1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
apiVersion: sedna.io/v1alpha1
kind: FederatedLearningJob
metadata:
name: yolo-v5
spec:
pretrainedModel: # option
name: "yolo-v5-pretrained-model"
transmitter: # option
ws: { } # option, by default
s3: # option, but at least one
aggDataPath: "s3://sedna/fl/aggregation_data"
credentialName: mysecret
aggregationWorker:
model:
name: "yolo-v5-model"
template:
spec:
nodeName: "sedna-control-plane"
containers:
- image: kubeedge/sedna-example-federated-learning-mistnet-yolo-aggregator:v0.4.0
name: agg-worker
imagePullPolicy: IfNotPresent
env: # user defined environments
- name: "cut_layer"
value: "4"
- name: "epsilon"
value: "100"
- name: "aggregation_algorithm"
value: "mistnet"
- name: "batch_size"
value: "32"
resources: # user defined resources
limits:
memory: 8Gi
trainingWorkers:
- dataset:
name: "coco-dataset-1"
template:
spec:
nodeName: "edge-node"
containers:
- image: kubeedge/sedna-example-federated-learning-mistnet-yolo-client:v0.4.0
name: train-worker
imagePullPolicy: IfNotPresent
args: [ "-i", "1" ]
env: # user defined environments
- name: "cut_layer"
value: "4"
- name: "epsilon"
value: "100"
- name: "aggregation_algorithm"
value: "mistnet"
- name: "batch_size"
value: "32"
- name: "learning_rate"
value: "0.001"
- name: "epochs"
value: "1"
resources: # user defined resources
limits:
memory: 2Gi
- dataset:
name: "coco-dataset-2"
template:
spec:
nodeName: "edge-node"
containers:
- image: kubeedge/sedna-example-federated-learning-mistnet-yolo-client:v0.4.0
name: train-worker
imagePullPolicy: IfNotPresent
args: [ "-i", "2" ]
env: # user defined environments
- name: "cut_layer"
value: "4"
- name: "epsilon"
value: "100"
- name: "aggregation_algorithm"
value: "mistnet"
- name: "batch_size"
value: "32"
- name: "learning_rate"
value: "0.001"
- name: "epochs"
value: "1"
resources: # user defined resources
limits:
memory: 2Gi
4 changes: 3 additions & 1 deletion examples/build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
cd "$(dirname "${BASH_SOURCE[0]}")"

IMAGE_REPO=${IMAGE_REPO:-kubeedge}
IMAGE_TAG=${IMAGE_TAG:-v0.3.0}
IMAGE_TAG=${IMAGE_TAG:-v0.4.0}

EXAMPLE_REPO_PREFIX=${IMAGE_REPO}/sedna-example-

dockerfiles=(
federated-learning-mistnet-yolo-aggregator.Dockerfile
federated-learning-mistnet-yolo-client.Dockerfile
federated-learning-surface-defect-detection-aggregation.Dockerfile
federated-learning-surface-defect-detection-train.Dockerfile
incremental-learning-helmet-detection.Dockerfile
Expand Down
23 changes: 23 additions & 0 deletions examples/federated-learning-mistnet-yolo-aggregator.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM tensorflow/tensorflow:1.15.4

RUN apt update \
&& apt install -y libgl1-mesa-glx git

COPY ./lib/requirements.txt /home

RUN python -m pip install --upgrade pip

RUN pip install -r /home/requirements.txt

ENV PYTHONPATH "/home/lib:/home/plato:/home/plato/packages/yolov5"

COPY ./lib /home/lib
RUN git clone https://github.com/TL-System/plato.git /home/plato

RUN pip install -r /home/plato/requirements.txt
RUN pip install -r /home/plato/packages/yolov5/requirements.txt

WORKDIR /home/work
COPY examples/federated_learning/yolov5_coco128_mistnet /home/work/

CMD ["/bin/sh", "-c", "ulimit -n 50000; python aggregate.py"]
23 changes: 23 additions & 0 deletions examples/federated-learning-mistnet-yolo-client.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM tensorflow/tensorflow:1.15.4

RUN apt update \
&& apt install -y libgl1-mesa-glx git

COPY ./lib/requirements.txt /home

RUN python -m pip install --upgrade pip

RUN pip install -r /home/requirements.txt

ENV PYTHONPATH "/home/lib:/home/plato:/home/plato/packages/yolov5"

COPY ./lib /home/lib
RUN git clone https://github.com/TL-System/plato.git /home/plato

RUN pip install -r /home/plato/requirements.txt
RUN pip install -r /home/plato/packages/yolov5/requirements.txt

WORKDIR /home/work
COPY examples/federated_learning/yolov5_coco128_mistnet /home/work/

ENTRYPOINT ["python", "train.py"]
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import numpy as np
Expand Down Expand Up @@ -74,6 +73,7 @@ def main():
learning_rate=learning_rate,
validation_split=validation_split
)

return train_jobs


Expand Down
Loading

0 comments on commit e110ac7

Please sign in to comment.