Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dgxie HA #44

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions config.example/dgxie.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,6 @@ mgmtIntPrv: eth1
# Network domain
netDomain: local

# IP address of private network interface on management server
netPrvIp: 192.168.1.1

# Private Network
netPrvNet: 192.168.1.0

Expand All @@ -53,6 +50,10 @@ netPrvDhcpEnd: 192.168.1.199
# DHCP lease time
netPrvDhcpLease: 7200

replicaCount: 2

pixiecoreSigningKey: 'mysecret'

# HTTPS proxy to use in preseed
#httpsProxy: "http://192.168.2.1:3128"

Expand Down
26 changes: 21 additions & 5 deletions containers/dgxie/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,29 @@
FROM ubuntu:16.04
FROM golang:1.10.2

# Fork owner and exact commit of the netboot repository that pixiecore is
# built from. Pinning the commit keeps the build reproducible.
ENV COMMIT_HASH=27896bc470a2044c714101b2a321d6d900ccecbe \
    CUSTOM_FORK_AUTHOR=hightoxicity

# Refresh the package index and install in the SAME layer so a cached,
# out-of-date index is never reused by a later install; drop the index
# afterwards since nothing else in this stage needs it.
RUN apt-get update && \
    apt-get install -qy --no-install-recommends \
        git \
        wget && \
    rm -rf /var/lib/apt/lists/*

# dep is the vendoring tool used below to populate ./vendor.
RUN mkdir -p "${GOPATH}/bin" && \
    go get -u github.com/golang/dep/cmd/dep

# WORKDIR creates the directory when it does not exist, so no mkdir is needed.
WORKDIR /go/src/go.universe.tf
RUN git clone https://github.com/google/netboot.git

# Switch the upstream checkout to the pinned commit from the custom fork.
WORKDIR /go/src/go.universe.tf/netboot
RUN git remote add ${CUSTOM_FORK_AUTHOR} https://github.com/${CUSTOM_FORK_AUTHOR}/netboot.git && \
    git fetch ${CUSTOM_FORK_AUTHOR} && \
    git checkout ${COMMIT_HASH}

# Fetch the Go dependencies; the listing fails the build if ./vendor is missing.
RUN dep ensure && \
    ls -al ./vendor

# Build a statically linked, stripped pixiecore binary at /bin/pixiecore.
WORKDIR /go/src
RUN CGO_ENABLED=0 GOOS=linux go build -o /bin/pixiecore -ldflags "-w -s -v -extldflags -static" go.universe.tf/netboot/cmd/pixiecore

FROM ubuntu:16.04
MAINTAINER Douglas Holt <[email protected]>

RUN apt-get update && \
apt-get -y install apt-transport-https curl && \
curl -L https://packagecloud.io/danderson/pixiecore/gpgkey | apt-key add - && \
echo "deb https://packagecloud.io/danderson/pixiecore/debian stretch main" >/etc/apt/sources.list.d/pixiecore.list && \
apt-get update && \
apt-get -y install pixiecore nginx vsftpd iptables dnsmasq python-flask
apt-get -y install nginx vsftpd iptables dnsmasq python-flask
# Install the pixiecore binary produced by the first build stage (index 0)
# into the runtime image.
COPY --from=0 /bin/pixiecore /usr/bin/pixiecore
# Make sure the copied binary carries the executable bit.
RUN chmod +x /usr/bin/pixiecore

RUN mkdir -p /www /var/run/vsftpd/empty
# && \
Expand Down
2 changes: 1 addition & 1 deletion containers/dgxie/dnsmasq.conf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ log-facility=/var/log/dnsmasq.log
#enable-tftp
#tftp-root=/tftpboot

dhcp-authoritative
#dhcp-authoritative
dhcp-range=#DHCP_START#,#DHCP_END#,#LEASETIME#
#dhcp-option-force=209,efi64/pxelinux.cfg
dhcp-option=tag:green,option:domain-search,#DOMAIN#
Expand Down
Binary file removed containers/dgxie/misc/pixiecore
Binary file not shown.
37 changes: 36 additions & 1 deletion containers/dgxie/start
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,43 @@ LEASETIME=${LEASETIME:-7200}
DOMAIN=${DOMAIN:-local}
HTTPS_PROXY=${HTTPS_PROXY:-}
EXTRA_PACKAGES=${EXTRA_PACKAGES:-}
MY_POD_NAME=${MY_POD_NAME:-}
NUM_OF_REPLICAS=${NUM_OF_REPLICAS:-1}
###

iptoi()
{
    # Convert a dotted-decimal IPv4 address ($1) to its integer value and
    # print it. Octets are folded in from most to least significant: each
    # step shifts the running total left by 8 bits and adds the next octet.
    total=0
    for field in 1 2 3 4; do
        octet=$(echo "${1}" | cut -d"." -f"${field}")
        total=$(( (total << 8) + octet ))
    done

    echo "${total}"
}

itoip()
{
    # Print the dotted-decimal form of an IPv4 address given as an integer
    # ($1). Each octet is the integer divided down to the wanted byte and
    # reduced modulo 256, most significant octet first.
    printf '%d.%d.%d.%d\n' \
        "$(( ${1} / 16777216 % 256 ))" \
        "$(( ${1} / 65536 % 256 ))" \
        "$(( ${1} / 256 % 256 ))" \
        "$(( ${1} % 256 ))"
}

# Index of this replica: the text after the last "-" in the pod name,
# evaluated as an integer.
SHARD_IDX=$(( ${MY_POD_NAME##*-} ))

# With more than one replica, split the configured DHCP range into
# NUM_OF_REPLICAS consecutive, non-overlapping slices and keep only the
# slice that belongs to this replica.
if [ "${NUM_OF_REPLICAS}" -gt 1 ]; then
    STARTINT=$(iptoi "${DHCP_START}")
    ENDINT=$(iptoi "${DHCP_END}")

    # The range is inclusive on both ends.
    IPCOUNT=$(( ENDINT - STARTINT + 1 ))

    # Integer division: any remainder is handed to the last shard below.
    SHARDIPCOUNT=$(( IPCOUNT / NUM_OF_REPLICAS ))

    DHCP_START=$(itoip $(( STARTINT + (SHARD_IDX * SHARDIPCOUNT) )))
    if [ "${SHARD_IDX}" -eq $(( NUM_OF_REPLICAS - 1 )) ]; then
        # Last shard: run up to the configured end so that addresses left
        # over by the integer division are still served by some replica.
        DHCP_END=$(itoip "${ENDINT}")
    else
        DHCP_END=$(itoip $(( STARTINT + ((SHARD_IDX + 1) * SHARDIPCOUNT) - 1 )))
    fi
fi

mkdir -p "${ISO}"

# Check for mounted DGX Server ISO
Expand Down Expand Up @@ -100,6 +135,6 @@ nginx &
/usr/sbin/vsftpd /etc/vsftpd.conf &
/usr/local/bin/rest_api.py >/dev/null 2>&1 &
python /api.py &
/usr/bin/pixiecore api http://127.0.0.1:${HTTP_PORT} --dhcp-no-bind -p 81 &
/usr/bin/pixiecore api http://127.0.0.1:${HTTP_PORT} --dhcp-no-bind -p 81 --signing-key ${PIXIECORE_SIGNING_KEY} &
dnsmasq
tail -f /var/log/dnsmasq.log
46 changes: 26 additions & 20 deletions services/dgxie/templates/deployment.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Secret named "pixiecore" carrying the signing key from
# .Values.pixiecoreSigningKey under the key "signingkey".
apiVersion: v1
kind: Secret
metadata:
name: pixiecore
type: Opaque
data:
# NOTE(review): the value is piped through b64enc twice. Presumably one layer
# is the encoding a Secret's `data` field requires and the other is the format
# the consumer of the key expects — confirm before changing.
signingkey: {{ .Values.pixiecoreSigningKey | b64enc | b64enc }}
---
apiVersion: apps/v1beta2
kind: Deployment
kind: StatefulSet
metadata:
name: {{ template "dgxie.fullname" . }}
labels:
Expand Down Expand Up @@ -72,7 +80,9 @@ spec:
value: "{{ .Values.netDomain }}"
# IP address of private network interface on management server
- name: IP
value: "{{ .Values.netPrvIp }}"
valueFrom:
fieldRef:
fieldPath: status.podIP
# Private Network
- name: NETWORK
value: "{{ .Values.netPrvNet }}"
Expand Down Expand Up @@ -102,6 +112,17 @@ spec:
value: "{{ .Values.httpsProxy }}"
- name: EXTRA_PACKAGES
value: "{{ .Values.extraPackages }}"
- name: NUM_OF_REPLICAS
value: "{{ .Values.replicaCount }}"
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: PIXIECORE_SIGNING_KEY
valueFrom:
secretKeyRef:
name: pixiecore
key: signingkey
resources:
{{ toYaml .Values.resources | indent 12 }}
{{- with .Values.nodeSelector }}
Expand Down Expand Up @@ -132,21 +153,6 @@ spec:
clusterName: rook-ceph
path: /iso
- name: dhcp-leases
persistentVolumeClaim:
claimName: dhcp-leases
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: dhcp-leases
labels:
app: {{ template "dgxie.name" . }}
annotations:
volume.alpha.kubernetes.io/storage-class: default
spec:
storageClassName: rook-ceph-block
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10M
hostPath:
path: /mnt/dhcp-leases
type: DirectoryOrCreate
3 changes: 2 additions & 1 deletion services/dgxie/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ dgxKernExtra: ''
mgmtIntPub: eth0
mgmtIntPrv: eth1
netDomain: local
netPrvIp: 192.168.1.1
netPrvNet: 192.168.1.0
netPrvNetmask: 255.255.255.0
netPrvGateway: 192.168.1.1
Expand All @@ -29,6 +28,8 @@ extraPackages: ''
# Deployment config
replicaCount: 1

pixiecoreSigningKey: 'mysecret'

image:
repository: deepops/dgxie
tag: latest
Expand Down