diff --git a/huggingface-model/.gitignore b/huggingface-model/.gitignore new file mode 100644 index 0000000..80bf7fc --- /dev/null +++ b/huggingface-model/.gitignore @@ -0,0 +1 @@ +charts \ No newline at end of file diff --git a/huggingface-model/Chart.lock b/huggingface-model/Chart.lock new file mode 100644 index 0000000..2b39778 --- /dev/null +++ b/huggingface-model/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: mongodb + repository: oci://registry-1.docker.io/bitnamicharts + version: 14.3.0 +digest: sha256:1536cff09b6be684c82ea0780c96f266c549494dfacec84371054fa30e6c5cfa +generated: "2023-11-21T15:00:46.692964492+02:00" diff --git a/huggingface-model/Chart.yaml b/huggingface-model/Chart.yaml index 760f2d8..d7b319b 100644 --- a/huggingface-model/Chart.yaml +++ b/huggingface-model/Chart.yaml @@ -1,8 +1,13 @@ apiVersion: v2 name: huggingface-model -description: Helm chart for deploy Hugging Face models and chat-ui to Kubernetes cluster. See [Hugging Face models](https://huggingface.co/models) +description: Helm chart for deploy Hugging Face models to Kubernetes cluster. See [Hugging Face models](https://huggingface.co/models) type: application -version: 0.0.23 +version: 0.1.0 +dependencies: +- condition: mongodb.enabled + name: mongodb + repository: oci://registry-1.docker.io/bitnamicharts + version: 14.x.x \ No newline at end of file diff --git a/huggingface-model/templates/_helpers.tpl b/huggingface-model/templates/_helpers.tpl index 7137233..fec6c7a 100644 --- a/huggingface-model/templates/_helpers.tpl +++ b/huggingface-model/templates/_helpers.tpl @@ -1,6 +1,14 @@ {{/* Generate internal container port. */}} +{{- define "huggingface-model.chat.base-config" -}} +- name: {{ .Values.model.organization }}/{{ .Values.model.name }} + endpoints: + - url: http://{{ include "huggingface-model.fullname" . 
}}:{{ .Values.service.port | default 8080 }} + type: "tgi" +{{- if .Values.chat.modelConfig }}{{- .Values.chat.modelConfig | toYaml | nindent 2 }}{{ end }} +{{- if .Values.chat.additionalModels }}{{ .Values.chat.additionalModels | toYaml | nindent 0 }}{{ end }} +{{- end}} {{- define "huggingface-model.containerPort" -}} {{- if .Values.huggingface }} {{- default 8080 .Values.huggingface.containerPort }} @@ -70,7 +78,10 @@ Selector labels app.kubernetes.io/name: {{ include "huggingface-model.name" . }} app.kubernetes.io/instance: {{ .Release.Name }} {{- end }} - +{{- define "huggingface-chat.selectorLabels" -}} +app.kubernetes.io/name: {{ include "huggingface-model.name" . }}-chat-ui +app.kubernetes.io/instance: {{ .Release.Name }}-chat +{{- end }} {{/* Create the name of the service account to use */}} diff --git a/huggingface-model/templates/application.yaml b/huggingface-model/templates/application.yaml index 1396ecf..5e2f2f0 100644 --- a/huggingface-model/templates/application.yaml +++ b/huggingface-model/templates/application.yaml @@ -6,8 +6,6 @@ apiVersion: apps/v1 kind: {{ $kind }} metadata: - annotations: - reloader.stakater.com/auto: "true" name: {{ include "huggingface-model.fullname" . }} labels: {{- include "huggingface-model.labels" . | nindent 4 }} @@ -59,7 +57,7 @@ spec: args: - | set -x - if [ -f "/usr/src/{{ .Values.model.name }}/config.json" ]; then echo "Model {{ .Values.model.organization }} is already downloaded. Skipping init..."; exit 0; fi + if [ -f "/usr/src/{{ .Values.model.name }}/config.json" ]; then echo "Model {{ .Values.model.name }} is already downloaded. 
Skipping init..."; exit 0; fi rm -rf /usr/src/{{ .Values.model.name }}/* {{- if and .Values.init.s3.enabled }} apk add --update aws-cli diff --git a/huggingface-model/templates/chat/application.yaml b/huggingface-model/templates/chat/application.yaml new file mode 100644 index 0000000..4fe40e2 --- /dev/null +++ b/huggingface-model/templates/chat/application.yaml @@ -0,0 +1,73 @@ +{{- if .Values.chat.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "huggingface-model.fullname" . }}-chat + labels: + {{- include "huggingface-model.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.chat.replicaCount }} + selector: + matchLabels: + {{- include "huggingface-chat.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.chat.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "huggingface-chat.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.chat.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + containers: + - name: model + image: {{ .Values.chat.image.repo }}:{{ .Values.chat.image.tag }} + imagePullPolicy: {{ .Values.chat.image.pullPolicy }} + ports: + - containerPort: 3000 + env: + - name: MODELS + value: {{ toRawJson (fromYamlArray (include "huggingface-model.chat.base-config" .)) | quote }} + - name: MONGODB_HOST + value: {{ if .Values.mongodb.install }}{{ include "mongodb.service.nameOverride" .Subcharts.mongodb }}{{ else }}{{ .Values.chat.mongodb.host }}{{ end }} + {{- if .Values.chat.mongodb.port }} + - name: MONGODB_PORT + value: {{ .Values.chat.mongodb.port | quote }} + {{- end }} + - name: MONGODB_USER + value: {{ .Values.chat.mongodb.user | default "root" }} + {{- if .Values.mongodb.install }} + - name: MONGODB_URL_PARAMS + value: "admin?directConnection=true&authSource=admin" + - name: MONGODB_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "mongodb.service.nameOverride" .Subcharts.mongodb }} + key: mongodb-root-password + {{- else }} + {{- if .Values.chat.mongodb.password }} + - name: MONGODB_PASSWORD + value: {{ .Values.chat.mongodb.password | quote }} + - name: MONGODB_URL_PARAMS + value: {{ .Values.chat.mongodb.urlParams }} + {{- end }} + {{- end }} + resources: + {{- toYaml .Values.chat.resources | nindent 12 }} + {{- with .Values.chat.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.chat.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.chat.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/huggingface-model/templates/chat/ingress.yaml b/huggingface-model/templates/chat/ingress.yaml new file mode 100644 index 0000000..5a0f43c --- /dev/null +++ b/huggingface-model/templates/chat/ingress.yaml @@ -0,0 +1,53 @@ +{{- if and .Values.chat.enabled .Values.chat.ingress.enabled }} +{{- $fullName := printf "%s-chat" (include "huggingface-model.fullname" .)
-}} +{{- $serviceName := printf "%s-chat" (include "huggingface-model.fullname" .) -}} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "huggingface-model.labels" . | nindent 4 }} + {{- with .Values.chat.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.chat.ingress.tls }} + tls: + {{- range .Values.chat.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.chat.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ $serviceName }} + port: + {{- if .servicePort }} + {{- .servicePort | toYaml | nindent 18 }} + {{- else }} + number: 8080 + {{- end }} + {{- if .extendedOptions }} + {{- .extendedOptions | toYaml | nindent 14 }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/huggingface-model/templates/chat/service.yaml b/huggingface-model/templates/chat/service.yaml new file mode 100644 index 0000000..affcbd4 --- /dev/null +++ b/huggingface-model/templates/chat/service.yaml @@ -0,0 +1,16 @@ +{{- if .Values.chat.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "huggingface-model.fullname" . }}-chat + labels: + {{- include "huggingface-model.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type | default "ClusterIP" }} + ports: + - name: http + port: {{ .Values.service.port | default 8080 }} + targetPort: 3000 + selector: + {{- include "huggingface-chat.selectorLabels" .
| nindent 4 }} +{{- end }} diff --git a/huggingface-model/templates/ingress.yaml b/huggingface-model/templates/ingress.yaml index 691883f..0432d9a 100644 --- a/huggingface-model/templates/ingress.yaml +++ b/huggingface-model/templates/ingress.yaml @@ -1,29 +1,25 @@ -{{- if .Values.ingresses.enabled -}} -{{- $gitVersion := .Capabilities.KubeVersion.GitVersion }} -{{- $labels := include "huggingface-model.labels" . -}} -{{- $fullName := include "huggingface-model.fullname" . -}} -{{- range $index, $_ := .Values.ingresses.configs }} ---- -{{- if semverCompare ">=1.19-0" $gitVersion -}} +{{- if .Values.ingress.enabled -}} +{{- $fullName := (include "huggingface-model.fullname" .) -}} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} apiVersion: networking.k8s.io/v1 -{{- else if semverCompare ">=1.14-0" $gitVersion -}} +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} apiVersion: networking.k8s.io/v1beta1 {{- else -}} apiVersion: extensions/v1beta1 {{- end }} kind: Ingress metadata: - name: {{ $fullName }}-{{ $index }} + name: {{ $fullName }} labels: - {{- $labels | nindent 4 }} - {{- with .annotations }} + {{- include "huggingface-model.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} annotations: {{- toYaml . | nindent 4 }} {{- end }} spec: - {{- if .tls }} + {{- if .Values.ingress.tls }} tls: - {{- range .tls }} + {{- range .Values.ingress.tls }} - hosts: {{- range .hosts }} - {{ .
| quote }} @@ -32,7 +28,7 @@ spec: {{- end }} {{- end }} rules: - {{- range .hosts }} + {{- range .Values.ingress.hosts }} - host: {{ .host | quote }} http: paths: @@ -50,7 +46,7 @@ spec: {{- if .servicePort }} {{- .servicePort | toYaml | nindent 18 }} {{- else }} - number: 80 + number: 8080 {{- end }} {{- if .extendedOptions }} {{- .extendedOptions | toYaml | nindent 14 }} @@ -58,4 +54,3 @@ spec: {{- end }} {{- end }} {{- end }} -{{- end }} diff --git a/huggingface-model/values.yaml b/huggingface-model/values.yaml index 907c6f2..da96a54 100644 --- a/huggingface-model/values.yaml +++ b/huggingface-model/values.yaml @@ -8,8 +8,99 @@ ## name: zephyr-7b-beta ## model: - organization: "" - name: "" + organization: "meta-llama" + name: "Llama-2-70b-chat-hf" + +chat: + enabled: false + replicaCount: 1 + podAnnotations: {} + imagePullSecrets: [] + ## @param chat.affinity Affinity for pod assignment + ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + ## NOTE: podAffinityPreset, podAntiAffinityPreset, and nodeAffinityPreset will be ignored when it's set + ## + affinity: {} + ## @param chat.nodeSelector Node labels for pod assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: {} + ## @param chat.tolerations Tolerations for pod assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + ## + tolerations: [] + modelConfig: {} + ## e.g + # parameters: + # temperature: 0.1 + # top_p: 0.95 + # repetition_penalty: 1.2 + # top_k: 50 + # truncate: 1000 + # max_new_tokens: 1024 + # datasetName: OpenAssistant/oasst1 + # description: A good alternative to ChatGPT + # websiteUrl: https://open-assistant.io + # userMessageToken: "<|prompter|>" + # assistantMessageToken: "<|assistant|>" + # messageEndToken: "" + # preprompt: | + # Below are a series of dialogues between various people and an AI assistant. 
The AI tries to be helpful, polite, honest, sophisticated, emotionally aware, and humble-but-knowledgeable. The assistant is happy to help with almost anything, and will do its best to understand exactly what is needed. It also tries to avoid giving false or misleading information, and it caveats when it isn't entirely sure about the right answer. That said, the assistant is practical and really does its best, and doesn't let caution get too much in the way of being useful. + # ----- + # promptExamples: + # - title: Write an email from bullet list + # prompt: "As a restaurant owner, write a professional email to the supplier to + # get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)" + # - title: Code a snake game + # prompt: Code a basic snake game in python, give explanations for each step. + # - title: Assist in a task + # prompt: How do I make a delicious lemon cheesecake? + # parameters: + # temperature: 0.9 + # top_p: 0.95 + # repetition_penalty: 1.2 + # top_k: 50 + + additionalModels: [] + # - name: "Llama-2-70b-chat-hf" + # endpoints: + # - url: "http://example.com:8080/model/api" + # type: "tgi" + # parameters: + # temperature: 0.1 + # top_p: 0.95 + # repetition_penalty: 1.2 + # top_k: 50 + # truncate: 1000 + # max_new_tokens: 1024 + # datasetName: OpenAssistant/oasst1 + resources: + requests: + cpu: "0.5" + memory: "512M" + image: + repo: "shalb/hf-chat-ui" + tag: "v0.8" + pullPolicy: IfNotPresent + mongodb: + host: "" + user: "root" + password: "" + port: "27017" + urlParams: "admin?directConnection=true&authSource=admin" + ingress: + enabled: false + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-http" + hosts: + - host: api.model.example.com + paths: + - path: / + pathType: Prefix + tls: + - hosts: + - api.model.example.com + secretName: huggingface-model ## Init configuration. By default, init clone model from Huggingface git using git-lfs.
## The another way is to upload model to s3 bucket to reduce init delay and external traffic. @@ -159,26 +250,23 @@ extraEnvVars: [] ## Configure the ingresses resources list that allows you to access the model API ## @param ingresses.enabled Enable/disable ingress(es) for model API, default disabled ## -ingresses: - enabled: false +ingress: + enabled: false ## ingresses list ## ref: https://kubernetes.io/docs/concepts/services-networking/ingress/ ## @param ingresses.configs List of ingresses configs ## e.g. - ## configs: - ## - annotations: - ## cert-manager.io/cluster-issuer: "letsencrypt-http" - ## hosts: - ## - host: api.model.example.com - ## paths: - ## - path: / - ## pathType: Prefix - ## tls: - ## - hosts: - ## - api.model.example.com - ## secretName: huggingface-model - ## - configs: [] + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-http" + hosts: + - host: api.model.example.com + paths: + - path: / + pathType: Prefix + tls: + - hosts: + - api.model.example.com + secretName: huggingface-model ## @param livenessProbe Configure extra options for model liveness probe @@ -287,3 +375,7 @@ nodeSelector: {} ## tolerations: [] +mongodb: + install: true + auth: + rootPassword: ""