-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement mass machine type transition job WIP #2
base: main
Are you sure you want to change the base?
Changes from 5 commits
763ec77
7e70d95
1febd5e
7233349
87dbd43
49b1e41
e7d4d01
906d592
d72c2e7
b0b2b56
e97b107
aec2a17
88419b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
FROM golang:alpine | ||
RUN mkdir /app | ||
ADD . /app/ | ||
WORKDIR /app | ||
RUN go build -o main . | ||
RUN adduser -S -D -H -h /app appuser | ||
USER appuser | ||
CMD ["./main"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
# Mass machine type transition | ||
This project contains the k8s job that will perform a mass machine type transition on an OpenShift cluster. | ||
|
||
## Notes | ||
This is a more cohesive version of the mass machine type transition. This implementation uses Docker to create an image from the golang script that performs the machine type transition, and hangs until all warnings are cleared. The .yaml file is a job that uses this generated image, and will only be completed once this script exits, i.e. there are no warning labels left on any VMs. To run this locally, you will need to build a Docker image with the tag "machine-type-transition", then start the job with kubectl apply. This is not comprehensively tested yet; in the future I will be automating the process of running this job, as well as adding functionality for the user to specify flags to update machine types only on specific namespaces, and to force a restart/shut down of all running VMs when updating their machine types. |
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,14 +1,25 @@ | ||||||||||||
package mass_machine_type_transition | ||||||||||||
|
||||||||||||
import ( | ||||||||||||
"fmt" | ||||||||||||
"strings" | ||||||||||||
"time" | ||||||||||||
|
||||||||||||
k8sv1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||||||||||||
"k8s.io/apimachinery/pkg/fields" | ||||||||||||
"k8s.io/apimachinery/pkg/types" | ||||||||||||
"k8s.io/client-go/tools/cache" | ||||||||||||
k6tv1 "kubevirt.io/api/core/v1" | ||||||||||||
"kubevirt.io/client-go/kubecli" | ||||||||||||
"time" | ||||||||||||
) | ||||||||||||
|
||||||||||||
// using this as a const allows us to easily modify the program to update if a newer version is released | ||||||||||||
// we generally want to be updating the machine types to the most recent version | ||||||||||||
const latestVersion = "rhel9.2.0" | ||||||||||||
|
||||||||||||
var vmiList = []string{} | ||||||||||||
var exitJob = make(chan struct{}) | ||||||||||||
|
||||||||||||
func getVirtCli() (kubecli.KubevirtClient, error) { | ||||||||||||
clientConfig, err := kubecli.GetKubevirtClientConfig() | ||||||||||||
if err != nil { | ||||||||||||
|
@@ -23,14 +34,89 @@ func getVirtCli() (kubecli.KubevirtClient, error) { | |||||||||||
return virtCli, err | ||||||||||||
} | ||||||||||||
|
||||||||||||
func getVmiInformer() (cache.SharedIndexInformer, error) { | ||||||||||||
virtCli, err := getVirtCli() | ||||||||||||
if err != nil { | ||||||||||||
return nil, err | ||||||||||||
} | ||||||||||||
|
||||||||||||
func getVmiInformer(virtCli kubecli.KubevirtClient) (cache.SharedIndexInformer, error) { | ||||||||||||
listWatcher := cache.NewListWatchFromClient(virtCli.RestClient(), "virtualmachineinstances", k8sv1.NamespaceAll, fields.Everything()) | ||||||||||||
vmiInformer := cache.NewSharedIndexInformer(listWatcher, &k6tv1.VirtualMachineInstance{}, 1*time.Hour, cache.Indexers{}) | ||||||||||||
|
||||||||||||
vmiInformer.AddEventHandler(cache.ResourceEventHandlerFuncs { | ||||||||||||
DeleteFunc: removeWarningLabel, | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not entirely sure, but I think that reboots might not delete the vmi entirely, but rather leave in a I would also add a small validation to
Suggested change
|
||||||||||||
}) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. WDYT about adding an event handler for newly created VMIs? Although I'm not entirely sure it's necessary. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could add that but I am also not sure if it is something that we would need. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's skip that for now. we can always add that in follow-ups |
||||||||||||
|
||||||||||||
return vmiInformer, nil | ||||||||||||
} | ||||||||||||
|
||||||||||||
func updateMachineTypes(virtCli kubecli.KubevirtClient) error { | ||||||||||||
vmList, err := virtCli.VirtualMachine(k8sv1.NamespaceAll).List(&k8sv1.ListOptions{}) | ||||||||||||
if err != nil { | ||||||||||||
return err | ||||||||||||
} | ||||||||||||
for _, vm := range vmList.Items { | ||||||||||||
machineType := vm.Spec.Template.Spec.Domain.Machine.Type | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This needs to be fetched from status instead of spec There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've tried to fetch this but neither VirtualMachineStatus nor VirtualMachineInstanceStatus have a field for machine types. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can find it here. As mentioned in this comment, this is how it looks like: spec:
domain:
machine:
type: q35
status:
machine:
type: pc-q35-rhel8.2.0 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I looked into this a little more, and I believe the reason why the VMI you created has just "q35" in the machine type in spec is because that is the alias for the default machine type (which is the most recent RHEL version I believe) if a machine type is not explicitly specified in the yaml. I created a VM where I explicitly set the machine type, and was able to see the full machine type string in the Spec for both the VM and the VMI. This also works if you modify examples/vmi-fedora.yaml to have an explicitly stated machine type. That is to say, I think it is still correct to retrieve the machine type from the VM spec, and there already exists logic that will catch any machine types that do not fit the "pc-q35-rhelx.x.x" version, so VMs with a machine type of just "q35" will be ignored. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In my opinion this is a bug. Am I missing anything? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I see what you mean. I still am not sure how to access the VMI status.machine.type because I just don't see it in any of the documentation anywhere, but it might be good then in the case of the machine type being |
||||||||||||
machineTypeSubstrings := strings.Split(machineType, "-") | ||||||||||||
version := machineTypeSubstrings[2] | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are we sure that every VMI would have a machine type of this format? spec:
domain:
machine:
type: q35 In this case this job would crash. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do see machine:
type: pc-q35-rhel9.2.0 under status though There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay so after some investigation I think I understand what needs to be done exactly. spec:
domain:
machine:
type: q35
status:
machine:
type: pc-q35-rhel8.2.0 So, what would need to be done is:
Open questions: |
||||||||||||
if len(machineTypeSubstrings) != 3 { | ||||||||||||
return nil | ||||||||||||
} | ||||||||||||
|
||||||||||||
if strings.Contains(version, "rhel") && version < "rhel9.0.0" { | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What did you try to achieve with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Basically the requirements for updating the machine types is updating the machine type of any VM with a machine type that is prior to RHEL 9. There are some other formats of machine type I believe so I will need validate to prevent this from crashing though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to change There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure I understand. Since we only want to update machine types with versions prior to the specified version (in this case RHEL 9), wouldn't a lexographical comparison of the versions be sufficient? Unless there is a format of machine type that I am missing. In this case, "rhel8.4.0" < "rhel9.0.0" returns true, which satisfies the conditions I believe. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, you have a point. As long as we're sure this is indeed always the format, your approach is completely valid. Saying that, it is never too bad to add some more validations, as this can go silently wrong if something changes (what if the string is empty for some reason? or if the format changes slightly, like But again, if you're certain that this is always the format, feel free to leave it as it 👍 |
||||||||||||
machineTypeSubstrings[2] = latestVersion | ||||||||||||
machineType = strings.Join(machineTypeSubstrings, "-") | ||||||||||||
updateMachineType := fmt.Sprintf(`{"spec": {"template": {"spec": {"domain": {"machine": {"type": "%s"}}}}}}`, machineType) | ||||||||||||
|
||||||||||||
_, err = virtCli.VirtualMachine(vm.Namespace).Patch(vm.Name, types.StrategicMergePatchType, []byte(updateMachineType), &k8sv1.PatchOptions{}) | ||||||||||||
if err != nil { | ||||||||||||
return err | ||||||||||||
} | ||||||||||||
|
||||||||||||
// add label to running VMs that a restart is required for change to take place | ||||||||||||
if vm.Status.Ready { | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There's a slight difference between If we use |
||||||||||||
err = addWarningLabel(virtCli, &vm) | ||||||||||||
if err != nil { | ||||||||||||
return err | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} | ||||||||||||
} | ||||||||||||
return nil | ||||||||||||
} | ||||||||||||
|
||||||||||||
func addWarningLabel (virtCli kubecli.KubevirtClient, vm *k6tv1.VirtualMachine) error { | ||||||||||||
addLabel := fmt.Sprint(`{"metadata": {"labels": {"restart-vm-required": "true"}}}}}`) | ||||||||||||
_, err := virtCli.VirtualMachineInstance(vm.Namespace).Patch(vm.Name, types.StrategicMergePatchType, []byte(addLabel), &k8sv1.PatchOptions{}) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IMO we better add the label to the VM object instead of the VMI.
WDYT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This makes sense to me. I think this might have been what I was originally planning to do (I can't remember why I decided not to but I can't think of a good reason for it). |
||||||||||||
if err != nil { | ||||||||||||
return err | ||||||||||||
} | ||||||||||||
vmiList = append(vmiList, vm.Name) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please all add the VM's namespace, that way it's a unique identifier. |
||||||||||||
|
||||||||||||
return nil | ||||||||||||
} | ||||||||||||
|
||||||||||||
func removeWarningLabel(obj interface{}) { | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need to remove a label from a deleted VMI. |
||||||||||||
vmi, ok := obj.(*k6tv1.VirtualMachineInstance) | ||||||||||||
if !ok { | ||||||||||||
return | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please log / print an error in this case, since it never should happen |
||||||||||||
} | ||||||||||||
|
||||||||||||
//check if deleted VMI is in list of VMIs that need to be restarted | ||||||||||||
vmiIndex := searchVMIList(vmi.Name) | ||||||||||||
if vmiIndex == -1 { | ||||||||||||
return | ||||||||||||
} | ||||||||||||
|
||||||||||||
// remove deleted VMI from list | ||||||||||||
vmiList = append(vmiList[:vmiIndex], vmiList[vmiIndex+1:]...) | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not that it's critical, but I think this could have been much easier with a map. The map could be of type |
||||||||||||
|
||||||||||||
// check if VMI list is now empty, to signal exiting the job | ||||||||||||
if len(vmiList) == 0 { | ||||||||||||
close(exitJob) | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
func searchVMIList(vmiName string) int { | ||||||||||||
for i, element := range vmiList { | ||||||||||||
if element == vmiName { | ||||||||||||
return i | ||||||||||||
} | ||||||||||||
} | ||||||||||||
return -1 | ||||||||||||
} |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -3,14 +3,24 @@ package mass_machine_type_transition | |||||||||
import "os" | ||||||||||
|
||||||||||
func main() { | ||||||||||
vmiInformer, err := getVmiInformer() | ||||||||||
virtCli, err := getVirtCli() | ||||||||||
if err != nil { | ||||||||||
os.Exit(1) | ||||||||||
} | ||||||||||
|
||||||||||
if vmiInformer != nil { | ||||||||||
|
||||||||||
vmiInformer, err := getVmiInformer(virtCli) | ||||||||||
if err != nil { | ||||||||||
os.Exit(1) | ||||||||||
} | ||||||||||
|
||||||||||
|
||||||||||
stopCh := make(chan struct{}) | ||||||||||
go vmiInformer.Run(stopCh) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can use
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will also initialize |
||||||||||
|
||||||||||
|
||||||||||
updateMachineTypes(virtCli) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please handle the error (by either printing it to stdout or logging it) |
||||||||||
|
||||||||||
// wait for list of VMIs that need restart to be empty | ||||||||||
<-exitJob | ||||||||||
|
||||||||||
os.Exit(0) | ||||||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
apiVersion: batch/v1 | ||
kind: Job | ||
metadata: | ||
name: mass-machine-type-transition | ||
spec: | ||
template: | ||
metadata: | ||
name: mass-machine-type-transition | ||
spec: | ||
containers: | ||
- name: machine-type-transition | ||
image: machine-type-transition:latest | ||
imagePullPolicy: IfNotPresent | ||
restartPolicy: OnFailure | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add the environment variables with default values to the job manifest. It will help guide users to use them properly. apiVersion: batch/v1
kind: Job
metadata:
name: mass-machine-type-transition
spec:
template:
metadata:
name: mass-machine-type-transition
spec:
containers:
- name: machine-type-transition
image: machine-type-transition:latest
imagePullPolicy: IfNotPresent
env:
- name: NAMESPACE
value: "" # default is having no selector
- name: RESTART_NOW
value: false
restartPolicy: OnFailure |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
package mass_machine_type_transition | ||
/* | ||
import ( | ||
"context" | ||
//"strings" | ||
|
||
batchv1 "k8s.io/api/batch/v1" | ||
v1 "k8s.io/api/core/v1" | ||
k8sv1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"kubevirt.io/client-go/kubecli" | ||
) | ||
|
||
|
||
// leftover from nested job approach | ||
func generateMachineTypeTransitionJob(virtCli kubecli.KubevirtClient, namespace *string, completionCount *int32) error { | ||
jobs := virtCli.BatchV1().Jobs("default") | ||
jobSpec := &batchv1.Job{ | ||
ObjectMeta: k8sv1.ObjectMeta{ | ||
Name: "mass-machine-type-transition", | ||
Namespace: *namespace, | ||
}, | ||
Spec: batchv1.JobSpec{ | ||
Completions: completionCount, | ||
Template: v1.PodTemplateSpec{ | ||
Spec: v1.PodSpec{ | ||
Containers: []v1.Container{ | ||
{ | ||
Name: "mass-machine-type-transition", | ||
// not 100% sure what to use for image here | ||
Image: "", | ||
Command: "", | ||
}, | ||
}, | ||
RestartPolicy: v1.RestartPolicyOnFailure, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
_, err := jobs.Create(context.TODO(), jobSpec, k8sv1.CreateOptions{}) | ||
if err != nil { | ||
return err | ||
} | ||
return nil | ||
}*/ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: