diff --git a/operator/roles/forkliftcontroller/defaults/main.yml b/operator/roles/forkliftcontroller/defaults/main.yml index 86f7c8b39..92db25c0f 100644 --- a/operator/roles/forkliftcontroller/defaults/main.yml +++ b/operator/roles/forkliftcontroller/defaults/main.yml @@ -35,7 +35,6 @@ controller_snapshot_removal_timeout_minuts: 120 controller_snapshot_status_check_rate_seconds: 10 controller_cleanup_retries: 10 controller_dv_status_check_retries: 10 -controller_snapshot_removal_check_retries: 20 controller_vsphere_incremental_backup: true controller_ovirt_warm_migration: true controller_retain_precopy_importer_pods: false diff --git a/operator/roles/forkliftcontroller/templates/controller/deployment-controller.yml.j2 b/operator/roles/forkliftcontroller/templates/controller/deployment-controller.yml.j2 index 28bfbcc77..84cc6d480 100644 --- a/operator/roles/forkliftcontroller/templates/controller/deployment-controller.yml.j2 +++ b/operator/roles/forkliftcontroller/templates/controller/deployment-controller.yml.j2 @@ -85,10 +85,6 @@ spec: - name: DV_STATUS_CHECK_RETRIES value: "{{ controller_dv_status_check_retries }}" {% endif %} -{% if controller_snapshot_removal_check_retries is number %} - - name: SNAPSHOT_REMOVAL_CHECK_RETRIES - value: "{{ controller_snapshot_removal_check_retries }}" -{% endif %} {% if controller_max_vm_inflight is number %} - name: MAX_VM_INFLIGHT value: "{{ controller_max_vm_inflight }}" diff --git a/pkg/controller/plan/adapter/base/doc.go b/pkg/controller/plan/adapter/base/doc.go index 0900f3397..c08f75464 100644 --- a/pkg/controller/plan/adapter/base/doc.go +++ b/pkg/controller/plan/adapter/base/doc.go @@ -108,7 +108,9 @@ type Client interface { // Remove a snapshot. RemoveSnapshot(vmRef ref.Ref, snapshot string, hostsFunc util.HostsFunc) error // Check if a snapshot is ready to transfer. - CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, err error) + CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, snapshotId string, err error) + // Check if a snapshot is removed. + CheckSnapshotRemoved(vmRef ref.Ref, snapshot string) (ready bool, err error) // Set DataVolume checkpoints. SetCheckpoints(vmRef ref.Ref, precopies []planapi.Precopy, datavolumes []cdi.DataVolume, final bool, hostsFunc util.HostsFunc) (err error) // Close connections to the provider API. diff --git a/pkg/controller/plan/adapter/ocp/client.go b/pkg/controller/plan/adapter/ocp/client.go index c17cf49a8..951264857 100644 --- a/pkg/controller/plan/adapter/ocp/client.go +++ b/pkg/controller/plan/adapter/ocp/client.go @@ -25,7 +25,12 @@ type Client struct { } // CheckSnapshotReady implements base.Client -func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (bool, error) { +func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, snapshotId string, err error) { + return +} + +// CheckSnapshotRemoved implements base.Client +func (r *Client) CheckSnapshotRemoved(vmRef ref.Ref, snapshot string) (bool, error) { return false, nil } diff --git a/pkg/controller/plan/adapter/openstack/client.go b/pkg/controller/plan/adapter/openstack/client.go index e8e7a89da..6291a095c 100644 --- a/pkg/controller/plan/adapter/openstack/client.go +++ b/pkg/controller/plan/adapter/openstack/client.go @@ -115,10 +115,15 @@ func (r *Client) CreateSnapshot(vmRef ref.Ref, hostsFunc util.HostsFunc) (imageI } // Check if a snapshot is ready to transfer. -func (r *Client) CheckSnapshotReady(vmRef ref.Ref, imageID string) (ready bool, err error) { +func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, snapshotId string, err error) { return } +// CheckSnapshotRemoved implements base.Client +func (r *Client) CheckSnapshotRemoved(vmRef ref.Ref, snapshot string) (bool, error) { + return false, nil +} + // Set DataVolume checkpoints. func (r *Client) SetCheckpoints(vmRef ref.Ref, precopies []planapi.Precopy, datavolumes []cdi.DataVolume, final bool, hostsFunc util.HostsFunc) error { return nil diff --git a/pkg/controller/plan/adapter/ova/client.go b/pkg/controller/plan/adapter/ova/client.go index c1ef2838d..c0a676e1c 100644 --- a/pkg/controller/plan/adapter/ova/client.go +++ b/pkg/controller/plan/adapter/ova/client.go @@ -61,10 +61,15 @@ func (r *Client) GetSnapshotDeltas(vmRef ref.Ref, snapshot string, hostsFunc uti } // Check if a snapshot is ready to transfer, to avoid importer restarts. -func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, err error) { +func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, snapshotId string, err error) { return } +// CheckSnapshotRemoved implements base.Client +func (r *Client) CheckSnapshotRemoved(vmRef ref.Ref, snapshot string) (bool, error) { + return false, nil +} + // Set DataVolume checkpoints. func (r *Client) SetCheckpoints(vmRef ref.Ref, precopies []planapi.Precopy, datavolumes []cdi.DataVolume, final bool, hostsFunc util.HostsFunc) (err error) { return diff --git a/pkg/controller/plan/adapter/ovirt/client.go b/pkg/controller/plan/adapter/ovirt/client.go index 4a7a7fec5..11d404bdc 100644 --- a/pkg/controller/plan/adapter/ovirt/client.go +++ b/pkg/controller/plan/adapter/ovirt/client.go @@ -75,7 +75,7 @@ func (r *Client) CreateSnapshot(vmRef ref.Ref, hostsFunc util.HostsFunc) (snapsh } // Check if a snapshot is ready to transfer, to avoid importer restarts. -func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, err error) { +func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, snapshotId string, err error) { correlationID, err := r.getSnapshotCorrelationID(vmRef, &snapshot) if err != nil { err = liberr.Wrap(err) @@ -104,6 +104,11 @@ func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, return } +// CheckSnapshotRemoved implements base.Client +func (r *Client) CheckSnapshotRemoved(vmRef ref.Ref, snapshot string) (bool, error) { + return false, nil +} + // Remove a VM snapshot. No-op for this provider. func (r *Client) RemoveSnapshot(vmRef ref.Ref, snapshot string, hostsFunc util.HostsFunc) (err error) { return diff --git a/pkg/controller/plan/adapter/vsphere/BUILD.bazel b/pkg/controller/plan/adapter/vsphere/BUILD.bazel index c744b9e7c..68d93a55a 100644 --- a/pkg/controller/plan/adapter/vsphere/BUILD.bazel +++ b/pkg/controller/plan/adapter/vsphere/BUILD.bazel @@ -36,6 +36,7 @@ go_library( "//vendor/github.com/vmware/govmomi/find", "//vendor/github.com/vmware/govmomi/object", "//vendor/github.com/vmware/govmomi/session", + "//vendor/github.com/vmware/govmomi/task", "//vendor/github.com/vmware/govmomi/vim25", "//vendor/github.com/vmware/govmomi/vim25/mo", "//vendor/github.com/vmware/govmomi/vim25/soap", diff --git a/pkg/controller/plan/adapter/vsphere/client.go b/pkg/controller/plan/adapter/vsphere/client.go index a48af1af5..e45766854 100644 --- a/pkg/controller/plan/adapter/vsphere/client.go +++ b/pkg/controller/plan/adapter/vsphere/client.go @@ -16,6 +16,7 @@ import ( "github.com/vmware/govmomi" "github.com/vmware/govmomi/object" "github.com/vmware/govmomi/session" + "github.com/vmware/govmomi/task" "github.com/vmware/govmomi/vim25" "github.com/vmware/govmomi/vim25/mo" "github.com/vmware/govmomi/vim25/soap" @@ -27,8 +28,11 @@ import ( ) const ( - snapshotName = "forklift-migration-precopy" - snapshotDesc = "Forklift Operator warm migration precopy" + snapshotName = "forklift-migration-precopy" + snapshotDesc = "Forklift Operator warm migration precopy" + VirtualMachine = "VirtualMachine" + CreateSnapshotTask = "CreateSnapshot_Task" + RemoveSnapshotTask = "RemoveSnapshot_Task" ) // vSphere VM Client @@ -39,9 +43,9 @@ type Client struct { } // Create a VM snapshot and return its ID. -func (r *Client) CreateSnapshot(vmRef ref.Ref, hosts util.HostsFunc) (id string, err error) { +func (r *Client) CreateSnapshot(vmRef ref.Ref, hostsFunc util.HostsFunc) (id string, err error) { r.Log.V(1).Info("Creating snapshot", "vmRef", vmRef) - vm, err := r.getVM(vmRef, hosts) + vm, err := r.getVM(vmRef, hostsFunc) if err != nil { return } @@ -50,28 +54,88 @@ func (r *Client) CreateSnapshot(vmRef ref.Ref, hosts util.HostsFunc) (id string, err = liberr.Wrap(err) return } - res, err := task.WaitForResult(context.TODO(), nil) + return task.Common.Reference().Value, nil +} + +// Check if a snapshot is ready to transfer. +func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, snapshotId string, err error) { + taskInfo, err := r.getLatestTaskByName(vmRef, CreateSnapshotTask) if err != nil { - err = liberr.Wrap(err) - return + return false, "", liberr.Wrap(err) + } + if taskInfo == nil { + return false, "", nil } - id = res.Result.(types.ManagedObjectReference).Value - r.Log.Info("Created snapshot", "vmRef", vmRef, "id", id) + ready, err = r.checkTaskStatus(taskInfo) + if err != nil { + return false, "", liberr.Wrap(err) + } + if ready { + return true, taskInfo.Result.(types.ManagedObjectReference).Value, nil + } else { + // The snapshot is not ready, retry the check + return false, "", nil + } +} - return +// Check if a snapshot is removed. +func (r *Client) CheckSnapshotRemoved(vmRef ref.Ref, snapshot string) (ready bool, err error) { + taskInfo, err := r.getLatestTaskByName(vmRef, RemoveSnapshotTask) + if err != nil { + return false, liberr.Wrap(err) + } + if taskInfo == nil { + return false, nil + } + return r.checkTaskStatus(taskInfo) } -// Check if a snapshot is ready to transfer. -func (r *Client) CheckSnapshotReady(vmRef ref.Ref, snapshot string) (ready bool, err error) { - return true, nil +func (r *Client) checkTaskStatus(taskInfo *types.TaskInfo) (ready bool, err error) { + r.Log.Info("Snapshot task", "task", taskInfo.Task.Value, "name", taskInfo.Name, "status", taskInfo.State) + switch taskInfo.State { + case types.TaskInfoStateSuccess: + return true, nil + case types.TaskInfoStateError: + return false, fmt.Errorf(taskInfo.Error.LocalizedMessage) + default: + return false, nil + } +} + +func (r *Client) getLatestTaskByName(vmRef ref.Ref, taskName string) (*types.TaskInfo, error) { + taskManager := task.NewManager(r.client.Client) + taskCollector, err := taskManager.CreateCollectorForTasks(context.TODO(), types.TaskFilterSpec{ + Entity: &types.TaskFilterSpecByEntity{ + Entity: types.ManagedObjectReference{ + Type: VirtualMachine, + Value: vmRef.ID, + }, + Recursion: types.TaskFilterSpecRecursionOptionSelf, + }, + }) + if err != nil { + return nil, err + } + //nolint:errcheck + defer taskCollector.Destroy(context.Background()) + tasks, err := taskCollector.LatestPage(context.TODO()) + if err != nil { + return nil, err + } + for _, taskInfo := range tasks { + if taskInfo.Name == taskName { + return &taskInfo, nil + } + } + return nil, nil } // Remove a VM snapshot. -func (r *Client) RemoveSnapshot(vmRef ref.Ref, snapshot string, hosts util.HostsFunc) (err error) { +func (r *Client) RemoveSnapshot(vmRef ref.Ref, snapshot string, hostsFunc util.HostsFunc) (err error) { r.Log.V(1).Info("RemoveSnapshot", "vmRef", vmRef, "snapshot", snapshot) - err = r.removeSnapshot(vmRef, snapshot, false, hosts) + err = r.removeSnapshot(vmRef, snapshot, false, hostsFunc) return } diff --git a/pkg/controller/plan/migration.go b/pkg/controller/plan/migration.go index 2535e97c8..692fb7edc 100644 --- a/pkg/controller/plan/migration.go +++ b/pkg/controller/plan/migration.go @@ -108,7 +108,6 @@ const ( TransferCompleted = "Transfer completed." PopulatorPodPrefix = "populate-" DvStatusCheckRetriesAnnotation = "dvStatusCheckRetries" - SnapshotRemovalCheckRetries = "snapshotRemovalCheckRetries" ) var ( @@ -1027,27 +1026,15 @@ func (r *Migration) execute(vm *plan.VMStatus) (err error) { vm.AddError(fmt.Sprintf("Step '%s' not found", r.step(vm))) break } - // FIXME: This is just temporary timeout to unblock the migrations which get stuck on issue https://issues.redhat.com/browse/MTV-1753 - // This should be fixed properly by adding the task manager inside the inventory and monitor the task status - // from the main controller. - var retries int - retriesAnnotation := step.Annotations[SnapshotRemovalCheckRetries] - if retriesAnnotation == "" { - step.Annotations[SnapshotRemovalCheckRetries] = "1" - } else { - retries, err = strconv.Atoi(retriesAnnotation) - if err != nil { - step.AddError(err.Error()) - err = nil - break - } - if retries >= settings.Settings.SnapshotRemovalCheckRetries { - vm.Phase = r.next(vm.Phase) - // Reset for next precopy - step.Annotations[SnapshotRemovalCheckRetries] = "1" - } else { - step.Annotations[SnapshotRemovalCheckRetries] = strconv.Itoa(retries + 1) - } + snapshot := vm.Warm.Precopies[len(vm.Warm.Precopies)-1].Snapshot + ready, err := r.provider.CheckSnapshotRemoved(vm.Ref, snapshot) + if err != nil { + step.AddError(err.Error()) + err = nil + break + } + if ready { + vm.Phase = r.next(vm.Phase) } case CreateInitialSnapshot, CreateSnapshot, CreateFinalSnapshot: step, found := vm.FindStep(r.step(vm)) @@ -1076,12 +1063,18 @@ func (r *Migration) execute(vm *plan.VMStatus) (err error) { break } snapshot := vm.Warm.Precopies[len(vm.Warm.Precopies)-1].Snapshot - ready, err := r.provider.CheckSnapshotReady(vm.Ref, snapshot) + ready, snapshotId, err := r.provider.CheckSnapshotReady(vm.Ref, snapshot) if err != nil { step.AddError(err.Error()) err = nil break } + // If the provider does not directly create the snapshot, but we need to wait for the snapshot to be created + // We start the creation task in CreateSnapshot, set the task ID as a snapshot id which needs to be replaced + // by the snapshot id after the task finishes. + if snapshotId != "" { + vm.Warm.Precopies[len(vm.Warm.Precopies)-1].Snapshot = snapshotId + } if ready { vm.Phase = r.next(vm.Phase) } diff --git a/pkg/settings/migration.go b/pkg/settings/migration.go index 354eadd18..0b8096156 100644 --- a/pkg/settings/migration.go +++ b/pkg/settings/migration.go @@ -12,26 +12,25 @@ import ( // Environment variables. const ( - MaxVmInFlight = "MAX_VM_INFLIGHT" - HookRetry = "HOOK_RETRY" - ImporterRetry = "IMPORTER_RETRY" - VirtV2vImage = "VIRT_V2V_IMAGE" - PrecopyInterval = "PRECOPY_INTERVAL" - VirtV2vDontRequestKVM = "VIRT_V2V_DONT_REQUEST_KVM" - SnapshotRemovalTimeout = "SNAPSHOT_REMOVAL_TIMEOUT" - SnapshotStatusCheckRate = "SNAPSHOT_STATUS_CHECK_RATE" - CDIExportTokenTTL = "CDI_EXPORT_TOKEN_TTL" - FileSystemOverhead = "FILESYSTEM_OVERHEAD" - BlockOverhead = "BLOCK_OVERHEAD" - CleanupRetries = "CLEANUP_RETRIES" - DvStatusCheckRetries = "DV_STATUS_CHECK_RETRIES" - SnapshotRemovalCheckRetries = "SNAPSHOT_REMOVAL_CHECK_RETRIES" - OvirtOsConfigMap = "OVIRT_OS_MAP" - VsphereOsConfigMap = "VSPHERE_OS_MAP" - VirtCustomizeConfigMap = "VIRT_CUSTOMIZE_MAP" - VddkJobActiveDeadline = "VDDK_JOB_ACTIVE_DEADLINE" - VirtV2vExtraArgs = "VIRT_V2V_EXTRA_ARGS" - VirtV2vExtraConfConfigMap = "VIRT_V2V_EXTRA_CONF_CONFIG_MAP" + MaxVmInFlight = "MAX_VM_INFLIGHT" + HookRetry = "HOOK_RETRY" + ImporterRetry = "IMPORTER_RETRY" + VirtV2vImage = "VIRT_V2V_IMAGE" + PrecopyInterval = "PRECOPY_INTERVAL" + VirtV2vDontRequestKVM = "VIRT_V2V_DONT_REQUEST_KVM" + SnapshotRemovalTimeout = "SNAPSHOT_REMOVAL_TIMEOUT" + SnapshotStatusCheckRate = "SNAPSHOT_STATUS_CHECK_RATE" + CDIExportTokenTTL = "CDI_EXPORT_TOKEN_TTL" + FileSystemOverhead = "FILESYSTEM_OVERHEAD" + BlockOverhead = "BLOCK_OVERHEAD" + CleanupRetries = "CLEANUP_RETRIES" + DvStatusCheckRetries = "DV_STATUS_CHECK_RETRIES" + OvirtOsConfigMap = "OVIRT_OS_MAP" + VsphereOsConfigMap = "VSPHERE_OS_MAP" + VirtCustomizeConfigMap = "VIRT_CUSTOMIZE_MAP" + VddkJobActiveDeadline = "VDDK_JOB_ACTIVE_DEADLINE" + VirtV2vExtraArgs = "VIRT_V2V_EXTRA_ARGS" + VirtV2vExtraConfConfigMap = "VIRT_V2V_EXTRA_CONF_CONFIG_MAP" ) // Migration settings @@ -62,8 +61,6 @@ type Migration struct { CleanupRetries int // DvStatusCheckRetries retries DvStatusCheckRetries int - // SnapshotRemovalCheckRetries retries - SnapshotRemovalCheckRetries int // oVirt OS config map name OvirtOsConfigMap string // vSphere OS config map name @@ -109,9 +106,6 @@ func (r *Migration) Load() (err error) { if r.DvStatusCheckRetries, err = getPositiveEnvLimit(DvStatusCheckRetries, 10); err != nil { return liberr.Wrap(err) } - if r.SnapshotRemovalCheckRetries, err = getPositiveEnvLimit(SnapshotRemovalCheckRetries, 20); err != nil { - return liberr.Wrap(err) - } if virtV2vImage, ok := os.LookupEnv(VirtV2vImage); ok { r.VirtV2vImage = virtV2vImage } else if Settings.Role.Has(MainRole) {