diff --git a/src/cli/peel/tests.rs b/src/cli/peel/tests.rs index c21557c8..b126318a 100644 --- a/src/cli/peel/tests.rs +++ b/src/cli/peel/tests.rs @@ -35,9 +35,8 @@ fn get_reduced_1090008640() -> PeelArgs { } } -#[track_caller] // should be 40kHz, 2s raw -fn get_merged_1090008640(extra_argv: Vec) -> PeelArgs { +fn get_merged_1090008640(extra_argv: Vec) -> PeelParams { let args = get_reduced_1090008640(); let temp_dir = tempdir().expect("Couldn't make tempdir"); let arg_file = temp_dir.path().join("peel.toml"); @@ -47,7 +46,10 @@ fn get_merged_1090008640(extra_argv: Vec) -> PeelArgs { write!(&mut f, "{ser}").unwrap(); let mut argv = vec!["peel".to_string(), arg_file.display().to_string()]; argv.extend(extra_argv); - PeelArgs::parse_from(argv).merge().unwrap() + let params = PeelArgs::parse_from(argv).merge().unwrap().parse().unwrap(); + drop(f); + drop(temp_dir); + params } // testing the frequency averaging args all works together: @@ -57,13 +59,12 @@ fn get_merged_1090008640(extra_argv: Vec) -> PeelArgs { #[test] fn frequency_averaging_defaults() { - let args = get_merged_1090008640(vec![]); let PeelParams { input_vis_params: InputVisParams { spw: input_spw, .. }, output_vis_params, low_res_spw, .. - } = args.parse().unwrap(); + } = get_merged_1090008640(vec![]); let output_freq_average_factor = match output_vis_params { Some(OutputVisParams { output_freq_average_factor, @@ -78,13 +79,12 @@ fn frequency_averaging_defaults() { #[test] fn frequency_averaging_explicit_output() { - let args = get_merged_1090008640(vec!["--output-vis-freq-average=80kHz".to_string()]); let PeelParams { input_vis_params: InputVisParams { spw: input_spw, .. }, output_vis_params, low_res_spw, .. - } = args.parse().unwrap(); + } = get_merged_1090008640(vec!["--output-vis-freq-average=80kHz".to_string()]); let output_freq_average_factor = match output_vis_params { Some(OutputVisParams { output_freq_average_factor, @@ -99,16 +99,15 @@ fn frequency_averaging_explicit_output() { #[test] fn frequency_averaging_explicit_output_iono() { - let args = get_merged_1090008640(vec![ - "--output-vis-freq-average=80kHz".to_string(), - "--iono-freq-average=320kHz".to_string(), - ]); let PeelParams { input_vis_params: InputVisParams { spw: input_spw, .. }, output_vis_params, low_res_spw, .. - } = args.parse().unwrap(); + } = get_merged_1090008640(vec![ + "--output-vis-freq-average=80kHz".to_string(), + "--iono-freq-average=320kHz".to_string(), + ]); let output_freq_average_factor = match output_vis_params { Some(OutputVisParams { output_freq_average_factor, @@ -123,17 +122,16 @@ fn frequency_averaging_explicit_output_iono() { #[test] fn frequency_averaging_explicit_in_out() { - let args = get_merged_1090008640(vec![ - "--freq-average=80kHz".to_string(), - "--output-vis-freq-average=160kHz".to_string(), - "--iono-freq-average=320kHz".to_string(), - ]); let PeelParams { input_vis_params: InputVisParams { spw: input_spw, .. }, output_vis_params, low_res_spw, .. - } = args.parse().unwrap(); + } = get_merged_1090008640(vec![ + "--freq-average=80kHz".to_string(), + "--output-vis-freq-average=160kHz".to_string(), + "--iono-freq-average=320kHz".to_string(), + ]); let output_freq_average_factor = match output_vis_params { Some(OutputVisParams { output_freq_average_factor, @@ -148,17 +146,16 @@ fn frequency_averaging_explicit_in_out() { #[test] fn frequency_averaging_explicit() { - let args = get_merged_1090008640(vec![ - "--freq-average=80kHz".to_string(), - "--output-vis-freq-average=160kHz".to_string(), - "--iono-freq-average=320kHz".to_string(), - ]); let PeelParams { input_vis_params: InputVisParams { spw: input_spw, .. }, output_vis_params, low_res_spw, .. - } = args.parse().unwrap(); + } = get_merged_1090008640(vec![ + "--freq-average=80kHz".to_string(), + "--output-vis-freq-average=160kHz".to_string(), + "--iono-freq-average=320kHz".to_string(), + ]); let output_freq_average_factor = match output_vis_params { Some(OutputVisParams { output_freq_average_factor, @@ -175,13 +172,12 @@ fn frequency_averaging_explicit() { // time res will be clipped to 2s #[test] fn time_averaging_explicit_output_clip() { - let args = get_merged_1090008640(vec!["--output-vis-time-average=4s".to_string()]); let PeelParams { input_vis_params: InputVisParams { time_res, .. }, output_vis_params, iono_time_average_factor, .. - } = args.parse().unwrap(); + } = get_merged_1090008640(vec!["--output-vis-time-average=4s".to_string()]); let output_time_average_factor = match output_vis_params { Some(OutputVisParams { output_time_average_factor, @@ -200,121 +196,3 @@ fn time_averaging_explicit_output_clip() { // --output-vis-time-average - output averaging settings // // this requires test data with more than one timestep -// -// #[test] -// fn time_averaging_defaults() { -// let args = get_merged_1090008640(vec![]); -// let PeelParams { -// input_vis_params: InputVisParams { time_res, .. }, -// output_vis_params, -// iono_time_average_factor, -// .. -// } = args.parse().unwrap(); -// let output_time_average_factor = match output_vis_params { -// Some(OutputVisParams { -// output_time_average_factor, -// .. -// }) => output_time_average_factor, -// _ => panic!("Expected OutputVisParams::Single"), -// }; -// assert_abs_diff_eq!(time_res.to_seconds(), 2.0); -// assert_eq!(iono_time_average_factor.get(), 4); -// assert_eq!(output_time_average_factor.get(), 1); -// } - -// #[test] -// fn time_averaging_explicit_output() { -// let args = get_merged_1090008640(vec!["--output-vis-time-average=4s".to_string()]); -// let PeelParams { -// input_vis_params: InputVisParams { time_res, .. }, -// output_vis_params, -// iono_time_average_factor, -// .. -// } = args.parse().unwrap(); -// let output_time_average_factor = match output_vis_params { -// Some(OutputVisParams { -// output_time_average_factor, -// .. -// }) => output_time_average_factor, -// _ => panic!("Expected OutputVisParams::Single"), -// }; -// assert_abs_diff_eq!(time_res.to_seconds(), 2.0); -// assert_eq!(iono_time_average_factor.get(), 4); -// assert_eq!(output_time_average_factor.get(), 2); -// } - -// #[test] -// fn time_averaging_explicit_output_iono() { -// let args = get_merged_1090008640(vec![ -// "--output-vis-time-average=4s".to_string(), -// "--iono-time-average=16s".to_string(), -// ]); -// let PeelParams { -// input_vis_params: InputVisParams { time_res, .. }, -// output_vis_params, -// iono_time_average_factor, -// .. -// } = args.parse().unwrap(); -// let output_time_average_factor = match output_vis_params { -// Some(OutputVisParams { -// output_time_average_factor, -// .. -// }) => output_time_average_factor, -// _ => panic!("Expected OutputVisParams::Single"), -// }; -// assert_abs_diff_eq!(time_res.to_seconds(), 2.0); -// assert_eq!(iono_time_average_factor.get(), 8); -// assert_eq!(output_time_average_factor.get(), 2); -// } - -// #[test] -// fn time_averaging_explicit_in_out() { -// // enable logging -// use env_logger::Env; -// env_logger::Builder::from_env(Env::default().default_filter_or("debug")).init(); -// let args = get_merged_1090008640(vec![ -// "--time-average=4s".to_string(), -// "--output-vis-time-average=16s".to_string(), -// ]); -// let PeelParams { -// input_vis_params: InputVisParams { time_res, .. }, -// output_vis_params, -// iono_time_average_factor, -// .. -// } = args.parse().unwrap(); -// let output_time_average_factor = match output_vis_params { -// Some(OutputVisParams { -// output_time_average_factor, -// .. -// }) => output_time_average_factor, -// _ => panic!("Expected OutputVisParams::Single"), -// }; -// assert_abs_diff_eq!(time_res.to_seconds(), 4.0); -// assert_eq!(iono_time_average_factor.get(), 2); -// assert_eq!(output_time_average_factor.get(), 8); -// } - -// #[test] -// fn time_averaging_explicit() { -// let args = get_merged_1090008640(vec![ -// "--time-average=4s".to_string(), -// "--output-vis-time-average=16s".to_string(), -// "--iono-time-average=32s".to_string(), -// ]); -// let PeelParams { -// input_vis_params: InputVisParams { time_res, .. }, -// output_vis_params, -// iono_time_average_factor, -// .. -// } = args.parse().unwrap(); -// let output_time_average_factor = match output_vis_params { -// Some(OutputVisParams { -// output_time_average_factor, -// .. -// }) => output_time_average_factor, -// _ => panic!("Expected OutputVisParams::Single"), -// }; -// assert_abs_diff_eq!(time_res.to_seconds(), 4.0); -// assert_eq!(iono_time_average_factor.get(), 4); -// assert_eq!(output_time_average_factor.get(), 8); -// } diff --git a/src/params/peel/gpu.rs b/src/params/peel/gpu.rs new file mode 100644 index 00000000..d280a054 --- /dev/null +++ b/src/params/peel/gpu.rs @@ -0,0 +1,971 @@ +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use itertools::{izip, Itertools}; +use log::{debug, info, trace, warn}; +use marlu::{ + constants::VEL_C, + pos::xyz::xyzs_to_cross_uvws, + precession::{get_lmst, precess_time}, + Jones, RADec, XyzGeodetic, UVW, +}; +use ndarray::prelude::*; +use num_complex::Complex; +use rayon::prelude::*; +use std::{ + cmp::Ordering, + collections::{HashMap, HashSet}, +}; + +use crate::{ + averaging::Timeblock, + context::ObsContext, + di_calibrate::calibrate_timeblock, + gpu::{self, gpu_kernel_call, DevicePointer, GpuError, GpuFloat}, + model::{SkyModeller, SkyModellerGpu}, + srclist::{ComponentType, FluxDensity, FluxDensityType, SourceList}, + Chanblock, TileBaselineFlags, +}; + +use super::{weights_average, IonoConsts, PeelError, UV, W}; + +// custom sorting implementations +impl PartialOrd for BadSource { + fn partial_cmp(&self, other: &Self) -> Option { + match self.source_name.partial_cmp(&other.source_name) { + // Some(Ordering::Equal) => match self.gpstime.partial_cmp(&other.gpstime) { + Some(Ordering::Equal) => match self.pass.partial_cmp(&other.pass) { + Some(Ordering::Less) => Some(Ordering::Greater), + Some(Ordering::Greater) => Some(Ordering::Less), + other => other, + }, + // other => other, + // }, + other => other, + } + } +} + +#[derive(Debug, PartialEq)] //, Serialize, Deserialize)] +pub(crate) struct BadSource { + // timeblock: Timeblock, + pub(crate) gpstime: f64, + pub(crate) pass: usize, + // pub(crate) gsttime: Epoch, + // pub(crate) times: Vec, + // source: Source, + pub(crate) i_source: usize, + pub(crate) source_name: String, + // pub(crate) weighted_catalogue_pos_j2000: RADec, + // iono_consts: IonoConsts, + pub(crate) alpha: f64, + pub(crate) beta: f64, + pub(crate) gain: f64, + // pub(crate) alphas: Vec, + // pub(crate) betas: Vec, + // pub(crate) gains: Vec, + pub(crate) residuals_i: Vec>, + pub(crate) residuals_q: Vec>, + pub(crate) residuals_u: Vec>, + pub(crate) residuals_v: Vec>, +} + +#[allow(clippy::too_many_arguments)] +pub(crate) fn peel_gpu( + mut vis_residual_tfb: ArrayViewMut3>, + vis_weights_tfb: ArrayView3, + timeblock: &Timeblock, + source_list: &SourceList, + iono_consts: &mut [IonoConsts], + source_weighted_positions: &[RADec], + num_passes: usize, + num_loops: usize, + convergence: f64, + chanblocks: &[Chanblock], + low_res_lambdas_m: &[f64], + obs_context: &ObsContext, + tile_baseline_flags: &TileBaselineFlags, + high_res_modeller: &mut SkyModellerGpu, + no_precession: bool, + multi_progress_bar: &MultiProgress, +) -> Result<(), PeelError> { + let array_position = obs_context.array_position; + let dut1 = obs_context.dut1.unwrap_or_default(); + + let all_fine_chan_lambdas_m = chanblocks + .iter() + .map(|c| VEL_C / c.freq) + .collect::>(); + + let flagged_tiles = &tile_baseline_flags.flagged_tiles; + let unflagged_tile_xyzs: Vec = obs_context + .tile_xyzs + .par_iter() + .enumerate() + .filter(|(tile_index, _)| !flagged_tiles.contains(tile_index)) + .map(|(_, xyz)| *xyz) + .collect(); + + let timestamps = &timeblock.timestamps; + + let num_timesteps = vis_residual_tfb.len_of(Axis(0)); + let num_tiles = unflagged_tile_xyzs.len(); + let num_cross_baselines = (num_tiles * (num_tiles - 1)) / 2; + let num_high_res_chans = all_fine_chan_lambdas_m.len(); + let num_high_res_chans_spw = chanblocks.len(); + assert_eq!( + num_high_res_chans, num_high_res_chans_spw, + "chans from fine_chan_lambdas {} != chans in high_res_chanblocks (flagged+unflagged) {}", + num_high_res_chans, num_high_res_chans_spw + ); + + let num_low_res_chans = low_res_lambdas_m.len(); + assert!( + num_high_res_chans % num_low_res_chans == 0, + "TODO: averaging can't deal with non-integer ratios. channels high {} low {}", + num_high_res_chans, + num_low_res_chans + ); + + let num_timesteps_i32: i32 = num_timesteps.try_into().expect("smaller than i32::MAX"); + let num_tiles_i32: i32 = num_tiles.try_into().expect("smaller than i32::MAX"); + let num_cross_baselines_i32: i32 = num_cross_baselines + .try_into() + .expect("smaller than i32::MAX"); + let num_high_res_chans_i32 = num_high_res_chans + .try_into() + .expect("smaller than i32::MAX"); + let num_low_res_chans_i32 = num_low_res_chans.try_into().expect("smaller than i32::MAX"); + + let num_sources_to_iono_subtract = iono_consts.len(); + + let (time_axis, freq_axis, baseline_axis) = (Axis(0), Axis(1), Axis(2)); + + assert_eq!(vis_residual_tfb.len_of(time_axis), timestamps.len()); + assert_eq!(vis_weights_tfb.len_of(time_axis), timestamps.len()); + + assert_eq!(vis_residual_tfb.len_of(baseline_axis), num_cross_baselines); + assert_eq!(vis_weights_tfb.len_of(baseline_axis), num_cross_baselines); + + assert_eq!(vis_residual_tfb.len_of(freq_axis), num_high_res_chans); + assert_eq!(vis_weights_tfb.len_of(freq_axis), num_high_res_chans); + + assert!(num_sources_to_iono_subtract <= source_list.len()); + + if num_sources_to_iono_subtract == 0 { + return Ok(()); + } + + let timestamps = &timeblock.timestamps; + let peel_progress = multi_progress_bar.add( + ProgressBar::new(num_sources_to_iono_subtract as _) + .with_style( + ProgressStyle::default_bar() + .template("{msg:17}: [{wide_bar:.blue}] {pos:2}/{len:2} sources ({elapsed_precise}<{eta_precise})").unwrap() + .progress_chars("=> "), + ) + .with_position(0) + ); + peel_progress.tick(); + + macro_rules! pb_warn { ($($arg:tt)+) => (multi_progress_bar.suspend(|| warn!($($arg)+))) } + macro_rules! pb_info { ($($arg:tt)+) => (multi_progress_bar.suspend(|| info!($($arg)+))) } + macro_rules! pb_debug { ($($arg:tt)+) => (multi_progress_bar.suspend(|| debug!($($arg)+))) } + macro_rules! pb_trace { ($($arg:tt)+) => (multi_progress_bar.suspend(|| trace!($($arg)+))) } + + let mut lmsts = vec![0.; timestamps.len()]; + let mut latitudes = vec![0.; timestamps.len()]; + let mut tile_xyzs_high_res = Array2::::default((timestamps.len(), num_tiles)); + let mut high_res_uvws = Array2::default((timestamps.len(), num_cross_baselines)); + let mut tile_uvs_high_res = Array2::::default((timestamps.len(), num_tiles)); + let mut tile_ws_high_res = Array2::::default((timestamps.len(), num_tiles)); + + // Pre-compute high-res tile UVs and Ws at observation phase centre. + for ( + &time, + lmst, + latitude, + mut tile_xyzs_high_res, + mut high_res_uvws, + mut tile_uvs_high_res, + mut tile_ws_high_res, + ) in izip!( + timestamps.iter(), + lmsts.iter_mut(), + latitudes.iter_mut(), + tile_xyzs_high_res.outer_iter_mut(), + high_res_uvws.outer_iter_mut(), + tile_uvs_high_res.outer_iter_mut(), + tile_ws_high_res.outer_iter_mut(), + ) { + if !no_precession { + let precession_info = precess_time( + array_position.longitude_rad, + array_position.latitude_rad, + obs_context.phase_centre, + time, + dut1, + ); + tile_xyzs_high_res + .iter_mut() + .zip_eq(&precession_info.precess_xyz(&unflagged_tile_xyzs)) + .for_each(|(a, b)| *a = *b); + *lmst = precession_info.lmst_j2000; + *latitude = precession_info.array_latitude_j2000; + } else { + tile_xyzs_high_res + .iter_mut() + .zip_eq(&unflagged_tile_xyzs) + .for_each(|(a, b)| *a = *b); + *lmst = get_lmst(array_position.longitude_rad, time, dut1); + *latitude = array_position.latitude_rad; + }; + let hadec_phase = obs_context.phase_centre.to_hadec(*lmst); + let (s_ha, c_ha) = hadec_phase.ha.sin_cos(); + let (s_dec, c_dec) = hadec_phase.dec.sin_cos(); + let mut tile_uvws_high_res = vec![UVW::default(); num_tiles]; + for (tile_uvw, tile_uv, tile_w, &tile_xyz) in izip!( + tile_uvws_high_res.iter_mut(), + tile_uvs_high_res.iter_mut(), + tile_ws_high_res.iter_mut(), + tile_xyzs_high_res.iter(), + ) { + let uvw = UVW::from_xyz_inner(tile_xyz, s_ha, c_ha, s_dec, c_dec); + *tile_uvw = uvw; + *tile_uv = UV { u: uvw.u, v: uvw.v }; + *tile_w = W(uvw.w); + } + + // The UVWs for every timestep will be the same (because the phase + // centres are always the same). Make these ahead of time for + // efficiency. + let mut count = 0; + for (i, t1) in tile_uvws_high_res.iter().enumerate() { + for t2 in tile_uvws_high_res.iter().skip(i + 1) { + high_res_uvws[count] = *t1 - *t2; + count += 1; + } + } + } + + // ////////////////// // + // LOW RES PRECESSION // + // ////////////////// // + + let average_timestamp = timeblock.median; + let (average_lmst, _average_latitude, average_tile_xyzs) = if no_precession { + let average_tile_xyzs = + ArrayView2::from_shape((1, num_tiles), &unflagged_tile_xyzs).expect("correct shape"); + ( + get_lmst(array_position.longitude_rad, average_timestamp, dut1), + array_position.latitude_rad, + CowArray::from(average_tile_xyzs), + ) + } else { + let average_precession_info = precess_time( + array_position.longitude_rad, + array_position.latitude_rad, + obs_context.phase_centre, + average_timestamp, + dut1, + ); + let average_precessed_tile_xyzs = Array2::from_shape_vec( + (1, num_tiles), + average_precession_info.precess_xyz(&unflagged_tile_xyzs), + ) + .expect("correct shape"); + + ( + average_precession_info.lmst_j2000, + average_precession_info.array_latitude_j2000, + CowArray::from(average_precessed_tile_xyzs), + ) + }; + + let gpu_xyzs_high_res: Vec<_> = tile_xyzs_high_res + .iter() + .copied() + .map(|XyzGeodetic { x, y, z }| gpu::XYZ { + x: x as GpuFloat, + y: y as GpuFloat, + z: z as GpuFloat, + }) + .collect(); + let d_xyzs = DevicePointer::copy_to_device(&gpu_xyzs_high_res)?; + + // temporary arrays for accumulation + let vis_residual_low_res_fb: Array3> = + Array3::zeros((1, num_low_res_chans, num_cross_baselines)); + let mut vis_weights_low_res_fb: Array3 = Array3::zeros(vis_residual_low_res_fb.raw_dim()); + + // The low-res weights only need to be populated once. + weights_average(vis_weights_tfb.view(), vis_weights_low_res_fb.view_mut()); + + let freq_average_factor: i32 = (all_fine_chan_lambdas_m.len() / num_low_res_chans) + .try_into() + .expect("smaller than i32::MAX"); + + let mut bad_sources = Vec::::new(); + + unsafe { + let mut gpu_uvws: ArrayBase, Dim<[usize; 2]>> = + Array2::default((num_timesteps, num_cross_baselines)); + gpu_uvws + .outer_iter_mut() + .zip(tile_xyzs_high_res.outer_iter()) + .zip(lmsts.iter()) + .for_each(|((mut gpu_uvws, xyzs), lmst)| { + let phase_centre = obs_context.phase_centre.to_hadec(*lmst); + let v = xyzs_to_cross_uvws(xyzs.as_slice().unwrap(), phase_centre) + .into_iter() + .map(|uvw| gpu::UVW { + u: uvw.u as GpuFloat, + v: uvw.v as GpuFloat, + w: uvw.w as GpuFloat, + }) + .collect::>(); + gpu_uvws.assign(&ArrayView1::from(&v)); + }); + let mut d_uvws_from = DevicePointer::copy_to_device(gpu_uvws.as_slice().unwrap())?; + let mut d_uvws_to = + DevicePointer::malloc(gpu_uvws.len() * std::mem::size_of::())?; + + let gpu_lmsts: Vec = lmsts.iter().map(|l| *l as GpuFloat).collect(); + let d_lmsts = DevicePointer::copy_to_device(&gpu_lmsts)?; + + let gpu_lambdas: Vec = all_fine_chan_lambdas_m + .iter() + .map(|l| *l as GpuFloat) + .collect(); + let d_lambdas = DevicePointer::copy_to_device(&gpu_lambdas)?; + + let gpu_xyzs_low_res: Vec<_> = average_tile_xyzs + .iter() + .copied() + .map(|XyzGeodetic { x, y, z }| gpu::XYZ { + x: x as GpuFloat, + y: y as GpuFloat, + z: z as GpuFloat, + }) + .collect(); + let d_xyzs_low_res = DevicePointer::copy_to_device(&gpu_xyzs_low_res)?; + + let gpu_low_res_lambdas: Vec = + low_res_lambdas_m.iter().map(|l| *l as GpuFloat).collect(); + let d_low_res_lambdas = DevicePointer::copy_to_device(&gpu_low_res_lambdas)?; + + let d_average_lmsts = DevicePointer::copy_to_device(&[average_lmst as GpuFloat])?; + let mut d_low_res_uvws: DevicePointer = + DevicePointer::malloc(gpu_uvws.len() * std::mem::size_of::())?; + // Make the amount of elements in `d_iono_fits` a power of 2, for + // efficiency. + let mut d_iono_fits = { + let min_size = + num_cross_baselines * num_low_res_chans * std::mem::size_of::>(); + let n = (min_size as f64).log2().ceil() as u32; + let size = 2_usize.pow(n); + let mut d: DevicePointer> = DevicePointer::malloc(size).unwrap(); + d.clear(); + d + }; + + let mut d_high_res_vis_tfb = + DevicePointer::copy_to_device(vis_residual_tfb.as_slice().unwrap())?; + let mut d_high_res_vis2_tfb = + DevicePointer::copy_to_device(vis_residual_tfb.as_slice().unwrap())?; + let d_high_res_weights_tfb = + DevicePointer::copy_to_device(vis_weights_tfb.as_slice().unwrap())?; + + let mut d_low_res_vis_fb = + DevicePointer::copy_to_device(vis_residual_low_res_fb.as_slice().unwrap())?; + let d_low_res_weights_fb = + DevicePointer::copy_to_device(vis_weights_low_res_fb.as_slice().unwrap())?; + + let mut d_high_res_model_tfb: DevicePointer> = DevicePointer::malloc( + timestamps.len() + * num_cross_baselines + * all_fine_chan_lambdas_m.len() + * std::mem::size_of::>(), + )?; + let mut d_low_res_model_fb = + DevicePointer::copy_to_device(vis_residual_low_res_fb.as_slice().unwrap())?; + let mut d_low_res_model_rotated = + DevicePointer::copy_to_device(vis_residual_low_res_fb.as_slice().unwrap())?; + + // One pointer per timestep. + let mut d_uvws = Vec::with_capacity(high_res_uvws.len_of(Axis(0))); + // Temp vector to store results. + let mut gpu_uvws = vec![gpu::UVW::default(); high_res_uvws.len_of(Axis(1))]; + for uvws in high_res_uvws.outer_iter() { + // Convert the type and push the results to the device, + // saving the resulting pointer. + uvws.iter() + .zip_eq(gpu_uvws.iter_mut()) + .for_each(|(&UVW { u, v, w }, gpu_uvw)| { + *gpu_uvw = gpu::UVW { + u: u as GpuFloat, + v: v as GpuFloat, + w: w as GpuFloat, + } + }); + d_uvws.push(DevicePointer::copy_to_device(&gpu_uvws)?); + } + let mut d_beam_jones = DevicePointer::default(); + + for pass in 0..num_passes { + peel_progress.reset(); + peel_progress.set_message(format!( + "Peeling timeblock {}, pass {}", + timeblock.index + 1, + pass + 1 + )); + + let mut pass_issues = 0; + let mut pass_alpha_mag = 0.; + let mut pass_bega_mag = 0.; + let mut pass_gain_mag = 0.; + + // this needs to be inside the pass loop, because d_uvws_from gets swapped with d_uvws_to + gpu_kernel_call!( + gpu::xyzs_to_uvws, + d_xyzs.get(), + d_lmsts.get(), + d_uvws_from.get_mut(), + gpu::RADec { + ra: obs_context.phase_centre.ra as GpuFloat, + dec: obs_context.phase_centre.dec as GpuFloat, + }, + num_tiles_i32, + num_cross_baselines_i32, + num_timesteps_i32 + )?; + + for (i_source, (((source_name, source), iono_consts), source_pos)) in source_list + .iter() + .take(num_sources_to_iono_subtract) + .zip_eq(iono_consts.iter_mut()) + .zip(source_weighted_positions.iter().copied()) + .enumerate() + { + let start = std::time::Instant::now(); + // pb_debug!( + // "peel loop {pass}: {source_name} at {source_pos} (has iono {iono_consts:?})" + // ); + + // let old_iono_consts = *iono_consts; + let old_iono_consts = IonoConsts { + alpha: iono_consts.alpha, + beta: iono_consts.beta, + gain: iono_consts.gain, + }; + let gpu_old_iono_consts = gpu::IonoConsts { + alpha: old_iono_consts.alpha, + beta: old_iono_consts.beta, + gain: old_iono_consts.gain, + }; + + gpu_kernel_call!( + gpu::xyzs_to_uvws, + d_xyzs.get(), + d_lmsts.get(), + d_uvws_to.get_mut(), + gpu::RADec { + ra: source_pos.ra as GpuFloat, + dec: source_pos.dec as GpuFloat, + }, + num_tiles_i32, + num_cross_baselines_i32, + num_timesteps_i32, + )?; + pb_trace!("{:?}: xyzs_to_uvws", start.elapsed()); + + // rotate d_high_res_vis_tfb in place + gpu_kernel_call!( + gpu::rotate, + d_high_res_vis_tfb.get_mut().cast(), + num_timesteps_i32, + num_cross_baselines_i32, + num_high_res_chans_i32, + d_uvws_from.get(), + d_uvws_to.get(), + d_lambdas.get() + )?; + pb_trace!("{:?}: rotate", start.elapsed()); + + // there's a bug in the modeller where it ignores --no-precession + // high_res_modeller.update_with_a_source(source, obs_context.phase_centre)?; + high_res_modeller.update_with_a_source(source, source_pos)?; + // Clear the old memory before reusing the buffer. + d_high_res_model_tfb.clear(); + for (i_time, (lmst, latitude)) in lmsts.iter().zip(latitudes.iter()).enumerate() { + let original_model_ptr = d_high_res_model_tfb.ptr; + d_high_res_model_tfb.ptr = d_high_res_model_tfb + .ptr + .add(i_time * num_cross_baselines * all_fine_chan_lambdas_m.len()); + let original_uvw_ptr = d_uvws_to.ptr; + d_uvws_to.ptr = d_uvws_to.ptr.add(i_time * num_cross_baselines); + high_res_modeller.model_timestep_with( + *lmst, + *latitude, + &d_uvws_to, + &mut d_beam_jones, + &mut d_high_res_model_tfb, + )?; + d_high_res_model_tfb.ptr = original_model_ptr; + d_uvws_to.ptr = original_uvw_ptr; + } + pb_trace!("{:?}: high res model", start.elapsed()); + + // d_high_res_vis2_tfb = residuals@src. + d_high_res_vis_tfb.copy_to(&mut d_high_res_vis2_tfb)?; + // add iono@src to residuals@src + gpu_kernel_call!( + gpu::add_model, + d_high_res_vis2_tfb.get_mut().cast(), + d_high_res_model_tfb.get().cast(), + gpu_old_iono_consts, + d_lambdas.get(), + d_uvws_to.get(), + num_timesteps_i32, + num_high_res_chans_i32, + num_cross_baselines_i32, + )?; + pb_trace!("{:?}: add_model", start.elapsed()); + + // *** UGLY HACK *** + // d_high_res_model_rotated = iono@src + let mut d_high_res_model_rotated: DevicePointer> = + DevicePointer::malloc(d_high_res_model_tfb.get_size())?; + d_high_res_model_rotated.clear(); + gpu_kernel_call!( + gpu::add_model, + d_high_res_model_rotated.get_mut().cast(), + d_high_res_model_tfb.get().cast(), + gpu_old_iono_consts, + d_lambdas.get(), + d_uvws_to.get(), + num_timesteps_i32, + num_high_res_chans_i32, + num_cross_baselines_i32, + )?; + pb_trace!("{:?}: add_model", start.elapsed()); + + gpu_kernel_call!( + gpu::average, + d_high_res_vis2_tfb.get().cast(), + d_high_res_weights_tfb.get(), + d_low_res_vis_fb.get_mut().cast(), + num_timesteps_i32, + num_cross_baselines_i32, + num_high_res_chans_i32, + freq_average_factor + )?; + pb_trace!("{:?}: average high res vis", start.elapsed()); + + gpu_kernel_call!( + gpu::average, + d_high_res_model_rotated.get().cast(), + d_high_res_weights_tfb.get(), + d_low_res_model_fb.get_mut().cast(), + num_timesteps_i32, + num_cross_baselines_i32, + num_high_res_chans_i32, + freq_average_factor + )?; + pb_trace!("{:?}: average high res model", start.elapsed()); + + gpu_kernel_call!( + gpu::xyzs_to_uvws, + d_xyzs_low_res.get(), + d_average_lmsts.get(), + d_low_res_uvws.get_mut(), + gpu::RADec { + ra: source_pos.ra as GpuFloat, + dec: source_pos.dec as GpuFloat, + }, + num_tiles_i32, + num_cross_baselines_i32, + 1, + )?; + pb_trace!("{:?}: low res xyzs_to_uvws", start.elapsed()); + + // !!!! + // comment me out + // !!!! + // we have low res residual and model, now print what iono fit will see. + // { + // use marlu::math::cross_correlation_baseline_to_tiles; + // let vis_residual_low_res_fb = d_low_res_vis_fb.copy_from_device_new()?; + // dbg!(vis_residual_low_res_fb.len(), num_low_res_chans, num_cross_baselines); + // let vis_residual_low_res_fb = Array2::from_shape_vec( + // (num_low_res_chans, num_cross_baselines), + // vis_residual_low_res_fb, + // ) + // .unwrap(); + // let vis_model_low_res_fb = d_low_res_model_fb.copy_from_device_new()?; + // let vis_model_low_res_fb = Array2::>::from_shape_vec( + // (num_low_res_chans, num_cross_baselines), + // vis_model_low_res_fb, + // ) + // .unwrap(); + // let vis_weights_low_res_fb = d_low_res_weights_fb.copy_from_device_new()?; + // let vis_weights_low_res_fb = Array2::::from_shape_vec( + // (num_low_res_chans, num_cross_baselines), + // vis_weights_low_res_fb, + // ) + // .unwrap(); + // let ant_pairs = (0..num_cross_baselines) + // .map(|bl_idx| cross_correlation_baseline_to_tiles(num_tiles, bl_idx)) + // .collect_vec(); + // let uvws_low_res = d_low_res_uvws.copy_from_device_new()?; + // for (ch_idx, (vis_residual_low_res_b, vis_model_low_res_b, vis_weights_low_res_b, &lambda)) in izip!( + // vis_residual_low_res_fb.outer_iter(), + // vis_model_low_res_fb.outer_iter(), + // vis_weights_low_res_fb.outer_iter(), + // low_res_lambdas_m, + // ).enumerate() { + // // dbg!(&ch_idx); + // if ch_idx > 1 { + // continue; + // } + // for (residual, model, weight, &gpu::UVW { u, v, w: _ }, &(ant1, ant2)) in izip!( + // vis_residual_low_res_b.iter(), + // vis_model_low_res_b.iter(), + // vis_weights_low_res_b.iter(), + // &uvws_low_res, + // ant_pairs.iter(), + // ) { + // // dbg!(&ant1, &ant2); + // if ant1 != 0 || (ant2 >= 16 && ant2 < num_tiles_i32 as usize - 16) { + // continue; + // } + // let residual_i = residual[0] + residual[3]; + // let model_i = model[0] + model[3]; + // let u = u as GpuFloat; + // let v = v as GpuFloat; + // println!("uv {ant1:3} {ant2:3} ({u:+9.3}, {v:+9.3}) l{lambda:+7.5} wt{weight:+3.1} | RI {:+11.7} @{:+5.3}pi | MI {:+11.7} @{:+5.3}pi", residual_i.norm(), residual_i.arg(), model_i.norm(), model_i.arg()); + // } + // } + // } + + let mut gpu_iono_consts = gpu::IonoConsts { + alpha: 0.0, + beta: 0.0, + gain: 1.0, + }; + // get size of device ptr + let lrblch = (num_cross_baselines_i32 * num_low_res_chans_i32) as f64; + pb_trace!("before iono_loop nt{:?} nxbl{:?} nlrch{:?} = lrxblch{:?}; lrvfb{:?} lrwfb{:?} lrmfb{:?} lrmrfb{:?}", + num_tiles_i32, + num_cross_baselines_i32, + num_low_res_chans_i32, + lrblch, + d_low_res_vis_fb.get_size() as f64 / lrblch, + d_low_res_weights_fb.get_size() as f64 / lrblch, + d_low_res_model_fb.get_size() as f64 / lrblch, + d_low_res_model_rotated.get_size() as f64 / lrblch, + ); + gpu_kernel_call!( + gpu::iono_loop, + d_low_res_vis_fb.get().cast(), + d_low_res_weights_fb.get(), + d_low_res_model_fb.get().cast(), + d_low_res_model_rotated.get_mut().cast(), + d_iono_fits.get_mut().cast(), + &mut gpu_iono_consts, + num_cross_baselines_i32, + num_low_res_chans_i32, + num_loops as i32, + d_low_res_uvws.get(), + d_low_res_lambdas.get(), + convergence as GpuFloat, + )?; + pb_trace!("{:?}: iono_loop", start.elapsed()); + + iono_consts.alpha = old_iono_consts.alpha + gpu_iono_consts.alpha; + iono_consts.beta = old_iono_consts.beta + gpu_iono_consts.beta; + iono_consts.gain = old_iono_consts.gain * gpu_iono_consts.gain; + + #[rustfmt::skip] + let issues = format!( + "{}{}{}", + if iono_consts.alpha.abs() > 1e-3 { + if iono_consts.alpha > 0.0 { "A" } else { "a" } + } else { + "" + }, + if iono_consts.beta.abs() > 1e-3 { + if iono_consts.beta > 0.0 { "B" } else { "b" } + } else { + "" + }, + if iono_consts.gain < 0.0 { + "g" + } else if iono_consts.gain > 1.5 { + "G" + } else { + "" + }, + ); + let message = format!( + // "t{:03} p{pass} s{i_source:6}|{source_name:16} @ ra {:+7.2} d {:+7.2} | a {:+7.2e} b {:+7.2e} g {:+3.2} | da {:+8.2e} db {:+8.2e} dg {:+3.2} | {}", + "t{:3} pass {:2} s{i_source:6}|{source_name:16} @ ra {:+7.2} d {:+7.2} | a {:+8.6} b {:+8.6} g {:+3.2} | da {:+8.6} db {:+8.6} dg {:+3.2} | {}", + timeblock.index, + pass+1, + (source_pos.ra.to_degrees() + 180.) % 360. - 180., + source_pos.dec.to_degrees(), + iono_consts.alpha, + iono_consts.beta, + iono_consts.gain, + iono_consts.alpha - old_iono_consts.alpha, + iono_consts.beta - old_iono_consts.beta, + iono_consts.gain - old_iono_consts.gain, + issues, + ); + if issues.is_empty() { + pb_debug!("[peel_gpu] {}", message); + pass_alpha_mag += (iono_consts.alpha - old_iono_consts.alpha).abs(); + pass_bega_mag += (iono_consts.beta - old_iono_consts.beta).abs(); + pass_gain_mag += iono_consts.gain - old_iono_consts.gain; + } else { + pb_debug!( + "[peel_gpu] {} (reverting to a {:+8.6} b {:+8.6} g {:+3.2})", + message, + old_iono_consts.alpha, + old_iono_consts.beta, + old_iono_consts.gain, + ); + bad_sources.push(BadSource { + gpstime: timeblock.median.to_gpst_seconds(), + pass, + i_source, + source_name: source_name.to_string(), + // weighted_catalogue_pos_j2000: source_pos, + alpha: iono_consts.alpha, + beta: iono_consts.beta, + gain: iono_consts.gain, + residuals_i: Vec::default(), // todo!(), + residuals_q: Vec::default(), // todo!(), + residuals_u: Vec::default(), // todo!(), + residuals_v: Vec::default(), // todo!(), + }); + + iono_consts.alpha = old_iono_consts.alpha; + iono_consts.beta = old_iono_consts.beta; + iono_consts.gain = old_iono_consts.gain; + pass_issues += 1; + } + + let gpu_iono_consts = gpu::IonoConsts { + alpha: iono_consts.alpha, + beta: iono_consts.beta, + gain: iono_consts.gain, + }; + + pb_trace!("subtracting {gpu_old_iono_consts:?} adding {gpu_iono_consts:?}"); + + // vis += iono(model, old_consts) - iono(model, consts) + gpu_kernel_call!( + gpu::subtract_iono, + d_high_res_vis_tfb.get_mut().cast(), + d_high_res_model_tfb.get().cast(), + gpu_iono_consts, + gpu_old_iono_consts, + d_uvws_to.get(), + d_lambdas.get(), + num_timesteps_i32, + num_cross_baselines_i32, + num_high_res_chans_i32, + )?; + pb_trace!("{:?}: subtract_iono", start.elapsed()); + + // Peel? + let num_sources_to_peel = 0; + if pass == num_passes - 1 && i_source < num_sources_to_peel { + // We currently can only do DI calibration on the CPU. Copy the visibilities back to the host. + let vis = d_high_res_vis_tfb.copy_from_device_new()?; + + // *** UGLY HACK *** + let mut d_high_res_model_rotated: DevicePointer> = + DevicePointer::malloc(d_high_res_model_tfb.get_size())?; + d_high_res_model_rotated.clear(); + gpu_kernel_call!( + gpu::add_model, + d_high_res_model_rotated.get_mut().cast(), + d_high_res_model_tfb.get().cast(), + //iono_consts.0 as GpuFloat, + //iono_consts.1 as GpuFloat, + gpu_iono_consts, + d_lambdas.get(), + d_uvws_to.get(), + num_timesteps_i32, + num_high_res_chans_i32, + num_cross_baselines_i32, + )?; + let model = d_high_res_model_rotated.copy_from_device_new()?; + + let mut di_jones = + Array3::from_elem((1, num_tiles, num_high_res_chans), Jones::identity()); + let shape = (num_timesteps, num_high_res_chans, num_cross_baselines); + let pb = ProgressBar::hidden(); + let di_cal_results = calibrate_timeblock( + ArrayView3::from_shape(shape, &vis).expect("correct shape"), + ArrayView3::from_shape(shape, &model).expect("correct shape"), + di_jones.view_mut(), + timeblock, + chanblocks, + 50, + 1e-8, + 1e-4, + obs_context.polarisations, + pb, + true, + ); + if di_cal_results.into_iter().all(|r| r.converged) { + // Apply. + } + } + + // The new phase centre becomes the old one. + std::mem::swap(&mut d_uvws_from, &mut d_uvws_to); + + peel_progress.inc(1); + } + + let num_good_sources = (num_sources_to_iono_subtract - pass_issues) as f64; + if num_good_sources > 0. { + let msg = format!( + "t{:3} pass {:2} ma {:+7.2e} mb {:+7.2e} mg {:+7.2e}", + timeblock.index, + pass + 1, + pass_alpha_mag / num_good_sources, + pass_bega_mag / num_good_sources, + pass_gain_mag / num_good_sources + ); + if pass_issues > 0 { + pb_warn!("[peel_gpu] {} ({} issues)", msg, pass_issues); + } else { + pb_info!("[peel_gpu] {} (no issues)", msg); + } + } else { + pb_warn!( + "[peel_gpu] t{:03} pass {:2} all sources had issues", + timeblock.index, + pass + 1 + ); + } + + // Rotate back to the phase centre. + gpu_kernel_call!( + gpu::xyzs_to_uvws, + d_xyzs.get(), + d_lmsts.get(), + d_uvws_to.get_mut(), + gpu::RADec { + ra: obs_context.phase_centre.ra as GpuFloat, + dec: obs_context.phase_centre.dec as GpuFloat, + }, + num_tiles_i32, + num_cross_baselines_i32, + num_timesteps_i32 + )?; + + gpu_kernel_call!( + gpu::rotate, + d_high_res_vis_tfb.get_mut().cast(), + num_timesteps_i32, + num_cross_baselines_i32, + num_high_res_chans_i32, + d_uvws_from.get(), + d_uvws_to.get(), + d_lambdas.get() + )?; + } + + // copy results back to host + d_high_res_vis_tfb.copy_from_device(vis_residual_tfb.as_slice_mut().unwrap())?; + } + + let mut pass_counts = HashMap::::new(); + for bad_source in bad_sources.iter() { + *pass_counts + .entry(bad_source.source_name.clone()) + .or_default() += 1; + } + let mut printed = HashSet::::new(); + bad_sources.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal)); + for bad_source in bad_sources.into_iter() { + let BadSource { + gpstime, + // pass, + i_source, + source_name, + alpha, + beta, + gain, + .. + } = bad_source; + // if source_name is in printed + if printed.contains(&source_name) { + continue; + } else { + printed.insert(source_name.clone()); + } + let passes = pass_counts[&source_name]; + + let pos = source_weighted_positions[i_source]; + let RADec { ra, dec } = pos; + pb_warn!( + "[peel_gpu] Bad source: {:.2} p{:2} {} at radec({:+7.2}, {:+7.2}) iono({:+8.6},{:+8.6},{:+3.2})", + gpstime, + passes, + source_name, + (ra + 180.) % 360. - 180., + dec, + alpha, + beta, + gain + ); + let matches = source_list.search_asec(pos, 300.); + for (sep, src_name, idx, comp) in matches { + let compstr = match comp.comp_type { + ComponentType::Gaussian { maj, min, pa } => format!( + "G {:6.1}as {:6.1}as {:+6.1}d", + maj.to_degrees() * 3600., + min.to_degrees() * 3600., + pa.to_degrees() + ), + ComponentType::Point => format!("P {:8} {:8} {:7}", "", "", ""), + _ => format!("? {:8} {:8} {:7}", "", "", ""), + }; + let fluxstr = match comp.flux_type { + FluxDensityType::CurvedPowerLaw { + si, + fd: FluxDensity { freq, i, .. }, + q, + } => format!( + "cpl S={:+6.2}(νn)^{:+5.2} exp[{:+5.2}ln(νn)]; @{:.1}MHz", + i, + si, + q, + freq / 1e6 + ), + FluxDensityType::PowerLaw { + si, + fd: FluxDensity { freq, i, .. }, + } => format!("pl S={:+6.2}(νn)^{:+5.2}; @{:.1}MHz", i, si, freq / 1e6), + FluxDensityType::List(fds) => { + let FluxDensity { i, freq, .. } = fds[0]; + format!("lst S={:+6.2} @{:.1}MHz", i, freq / 1e6) + } + }; + pb_warn!( + "[peel_gpu] {sep:5.1} {src_name:16} c{idx:2} at radec({:+7.2},{:+7.2}) comp({}) flx ({})", + (comp.radec.ra + 180.) % 360. - 180., + comp.radec.dec, + compstr, + fluxstr, + ); + } + } + + Ok(()) +} diff --git a/src/params/peel/mod.rs b/src/params/peel/mod.rs index c9c1f259..f160c9ef 100644 --- a/src/params/peel/mod.rs +++ b/src/params/peel/mod.rs @@ -7,7 +7,6 @@ mod tests; use std::{ borrow::Cow, - cmp::Ordering, f64::consts::TAU, io::Write, num::NonZeroUsize, @@ -41,7 +40,6 @@ use crate::{ averaging::{Spw, Timeblock}, beam::Beam, context::ObsContext, - di_calibrate::calibrate_timeblock, io::{ read::VisReadError, write::{write_vis, VisTimestep}, @@ -51,11 +49,6 @@ use crate::{ srclist::SourceList, Chanblock, TileBaselineFlags, MODEL_DEVICE, PROGRESS_BARS, }; -#[cfg(any(feature = "cuda", feature = "hip"))] -use crate::{ - gpu::{self, gpu_kernel_call, DevicePointer, GpuError, GpuFloat}, - model::SkyModellerGpu, -}; use super::{InputVisParams, ModellingParams, OutputVisParams}; @@ -85,47 +78,139 @@ pub(crate) struct SourceIonoConsts { // pub(crate) centroid_timestamps: Vec, } -#[derive(Debug, PartialEq)] //, Serialize, Deserialize)] -pub(crate) struct BadSource { - // timeblock: Timeblock, - pub(crate) gpstime: f64, - pub(crate) pass: usize, - // pub(crate) gsttime: Epoch, - // pub(crate) times: Vec, - // source: Source, - pub(crate) i_source: usize, - pub(crate) source_name: String, - // pub(crate) weighted_catalogue_pos_j2000: RADec, - // iono_consts: IonoConsts, - pub(crate) alpha: f64, - pub(crate) beta: f64, - pub(crate) gain: f64, - // pub(crate) alphas: Vec, - // pub(crate) betas: Vec, - // pub(crate) gains: Vec, - pub(crate) residuals_i: Vec>, - pub(crate) residuals_q: Vec>, - pub(crate) residuals_u: Vec>, - pub(crate) residuals_v: Vec>, +pub(crate) struct PeelWeightParams { + uvw_min_metres: f64, + uvw_max_metres: f64, + short_baseline_sigma: f64, } -// custom sorting implementations -impl PartialOrd for BadSource { - fn partial_cmp(&self, other: &Self) -> Option { - match self.source_name.partial_cmp(&other.source_name) { - // Some(Ordering::Equal) => match self.gpstime.partial_cmp(&other.gpstime) { - Some(Ordering::Equal) => match self.pass.partial_cmp(&other.pass) { - Some(Ordering::Less) => Some(Ordering::Greater), - Some(Ordering::Greater) => Some(Ordering::Less), - other => other, - }, - // other => other, - // }, - other => other, +impl PeelWeightParams { + pub(crate) fn apply_tfb( + &self, + // TODO: take an arrayview + // mut vis_weights_tfb: ArrayMutView3, + vis_weights_tfb: ArrayBase, Dim<[usize; 3]>>, + obs_context: &ObsContext, + timeblock: &Timeblock, + apply_precession: bool, + chanblocks: &[Chanblock], + tile_baseline_flags: &TileBaselineFlags, + ) -> ArrayBase, Dim<[usize; 3]>> { + let array_position = obs_context.array_position; + let dut1 = obs_context.dut1.unwrap_or_default(); + + let all_fine_chan_lambdas_m = chanblocks + .iter() + .map(|c| VEL_C / c.freq) + .collect::>(); + + let flagged_tiles = &tile_baseline_flags.flagged_tiles; + + let unflagged_tile_xyzs: Vec = obs_context + .tile_xyzs + .par_iter() + .enumerate() + .filter(|(tile_index, _)| !flagged_tiles.contains(tile_index)) + .map(|(_, xyz)| *xyz) + .collect(); + + let num_tiles = unflagged_tile_xyzs.len(); + + let mut baseline_weights = Vec1::try_from_vec(vec![ + 1.0; + tile_baseline_flags + .unflagged_cross_baseline_to_tile_map + .len() + ]) + .expect("not possible to have no unflagged tiles here"); + + let average_timestamp = timeblock.median; + + let (tile_xyzs, lmst) = if apply_precession { + let precession_info = precess_time( + array_position.longitude_rad, + array_position.latitude_rad, + obs_context.phase_centre, + average_timestamp, + dut1, + ); + let precessed_tile_xyzs = precession_info.precess_xyz(&unflagged_tile_xyzs); + (precessed_tile_xyzs, precession_info.lmst_j2000) + } else { + ( + unflagged_tile_xyzs.clone(), + get_lmst(array_position.longitude_rad, average_timestamp, dut1), + ) + }; + let uvws = xyzs_to_cross_uvws(&tile_xyzs, obs_context.phase_centre.to_hadec(lmst)); + + assert_eq!(baseline_weights.len(), uvws.len()); + for (UVW { u, v, w }, baseline_weight) in uvws.into_iter().zip(baseline_weights.iter_mut()) + { + let uvw_length = (u.powi(2) + v.powi(2) + w.powi(2)).sqrt(); + if uvw_length < self.uvw_min_metres || uvw_length > self.uvw_max_metres { + *baseline_weight = 0.0; + } } + + let timestamps = &timeblock.timestamps; + let mut tile_uvs_high_res = Array2::::default((timestamps.len(), num_tiles)); + // Pre-compute high-res tile UVs and Ws at observation phase centre. + for (&time, mut tile_uvs_high_res) in + izip!(timestamps.iter(), tile_uvs_high_res.outer_iter_mut(),) + { + let (tile_xyzs, lmst) = if apply_precession { + let precession_info = precess_time( + array_position.longitude_rad, + array_position.latitude_rad, + obs_context.phase_centre, + time, + dut1, + ); + let precessed_tile_xyzs = precession_info.precess_xyz(&unflagged_tile_xyzs); + (precessed_tile_xyzs, precession_info.lmst_j2000) + } else { + ( + unflagged_tile_xyzs.to_owned(), + get_lmst(array_position.longitude_rad, time, dut1), + ) + }; + let hadec_phase = obs_context.phase_centre.to_hadec(lmst); + let (s_ha, c_ha) = hadec_phase.ha.sin_cos(); + let (s_dec, c_dec) = hadec_phase.dec.sin_cos(); + for (tile_uv, &tile_xyz) in izip!(tile_uvs_high_res.iter_mut(), tile_xyzs.iter(),) { + let uvw = UVW::from_xyz_inner(tile_xyz, s_ha, c_ha, s_dec, c_dec); + *tile_uv = UV { u: uvw.u, v: uvw.v }; + } + } + + // // use the baseline taper from the RTS, 1-exp(-(u*u+v*v)/(2*sig^2)); + let vis_weights_tfb = { + let mut iono_taper = get_weights_rts( + tile_uvs_high_res.view(), + &all_fine_chan_lambdas_m, + self.short_baseline_sigma, + ); + iono_taper *= &vis_weights_tfb; + iono_taper + }; + assert_eq!(baseline_weights.len(), vis_weights_tfb.len_of(Axis(2))); + let mut vis_weights_tfb = vis_weights_tfb.to_owned(); + for (vis_weight, bl_weight) in vis_weights_tfb + .iter_mut() + .zip(baseline_weights.iter().cycle()) + { + *vis_weight = (*vis_weight as f64 * *bl_weight) as f32; + } + vis_weights_tfb } } +#[cfg(any(feature = "cuda", feature = "hip"))] +mod gpu; +#[cfg(any(feature = "cuda", feature = "hip"))] +use gpu::peel_gpu; + pub(crate) struct PeelParams { pub(crate) input_vis_params: InputVisParams, pub(crate) output_vis_params: Option, @@ -174,6 +259,12 @@ impl PeelParams { let tile_baseline_flags = &input_vis_params.tile_baseline_flags; let flagged_tiles = &tile_baseline_flags.flagged_tiles; + let peel_weight_params = PeelWeightParams { + uvw_min_metres: *uvw_min_metres, + uvw_max_metres: *uvw_max_metres, + short_baseline_sigma: *short_baseline_sigma, + }; + let unflagged_tile_xyzs: Vec = obs_context .tile_xyzs .par_iter() @@ -186,7 +277,7 @@ impl PeelParams { let all_fine_chan_freqs_hz = Vec1::try_from_vec(spw.chanblocks.iter().map(|c| c.freq).collect()).unwrap(); let all_fine_chan_lambdas_m = all_fine_chan_freqs_hz.mapped_ref(|f| VEL_C / *f); - let (low_res_freqs_hz, low_res_lambdas_m): (Vec<_>, Vec<_>) = low_res_spw + let (_low_res_freqs_hz, low_res_lambdas_m): (Vec<_>, Vec<_>) = low_res_spw .chanblocks .iter() .map(|c| { @@ -379,16 +470,10 @@ impl PeelParams { *num_loops, obs_context, &unflagged_tile_xyzs, - *uvw_min_metres, - *uvw_max_metres, - *short_baseline_sigma, + &peel_weight_params, *convergence, tile_baseline_flags, - array_position, - input_vis_params.dut1, &spw.chanblocks, - &all_fine_chan_lambdas_m, - &low_res_freqs_hz, &low_res_lambdas_m, *apply_precession, output_vis_params.as_ref(), @@ -1047,25 +1132,34 @@ fn peel_cpu( source_weighted_positions: &[RADec], num_passes: usize, num_loops: usize, - short_baseline_sigma: f64, convergence: f64, - // TODO (dev): Why do we need both this and low_res_lambdas_m? it's not even used - _low_res_freqs_hz: &[f64], - all_fine_chan_lambdas_m: &[f64], + chanblocks: &[Chanblock], low_res_lambdas_m: &[f64], obs_context: &ObsContext, - // TODO (dev): array_position is available from obs_context - array_position: LatLngHeight, - // TODO (dev): unflagged_tile_xyzs is available from obs_context - unflagged_tile_xyzs: &[XyzGeodetic], + tile_baseline_flags: &TileBaselineFlags, high_res_modeller: &mut dyn SkyModeller, - // TODO (dev): dut1 is available from obs_context - dut1: Duration, no_precession: bool, multi_progress_bar: &MultiProgress, ) -> Result<(), PeelError> { // TODO: Do we allow multiple timesteps in the low-res data? + let all_fine_chan_lambdas_m = chanblocks + .iter() + .map(|c| VEL_C / c.freq) + .collect::>(); + + let flagged_tiles = &tile_baseline_flags.flagged_tiles; + let unflagged_tile_xyzs: Vec = obs_context + .tile_xyzs + .par_iter() + .enumerate() + .filter(|(tile_index, _)| !flagged_tiles.contains(tile_index)) + .map(|(_, xyz)| *xyz) + .collect(); + + let array_position = obs_context.array_position; + let dut1 = obs_context.dut1.unwrap_or_default(); + let timestamps = &timeblock.timestamps; let num_timestamps_high_res = timestamps.len(); let num_timestamps_low_res = 1; @@ -1133,11 +1227,11 @@ fn peel_cpu( time, dut1, ); - let precessed_xyzs = precession_info.precess_xyz(unflagged_tile_xyzs); + let precessed_xyzs = precession_info.precess_xyz(&unflagged_tile_xyzs); (precession_info.lmst_j2000, precessed_xyzs) } else { let lmst = get_lmst(array_position.longitude_rad, time, dut1); - (lmst, unflagged_tile_xyzs.into()) + (lmst, unflagged_tile_xyzs.clone()) }; let hadec_phase = obs_context.phase_centre.to_hadec(lmst); let (s_ha, c_ha) = hadec_phase.ha.sin_cos(); @@ -1156,7 +1250,7 @@ fn peel_cpu( let (average_lmst, _average_latitude, average_tile_xyzs) = if no_precession { let average_timestamp = timeblock.median; let average_tile_xyzs = - ArrayView2::from_shape((1, num_tiles), unflagged_tile_xyzs).expect("correct shape"); + ArrayView2::from_shape((1, num_tiles), &unflagged_tile_xyzs).expect("correct shape"); ( get_lmst(array_position.longitude_rad, average_timestamp, dut1), array_position.latitude_rad, @@ -1173,7 +1267,7 @@ fn peel_cpu( ); let average_precessed_tile_xyzs = Array2::from_shape_vec( (1, num_tiles), - average_precession_info.precess_xyz(unflagged_tile_xyzs), + average_precession_info.precess_xyz(&unflagged_tile_xyzs), ) .expect("correct shape"); @@ -1184,19 +1278,6 @@ fn peel_cpu( ) }; - // TODO (Dev): iono_taper_weights could be supplied to peel - // use the baseline taper from the RTS, 1-exp(-(u*u+v*v)/(2*sig^2)); - // TODO: Do we care about weights changing over time? - let weights = { - let mut iono_taper = get_weights_rts( - tile_uvs_high_res.view(), - all_fine_chan_lambdas_m, - short_baseline_sigma, - ); - iono_taper *= &vis_weights; - iono_taper - }; - // Temporary visibility array, re-used for each timestep // TODO (Dev): rename resid_hi_src_tfb let mut vis_residual_tmp = vis_residual.to_owned(); @@ -1222,7 +1303,7 @@ fn peel_cpu( let mut vis_weights_low_res: Array3 = Array3::zeros(vis_residual_low_res.dim()); // The low-res weights only need to be populated once. - weights_average(weights.view(), vis_weights_low_res.view_mut()); + weights_average(vis_weights.view(), vis_weights_low_res.view_mut()); for pass in 0..num_passes { for (((source_name, source), iono_consts), source_pos) in source_list @@ -1262,11 +1343,11 @@ fn peel_cpu( time, dut1, ); - let precessed_xyzs = precession_info.precess_xyz(unflagged_tile_xyzs); + let precessed_xyzs = precession_info.precess_xyz(&unflagged_tile_xyzs); (precession_info.lmst_j2000, precessed_xyzs) } else { let lmst = get_lmst(array_position.longitude_rad, time, dut1); - (lmst, unflagged_tile_xyzs.into()) + (lmst, unflagged_tile_xyzs.clone()) }; let hadec_source = source_pos.to_hadec(lmst); let (s_ha, c_ha) = hadec_source.ha.sin_cos(); @@ -1303,7 +1384,7 @@ fn peel_cpu( vis_residual_tmp.view_mut(), tile_ws_from.view(), tile_ws_to.view(), - all_fine_chan_lambdas_m, + &all_fine_chan_lambdas_m, ); multi_progress_bar.suspend(|| trace!("{:?}: high-res model rotate", start.elapsed())); @@ -1313,7 +1394,7 @@ fn peel_cpu( model_hi_src_tfb.view_mut(), tile_ws_from.view(), tile_ws_to.view(), - all_fine_chan_lambdas_m, + &all_fine_chan_lambdas_m, ); multi_progress_bar.suspend(|| trace!("{:?}: high-res model iono", start.elapsed())); @@ -1322,7 +1403,7 @@ fn peel_cpu( model_hi_src_iono_tfb.view_mut(), tile_uvs_high_res_rot.view(), *iono_consts, - all_fine_chan_lambdas_m, + &all_fine_chan_lambdas_m, ); // Add the high-res model to the residuals. @@ -1337,7 +1418,7 @@ fn peel_cpu( vis_average2( vis_residual_tmp.view(), vis_residual_low_res.view_mut(), - weights.view(), + vis_weights.view(), ); multi_progress_bar.suspend(|| trace!("{:?}: alpha/beta loop", start.elapsed())); @@ -1353,13 +1434,13 @@ fn peel_cpu( model_hi_src_iono_tfb.view_mut(), tile_uvs_high_res_rot.view(), *iono_consts, - all_fine_chan_lambdas_m, + &all_fine_chan_lambdas_m, ); vis_average2( model_hi_src_iono_tfb.view(), vis_model_low_res_tmp.view_mut(), - weights.view(), + vis_weights.view(), ); let iono_fits = iono_fit( @@ -1400,7 +1481,7 @@ fn peel_cpu( tile_uvs_high_res_rot.view(), *iono_consts, old_iono_consts, - all_fine_chan_lambdas_m, + &all_fine_chan_lambdas_m, ); multi_progress_bar.suspend(|| { @@ -1415,942 +1496,6 @@ fn peel_cpu( Ok(()) } -#[cfg(any(feature = "cuda", feature = "hip"))] -#[allow(clippy::too_many_arguments)] -fn peel_gpu( - mut vis_residual_tfb: ArrayViewMut3>, - vis_weights_tfb: ArrayView3, - timeblock: &Timeblock, - source_list: &SourceList, - iono_consts: &mut [IonoConsts], - source_weighted_positions: &[RADec], - num_passes: usize, - num_loops: usize, - // TODO (Dev): bake this into weights - short_baseline_sigma: f64, - convergence: f64, - // TODO (Dev): rename chanblocks - high_res_chanblocks: &[Chanblock], - // TODO (Dev): derive from high_res_chanblocks - all_fine_chan_lambdas_m: &[f64], - low_res_lambdas_m: &[f64], - obs_context: &ObsContext, - array_position: LatLngHeight, - unflagged_tile_xyzs: &[XyzGeodetic], - // TODO (Dev): bake this into weights - baseline_weights: &[f64], - high_res_modeller: &mut SkyModellerGpu, - dut1: Duration, - no_precession: bool, - multi_progress_bar: &MultiProgress, -) -> Result<(), PeelError> { - use std::collections::{HashMap, HashSet}; - - use crate::srclist::{ComponentType, FluxDensity, FluxDensityType}; - - let timestamps = &timeblock.timestamps; - - let num_timesteps = vis_residual_tfb.len_of(Axis(0)); - let num_tiles = unflagged_tile_xyzs.len(); - let num_cross_baselines = (num_tiles * (num_tiles - 1)) / 2; - let num_high_res_chans = all_fine_chan_lambdas_m.len(); - let num_high_res_chans_spw = high_res_chanblocks.len(); - assert_eq!( - num_high_res_chans, num_high_res_chans_spw, - "chans from fine_chan_lambdas {} != chans in high_res_chanblocks (flagged+unflagged) {}", - num_high_res_chans, num_high_res_chans_spw - ); - - let num_low_res_chans = low_res_lambdas_m.len(); - assert!( - num_high_res_chans % num_low_res_chans == 0, - "TODO: averaging can't deal with non-integer ratios. channels high {} low {}", - num_high_res_chans, - num_low_res_chans - ); - - let num_timesteps_i32: i32 = num_timesteps.try_into().expect("smaller than i32::MAX"); - let num_tiles_i32: i32 = num_tiles.try_into().expect("smaller than i32::MAX"); - let num_cross_baselines_i32: i32 = num_cross_baselines - .try_into() - .expect("smaller than i32::MAX"); - let num_high_res_chans_i32 = num_high_res_chans - .try_into() - .expect("smaller than i32::MAX"); - let num_low_res_chans_i32 = num_low_res_chans.try_into().expect("smaller than i32::MAX"); - - let num_sources_to_iono_subtract = iono_consts.len(); - - let (time_axis, freq_axis, baseline_axis) = (Axis(0), Axis(1), Axis(2)); - - assert_eq!(vis_residual_tfb.len_of(time_axis), timestamps.len()); - assert_eq!(vis_weights_tfb.len_of(time_axis), timestamps.len()); - - assert_eq!(vis_residual_tfb.len_of(baseline_axis), num_cross_baselines); - assert_eq!(vis_weights_tfb.len_of(baseline_axis), num_cross_baselines); - - assert_eq!(vis_residual_tfb.len_of(freq_axis), num_high_res_chans); - assert_eq!(vis_weights_tfb.len_of(freq_axis), num_high_res_chans); - - assert!(num_sources_to_iono_subtract <= source_list.len()); - - if num_sources_to_iono_subtract == 0 { - return Ok(()); - } - - let timestamps = &timeblock.timestamps; - let peel_progress = multi_progress_bar.add( - ProgressBar::new(num_sources_to_iono_subtract as _) - .with_style( - ProgressStyle::default_bar() - .template("{msg:17}: [{wide_bar:.blue}] {pos:2}/{len:2} sources ({elapsed_precise}<{eta_precise})").unwrap() - .progress_chars("=> "), - ) - .with_position(0) - ); - peel_progress.tick(); - - macro_rules! pb_warn { ($($arg:tt)+) => (multi_progress_bar.suspend(|| warn!($($arg)+))) } - macro_rules! pb_info { ($($arg:tt)+) => (multi_progress_bar.suspend(|| info!($($arg)+))) } - macro_rules! pb_debug { ($($arg:tt)+) => (multi_progress_bar.suspend(|| debug!($($arg)+))) } - macro_rules! pb_trace { ($($arg:tt)+) => (multi_progress_bar.suspend(|| trace!($($arg)+))) } - - let mut lmsts = vec![0.; timestamps.len()]; - let mut latitudes = vec![0.; timestamps.len()]; - let mut tile_xyzs_high_res = Array2::::default((timestamps.len(), num_tiles)); - let mut high_res_uvws = Array2::default((timestamps.len(), num_cross_baselines)); - let mut tile_uvs_high_res = Array2::::default((timestamps.len(), num_tiles)); - let mut tile_ws_high_res = Array2::::default((timestamps.len(), num_tiles)); - - // Pre-compute high-res tile UVs and Ws at observation phase centre. - for ( - &time, - lmst, - latitude, - mut tile_xyzs_high_res, - mut high_res_uvws, - mut tile_uvs_high_res, - mut tile_ws_high_res, - ) in izip!( - timestamps.iter(), - lmsts.iter_mut(), - latitudes.iter_mut(), - tile_xyzs_high_res.outer_iter_mut(), - high_res_uvws.outer_iter_mut(), - tile_uvs_high_res.outer_iter_mut(), - tile_ws_high_res.outer_iter_mut(), - ) { - if !no_precession { - let precession_info = precess_time( - array_position.longitude_rad, - array_position.latitude_rad, - obs_context.phase_centre, - time, - dut1, - ); - tile_xyzs_high_res - .iter_mut() - .zip_eq(&precession_info.precess_xyz(unflagged_tile_xyzs)) - .for_each(|(a, b)| *a = *b); - *lmst = precession_info.lmst_j2000; - *latitude = precession_info.array_latitude_j2000; - } else { - tile_xyzs_high_res - .iter_mut() - .zip_eq(unflagged_tile_xyzs) - .for_each(|(a, b)| *a = *b); - *lmst = get_lmst(array_position.longitude_rad, time, dut1); - *latitude = array_position.latitude_rad; - }; - let hadec_phase = obs_context.phase_centre.to_hadec(*lmst); - let (s_ha, c_ha) = hadec_phase.ha.sin_cos(); - let (s_dec, c_dec) = hadec_phase.dec.sin_cos(); - let mut tile_uvws_high_res = vec![UVW::default(); num_tiles]; - for (tile_uvw, tile_uv, tile_w, &tile_xyz) in izip!( - tile_uvws_high_res.iter_mut(), - tile_uvs_high_res.iter_mut(), - tile_ws_high_res.iter_mut(), - tile_xyzs_high_res.iter(), - ) { - let uvw = UVW::from_xyz_inner(tile_xyz, s_ha, c_ha, s_dec, c_dec); - *tile_uvw = uvw; - *tile_uv = UV { u: uvw.u, v: uvw.v }; - *tile_w = W(uvw.w); - } - - // The UVWs for every timestep will be the same (because the phase - // centres are always the same). Make these ahead of time for - // efficiency. - let mut count = 0; - for (i, t1) in tile_uvws_high_res.iter().enumerate() { - for t2 in tile_uvws_high_res.iter().skip(i + 1) { - high_res_uvws[count] = *t1 - *t2; - count += 1; - } - } - } - - // /////// // - // WEIGHTS // - // /////// // - - // // use the baseline taper from the RTS, 1-exp(-(u*u+v*v)/(2*sig^2)); - let vis_weights_tfb = { - let mut iono_taper = get_weights_rts( - tile_uvs_high_res.view(), - all_fine_chan_lambdas_m, - short_baseline_sigma, - ); - iono_taper *= &vis_weights_tfb; - iono_taper - }; - let vis_weights_tfb = { - assert_eq!(baseline_weights.len(), vis_weights_tfb.len_of(Axis(2))); - let mut vis_weights_tfb = vis_weights_tfb.to_owned(); - for (vis_weight, bl_weight) in vis_weights_tfb - .iter_mut() - .zip(baseline_weights.iter().cycle()) - { - *vis_weight = (*vis_weight as f64 * *bl_weight) as f32; - } - vis_weights_tfb - }; - - // ////////////////// // - // LOW RES PRECESSION // - // ////////////////// // - - let (average_lmst, _average_latitude, average_tile_xyzs) = if no_precession { - let average_timestamp = timeblock.median; - let average_tile_xyzs = - ArrayView2::from_shape((1, num_tiles), unflagged_tile_xyzs).expect("correct shape"); - ( - get_lmst(array_position.longitude_rad, average_timestamp, dut1), - array_position.latitude_rad, - CowArray::from(average_tile_xyzs), - ) - } else { - let average_timestamp = timeblock.median; - let average_precession_info = precess_time( - array_position.longitude_rad, - array_position.latitude_rad, - obs_context.phase_centre, - average_timestamp, - dut1, - ); - let average_precessed_tile_xyzs = Array2::from_shape_vec( - (1, num_tiles), - average_precession_info.precess_xyz(unflagged_tile_xyzs), - ) - .expect("correct shape"); - - ( - average_precession_info.lmst_j2000, - average_precession_info.array_latitude_j2000, - CowArray::from(average_precessed_tile_xyzs), - ) - }; - - let gpu_xyzs_high_res: Vec<_> = tile_xyzs_high_res - .iter() - .copied() - .map(|XyzGeodetic { x, y, z }| gpu::XYZ { - x: x as GpuFloat, - y: y as GpuFloat, - z: z as GpuFloat, - }) - .collect(); - let d_xyzs = DevicePointer::copy_to_device(&gpu_xyzs_high_res)?; - - // temporary arrays for accumulation - // TODO: Do a stocktake of arrays that are lying around! - let vis_residual_low_res_fb: Array3> = - Array3::zeros((1, num_low_res_chans, num_cross_baselines)); - let mut vis_weights_low_res_fb: Array3 = Array3::zeros(vis_residual_low_res_fb.raw_dim()); - - // The low-res weights only need to be populated once. - weights_average(vis_weights_tfb.view(), vis_weights_low_res_fb.view_mut()); - - let freq_average_factor: i32 = (all_fine_chan_lambdas_m.len() / num_low_res_chans) - .try_into() - .expect("smaller than i32::MAX"); - - let mut bad_sources = Vec::::new(); - - unsafe { - let mut gpu_uvws: ArrayBase, Dim<[usize; 2]>> = - Array2::default((num_timesteps, num_cross_baselines)); - gpu_uvws - .outer_iter_mut() - .zip(tile_xyzs_high_res.outer_iter()) - .zip(lmsts.iter()) - .for_each(|((mut gpu_uvws, xyzs), lmst)| { - let phase_centre = obs_context.phase_centre.to_hadec(*lmst); - let v = xyzs_to_cross_uvws(xyzs.as_slice().unwrap(), phase_centre) - .into_iter() - .map(|uvw| gpu::UVW { - u: uvw.u as GpuFloat, - v: uvw.v as GpuFloat, - w: uvw.w as GpuFloat, - }) - .collect::>(); - gpu_uvws.assign(&ArrayView1::from(&v)); - }); - let mut d_uvws_from = DevicePointer::copy_to_device(gpu_uvws.as_slice().unwrap())?; - let mut d_uvws_to = - DevicePointer::malloc(gpu_uvws.len() * std::mem::size_of::())?; - - let gpu_lmsts: Vec = lmsts.iter().map(|l| *l as GpuFloat).collect(); - let d_lmsts = DevicePointer::copy_to_device(&gpu_lmsts)?; - - let gpu_lambdas: Vec = all_fine_chan_lambdas_m - .iter() - .map(|l| *l as GpuFloat) - .collect(); - let d_lambdas = DevicePointer::copy_to_device(&gpu_lambdas)?; - - let gpu_xyzs_low_res: Vec<_> = average_tile_xyzs - .iter() - .copied() - .map(|XyzGeodetic { x, y, z }| gpu::XYZ { - x: x as GpuFloat, - y: y as GpuFloat, - z: z as GpuFloat, - }) - .collect(); - let d_xyzs_low_res = DevicePointer::copy_to_device(&gpu_xyzs_low_res)?; - - let gpu_low_res_lambdas: Vec = - low_res_lambdas_m.iter().map(|l| *l as GpuFloat).collect(); - let d_low_res_lambdas = DevicePointer::copy_to_device(&gpu_low_res_lambdas)?; - - let d_average_lmsts = DevicePointer::copy_to_device(&[average_lmst as GpuFloat])?; - let mut d_low_res_uvws: DevicePointer = - DevicePointer::malloc(gpu_uvws.len() * std::mem::size_of::())?; - // Make the amount of elements in `d_iono_fits` a power of 2, for - // efficiency. - let mut d_iono_fits = { - let min_size = - num_cross_baselines * num_low_res_chans * std::mem::size_of::>(); - let n = (min_size as f64).log2().ceil() as u32; - let size = 2_usize.pow(n); - let mut d: DevicePointer> = DevicePointer::malloc(size).unwrap(); - d.clear(); - d - }; - - // let mut d_low_res_vis = DevicePointer::malloc( - // num_cross_baselines * low_res_freqs_hz.len() * std::mem::size_of::>(), - // ); - // let mut d_low_res_weights = DevicePointer::malloc( - // num_cross_baselines * low_res_freqs_hz.len() * std::mem::size_of::(), - // ); - - let mut d_high_res_vis_tfb = - DevicePointer::copy_to_device(vis_residual_tfb.as_slice().unwrap())?; - // TODO: rename d_high_res_resid_tfb - let mut d_high_res_vis2_tfb = - DevicePointer::copy_to_device(vis_residual_tfb.as_slice().unwrap())?; - let d_high_res_weights_tfb = - DevicePointer::copy_to_device(vis_weights_tfb.as_slice().unwrap())?; - - // TODO: rename d_low_res_resid_fb - let mut d_low_res_vis_fb = - DevicePointer::copy_to_device(vis_residual_low_res_fb.as_slice().unwrap())?; - let d_low_res_weights_fb = - DevicePointer::copy_to_device(vis_weights_low_res_fb.as_slice().unwrap())?; - - let mut d_high_res_model_tfb: DevicePointer> = DevicePointer::malloc( - timestamps.len() - * num_cross_baselines - * all_fine_chan_lambdas_m.len() - * std::mem::size_of::>(), - )?; - let mut d_low_res_model_fb = - DevicePointer::copy_to_device(vis_residual_low_res_fb.as_slice().unwrap())?; - let mut d_low_res_model_rotated = - DevicePointer::copy_to_device(vis_residual_low_res_fb.as_slice().unwrap())?; - - // One pointer per timestep. - let mut d_uvws = Vec::with_capacity(high_res_uvws.len_of(Axis(0))); - // Temp vector to store results. - let mut gpu_uvws = vec![gpu::UVW::default(); high_res_uvws.len_of(Axis(1))]; - for uvws in high_res_uvws.outer_iter() { - // Convert the type and push the results to the device, - // saving the resulting pointer. - uvws.iter() - .zip_eq(gpu_uvws.iter_mut()) - .for_each(|(&UVW { u, v, w }, gpu_uvw)| { - *gpu_uvw = gpu::UVW { - u: u as GpuFloat, - v: v as GpuFloat, - w: w as GpuFloat, - } - }); - d_uvws.push(DevicePointer::copy_to_device(&gpu_uvws)?); - } - let mut d_beam_jones = DevicePointer::default(); - - for pass in 0..num_passes { - peel_progress.reset(); - peel_progress.set_message(format!( - "Peeling timeblock {}, pass {}", - timeblock.index + 1, - pass + 1 - )); - - let mut pass_issues = 0; - let mut pass_alpha_mag = 0.; - let mut pass_bega_mag = 0.; - let mut pass_gain_mag = 0.; - - // this needs to be inside the pass loop, because d_uvws_from gets swapped with d_uvws_to - gpu_kernel_call!( - gpu::xyzs_to_uvws, - d_xyzs.get(), - d_lmsts.get(), - d_uvws_from.get_mut(), - gpu::RADec { - ra: obs_context.phase_centre.ra as GpuFloat, - dec: obs_context.phase_centre.dec as GpuFloat, - }, - num_tiles_i32, - num_cross_baselines_i32, - num_timesteps_i32 - )?; - - for (i_source, (((source_name, source), iono_consts), source_pos)) in source_list - .iter() - .take(num_sources_to_iono_subtract) - .zip_eq(iono_consts.iter_mut()) - .zip(source_weighted_positions.iter().copied()) - .enumerate() - { - let start = std::time::Instant::now(); - // pb_debug!( - // "peel loop {pass}: {source_name} at {source_pos} (has iono {iono_consts:?})" - // ); - - // let old_iono_consts = *iono_consts; - let old_iono_consts = IonoConsts { - alpha: iono_consts.alpha, - beta: iono_consts.beta, - gain: iono_consts.gain, - }; - let gpu_old_iono_consts = gpu::IonoConsts { - alpha: old_iono_consts.alpha, - beta: old_iono_consts.beta, - gain: old_iono_consts.gain, - }; - - gpu_kernel_call!( - gpu::xyzs_to_uvws, - d_xyzs.get(), - d_lmsts.get(), - d_uvws_to.get_mut(), - gpu::RADec { - ra: source_pos.ra as GpuFloat, - dec: source_pos.dec as GpuFloat, - }, - num_tiles_i32, - num_cross_baselines_i32, - num_timesteps_i32, - )?; - pb_trace!("{:?}: xyzs_to_uvws", start.elapsed()); - - // rotate d_high_res_vis_tfb in place - gpu_kernel_call!( - gpu::rotate, - d_high_res_vis_tfb.get_mut().cast(), - num_timesteps_i32, - num_cross_baselines_i32, - num_high_res_chans_i32, - d_uvws_from.get(), - d_uvws_to.get(), - d_lambdas.get() - )?; - pb_trace!("{:?}: rotate", start.elapsed()); - - // there's a bug in the modeller where it ignores --no-precession - // high_res_modeller.update_with_a_source(source, obs_context.phase_centre)?; - high_res_modeller.update_with_a_source(source, source_pos)?; - // Clear the old memory before reusing the buffer. - d_high_res_model_tfb.clear(); - for (i_time, (lmst, latitude)) in lmsts.iter().zip(latitudes.iter()).enumerate() { - let original_model_ptr = d_high_res_model_tfb.ptr; - d_high_res_model_tfb.ptr = d_high_res_model_tfb - .ptr - .add(i_time * num_cross_baselines * all_fine_chan_lambdas_m.len()); - let original_uvw_ptr = d_uvws_to.ptr; - d_uvws_to.ptr = d_uvws_to.ptr.add(i_time * num_cross_baselines); - high_res_modeller.model_timestep_with( - *lmst, - *latitude, - &d_uvws_to, - &mut d_beam_jones, - &mut d_high_res_model_tfb, - )?; - d_high_res_model_tfb.ptr = original_model_ptr; - d_uvws_to.ptr = original_uvw_ptr; - } - pb_trace!("{:?}: high res model", start.elapsed()); - - // d_high_res_vis2_tfb = residuals@src. - d_high_res_vis_tfb.copy_to(&mut d_high_res_vis2_tfb)?; - // add iono@src to residuals@src - gpu_kernel_call!( - gpu::add_model, - d_high_res_vis2_tfb.get_mut().cast(), - d_high_res_model_tfb.get().cast(), - gpu_old_iono_consts, - d_lambdas.get(), - d_uvws_to.get(), - num_timesteps_i32, - num_high_res_chans_i32, - num_cross_baselines_i32, - )?; - pb_trace!("{:?}: add_model", start.elapsed()); - - // *** UGLY HACK *** - // d_high_res_model_rotated = iono@src - let mut d_high_res_model_rotated: DevicePointer> = - DevicePointer::malloc(d_high_res_model_tfb.get_size())?; - d_high_res_model_rotated.clear(); - gpu_kernel_call!( - gpu::add_model, - d_high_res_model_rotated.get_mut().cast(), - d_high_res_model_tfb.get().cast(), - gpu_old_iono_consts, - d_lambdas.get(), - d_uvws_to.get(), - num_timesteps_i32, - num_high_res_chans_i32, - num_cross_baselines_i32, - )?; - pb_trace!("{:?}: add_model", start.elapsed()); - - gpu_kernel_call!( - gpu::average, - d_high_res_vis2_tfb.get().cast(), - d_high_res_weights_tfb.get(), - d_low_res_vis_fb.get_mut().cast(), - num_timesteps_i32, - num_cross_baselines_i32, - num_high_res_chans_i32, - freq_average_factor - )?; - pb_trace!("{:?}: average high res vis", start.elapsed()); - - gpu_kernel_call!( - gpu::average, - d_high_res_model_rotated.get().cast(), - d_high_res_weights_tfb.get(), - d_low_res_model_fb.get_mut().cast(), - num_timesteps_i32, - num_cross_baselines_i32, - num_high_res_chans_i32, - freq_average_factor - )?; - pb_trace!("{:?}: average high res model", start.elapsed()); - - gpu_kernel_call!( - gpu::xyzs_to_uvws, - d_xyzs_low_res.get(), - d_average_lmsts.get(), - d_low_res_uvws.get_mut(), - gpu::RADec { - ra: source_pos.ra as GpuFloat, - dec: source_pos.dec as GpuFloat, - }, - num_tiles_i32, - num_cross_baselines_i32, - 1, - )?; - pb_trace!("{:?}: low res xyzs_to_uvws", start.elapsed()); - - // !!!! - // comment me out - // !!!! - // we have low res residual and model, now print what iono fit will see. - // { - // use marlu::math::cross_correlation_baseline_to_tiles; - // let vis_residual_low_res_fb = d_low_res_vis_fb.copy_from_device_new()?; - // dbg!(vis_residual_low_res_fb.len(), num_low_res_chans, num_cross_baselines); - // let vis_residual_low_res_fb = Array2::from_shape_vec( - // (num_low_res_chans, num_cross_baselines), - // vis_residual_low_res_fb, - // ) - // .unwrap(); - // let vis_model_low_res_fb = d_low_res_model_fb.copy_from_device_new()?; - // let vis_model_low_res_fb = Array2::>::from_shape_vec( - // (num_low_res_chans, num_cross_baselines), - // vis_model_low_res_fb, - // ) - // .unwrap(); - // let vis_weights_low_res_fb = d_low_res_weights_fb.copy_from_device_new()?; - // let vis_weights_low_res_fb = Array2::::from_shape_vec( - // (num_low_res_chans, num_cross_baselines), - // vis_weights_low_res_fb, - // ) - // .unwrap(); - // let ant_pairs = (0..num_cross_baselines) - // .map(|bl_idx| cross_correlation_baseline_to_tiles(num_tiles, bl_idx)) - // .collect_vec(); - // let uvws_low_res = d_low_res_uvws.copy_from_device_new()?; - // for (ch_idx, (vis_residual_low_res_b, vis_model_low_res_b, vis_weights_low_res_b, &lambda)) in izip!( - // vis_residual_low_res_fb.outer_iter(), - // vis_model_low_res_fb.outer_iter(), - // vis_weights_low_res_fb.outer_iter(), - // low_res_lambdas_m, - // ).enumerate() { - // // dbg!(&ch_idx); - // if ch_idx > 1 { - // continue; - // } - // for (residual, model, weight, &gpu::UVW { u, v, w: _ }, &(ant1, ant2)) in izip!( - // vis_residual_low_res_b.iter(), - // vis_model_low_res_b.iter(), - // vis_weights_low_res_b.iter(), - // &uvws_low_res, - // ant_pairs.iter(), - // ) { - // // dbg!(&ant1, &ant2); - // if ant1 != 0 || (ant2 >= 16 && ant2 < num_tiles_i32 as usize - 16) { - // continue; - // } - // let residual_i = residual[0] + residual[3]; - // let model_i = model[0] + model[3]; - // let u = u as GpuFloat; - // let v = v as GpuFloat; - // println!("uv {ant1:3} {ant2:3} ({u:+9.3}, {v:+9.3}) l{lambda:+7.5} wt{weight:+3.1} | RI {:+11.7} @{:+5.3}pi | MI {:+11.7} @{:+5.3}pi", residual_i.norm(), residual_i.arg(), model_i.norm(), model_i.arg()); - // } - // } - // } - - let mut gpu_iono_consts = gpu::IonoConsts { - alpha: 0.0, - beta: 0.0, - gain: 1.0, - }; - // get size of device ptr - let lrblch = (num_cross_baselines_i32 * num_low_res_chans_i32) as f64; - pb_trace!("before iono_loop nt{:?} nxbl{:?} nlrch{:?} = lrxblch{:?}; lrvfb{:?} lrwfb{:?} lrmfb{:?} lrmrfb{:?}", - num_tiles_i32, - num_cross_baselines_i32, - num_low_res_chans_i32, - lrblch, - d_low_res_vis_fb.get_size() as f64 / lrblch, - d_low_res_weights_fb.get_size() as f64 / lrblch, - d_low_res_model_fb.get_size() as f64 / lrblch, - d_low_res_model_rotated.get_size() as f64 / lrblch, - ); - gpu_kernel_call!( - gpu::iono_loop, - d_low_res_vis_fb.get().cast(), - d_low_res_weights_fb.get(), - d_low_res_model_fb.get().cast(), - d_low_res_model_rotated.get_mut().cast(), - d_iono_fits.get_mut().cast(), - &mut gpu_iono_consts, - num_cross_baselines_i32, - num_low_res_chans_i32, - num_loops as i32, - d_low_res_uvws.get(), - d_low_res_lambdas.get(), - convergence as GpuFloat, - )?; - pb_trace!("{:?}: iono_loop", start.elapsed()); - - iono_consts.alpha = old_iono_consts.alpha + gpu_iono_consts.alpha; - iono_consts.beta = old_iono_consts.beta + gpu_iono_consts.beta; - iono_consts.gain = old_iono_consts.gain * gpu_iono_consts.gain; - - #[rustfmt::skip] - let issues = format!( - "{}{}{}", - if iono_consts.alpha.abs() > 1e-3 { - if iono_consts.alpha > 0.0 { "A" } else { "a" } - } else { - "" - }, - if iono_consts.beta.abs() > 1e-3 { - if iono_consts.beta > 0.0 { "B" } else { "b" } - } else { - "" - }, - if iono_consts.gain < 0.0 { - "g" - } else if iono_consts.gain > 1.5 { - "G" - } else { - "" - }, - ); - let message = format!( - // "t{:03} p{pass} s{i_source:6}|{source_name:16} @ ra {:+7.2} d {:+7.2} | a {:+7.2e} b {:+7.2e} g {:+3.2} | da {:+8.2e} db {:+8.2e} dg {:+3.2} | {}", - "t{:3} pass {:2} s{i_source:6}|{source_name:16} @ ra {:+7.2} d {:+7.2} | a {:+8.6} b {:+8.6} g {:+3.2} | da {:+8.6} db {:+8.6} dg {:+3.2} | {}", - timeblock.index, - pass+1, - (source_pos.ra.to_degrees() + 180.) % 360. - 180., - source_pos.dec.to_degrees(), - iono_consts.alpha, - iono_consts.beta, - iono_consts.gain, - iono_consts.alpha - old_iono_consts.alpha, - iono_consts.beta - old_iono_consts.beta, - iono_consts.gain - old_iono_consts.gain, - issues, - ); - if issues.is_empty() { - pb_debug!("[peel_gpu] {}", message); - pass_alpha_mag += (iono_consts.alpha - old_iono_consts.alpha).abs(); - pass_bega_mag += (iono_consts.beta - old_iono_consts.beta).abs(); - pass_gain_mag += iono_consts.gain - old_iono_consts.gain; - } else { - pb_debug!( - "[peel_gpu] {} (reverting to a {:+8.6} b {:+8.6} g {:+3.2})", - message, - old_iono_consts.alpha, - old_iono_consts.beta, - old_iono_consts.gain, - ); - bad_sources.push(BadSource { - gpstime: timeblock.median.to_gpst_seconds(), - pass, - i_source, - source_name: source_name.to_string(), - // weighted_catalogue_pos_j2000: source_pos, - alpha: iono_consts.alpha, - beta: iono_consts.beta, - gain: iono_consts.gain, - residuals_i: Vec::default(), // todo!(), - residuals_q: Vec::default(), // todo!(), - residuals_u: Vec::default(), // todo!(), - residuals_v: Vec::default(), // todo!(), - }); - - iono_consts.alpha = old_iono_consts.alpha; - iono_consts.beta = old_iono_consts.beta; - iono_consts.gain = old_iono_consts.gain; - pass_issues += 1; - } - - let gpu_iono_consts = gpu::IonoConsts { - alpha: iono_consts.alpha, - beta: iono_consts.beta, - gain: iono_consts.gain, - }; - - pb_trace!("subtracting {gpu_old_iono_consts:?} adding {gpu_iono_consts:?}"); - - // vis += iono(model, old_consts) - iono(model, consts) - gpu_kernel_call!( - gpu::subtract_iono, - d_high_res_vis_tfb.get_mut().cast(), - d_high_res_model_tfb.get().cast(), - gpu_iono_consts, - gpu_old_iono_consts, - d_uvws_to.get(), - d_lambdas.get(), - num_timesteps_i32, - num_cross_baselines_i32, - num_high_res_chans_i32, - )?; - pb_trace!("{:?}: subtract_iono", start.elapsed()); - - // Peel? - let num_sources_to_peel = 0; - if pass == num_passes - 1 && i_source < num_sources_to_peel { - // We currently can only do DI calibration on the CPU. Copy the visibilities back to the host. - let vis = d_high_res_vis_tfb.copy_from_device_new()?; - - // *** UGLY HACK *** - let mut d_high_res_model_rotated: DevicePointer> = - DevicePointer::malloc(d_high_res_model_tfb.get_size())?; - d_high_res_model_rotated.clear(); - gpu_kernel_call!( - gpu::add_model, - d_high_res_model_rotated.get_mut().cast(), - d_high_res_model_tfb.get().cast(), - //iono_consts.0 as GpuFloat, - //iono_consts.1 as GpuFloat, - gpu_iono_consts, - d_lambdas.get(), - d_uvws_to.get(), - num_timesteps_i32, - num_high_res_chans_i32, - num_cross_baselines_i32, - )?; - let model = d_high_res_model_rotated.copy_from_device_new()?; - - let mut di_jones = - Array3::from_elem((1, num_tiles, num_high_res_chans), Jones::identity()); - let shape = (num_timesteps, num_high_res_chans, num_cross_baselines); - let pb = ProgressBar::hidden(); - let di_cal_results = calibrate_timeblock( - ArrayView3::from_shape(shape, &vis).expect("correct shape"), - ArrayView3::from_shape(shape, &model).expect("correct shape"), - di_jones.view_mut(), - timeblock, - high_res_chanblocks, - 50, - 1e-8, - 1e-4, - obs_context.polarisations, - pb, - true, - ); - if di_cal_results.into_iter().all(|r| r.converged) { - // Apply. - } - } - - // The new phase centre becomes the old one. - std::mem::swap(&mut d_uvws_from, &mut d_uvws_to); - - peel_progress.inc(1); - } - - let num_good_sources = (num_sources_to_iono_subtract - pass_issues) as f64; - if num_good_sources > 0. { - let msg = format!( - "t{:3} pass {:2} ma {:+7.2e} mb {:+7.2e} mg {:+7.2e}", - timeblock.index, - pass + 1, - pass_alpha_mag / num_good_sources, - pass_bega_mag / num_good_sources, - pass_gain_mag / num_good_sources - ); - if pass_issues > 0 { - pb_warn!("[peel_gpu] {} ({} issues)", msg, pass_issues); - } else { - pb_info!("[peel_gpu] {} (no issues)", msg); - } - } else { - pb_warn!( - "[peel_gpu] t{:03} pass {:2} all sources had issues", - timeblock.index, - pass + 1 - ); - } - - // Rotate back to the phase centre. - gpu_kernel_call!( - gpu::xyzs_to_uvws, - d_xyzs.get(), - d_lmsts.get(), - d_uvws_to.get_mut(), - gpu::RADec { - ra: obs_context.phase_centre.ra as GpuFloat, - dec: obs_context.phase_centre.dec as GpuFloat, - }, - num_tiles_i32, - num_cross_baselines_i32, - num_timesteps_i32 - )?; - - gpu_kernel_call!( - gpu::rotate, - d_high_res_vis_tfb.get_mut().cast(), - num_timesteps_i32, - num_cross_baselines_i32, - num_high_res_chans_i32, - d_uvws_from.get(), - d_uvws_to.get(), - d_lambdas.get() - )?; - } - - // copy results back to host - d_high_res_vis_tfb.copy_from_device(vis_residual_tfb.as_slice_mut().unwrap())?; - } - - let mut pass_counts = HashMap::::new(); - for bad_source in bad_sources.iter() { - *pass_counts - .entry(bad_source.source_name.clone()) - .or_default() += 1; - } - let mut printed = HashSet::::new(); - bad_sources.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal)); - for bad_source in bad_sources.into_iter() { - let BadSource { - gpstime, - // pass, - i_source, - source_name, - alpha, - beta, - gain, - .. - } = bad_source; - // if source_name is in printed - if printed.contains(&source_name) { - continue; - } else { - printed.insert(source_name.clone()); - } - let passes = pass_counts[&source_name]; - - let pos = source_weighted_positions[i_source]; - let RADec { ra, dec } = pos; - pb_warn!( - "[peel_gpu] Bad source: {:.2} p{:2} {} at radec({:+7.2}, {:+7.2}) iono({:+8.6},{:+8.6},{:+3.2})", - gpstime, - passes, - source_name, - (ra + 180.) % 360. - 180., - dec, - alpha, - beta, - gain - ); - let matches = source_list.search_asec(pos, 300.); - for (sep, src_name, idx, comp) in matches { - let compstr = match comp.comp_type { - ComponentType::Gaussian { maj, min, pa } => format!( - "G {:6.1}as {:6.1}as {:+6.1}d", - maj.to_degrees() * 3600., - min.to_degrees() * 3600., - pa.to_degrees() - ), - ComponentType::Point => format!("P {:8} {:8} {:7}", "", "", ""), - _ => format!("? {:8} {:8} {:7}", "", "", ""), - }; - let fluxstr = match comp.flux_type { - FluxDensityType::CurvedPowerLaw { - si, - fd: FluxDensity { freq, i, .. }, - q, - } => format!( - "cpl S={:+6.2}(νn)^{:+5.2} exp[{:+5.2}ln(νn)]; @{:.1}MHz", - i, - si, - q, - freq / 1e6 - ), - FluxDensityType::PowerLaw { - si, - fd: FluxDensity { freq, i, .. }, - } => format!("pl S={:+6.2}(νn)^{:+5.2}; @{:.1}MHz", i, si, freq / 1e6), - FluxDensityType::List(fds) => { - let FluxDensity { i, freq, .. } = fds[0]; - format!("lst S={:+6.2} @{:.1}MHz", i, freq / 1e6) - } - }; - pb_warn!( - "[peel_gpu] {sep:5.1} {src_name:16} c{idx:2} at radec({:+7.2},{:+7.2}) comp({}) flx ({})", - (comp.radec.ra + 180.) % 360. - 180., - comp.radec.dec, - compstr, - fluxstr, - ); - } - } - - Ok(()) -} - /// Just the W terms of [`UVW`] coordinates. #[derive(Clone, Copy, Default, PartialEq, Debug)] struct W(f64); @@ -2696,6 +1841,10 @@ fn joiner_thread<'a>( } } +#[cfg(any(feature = "cuda", feature = "hip"))] +use crate::model::SkyModellerGpu; +// use SkyModellerGpu + #[allow(clippy::too_many_arguments)] fn peel_thread( beam: &dyn Beam, @@ -2706,16 +1855,10 @@ fn peel_thread( num_loops: NonZeroUsize, obs_context: &ObsContext, unflagged_tile_xyzs: &[XyzGeodetic], - uvw_min_metres: f64, - uvw_max_metres: f64, - short_baseline_sigma: f64, + peel_weight_params: &PeelWeightParams, convergence: f64, tile_baseline_flags: &TileBaselineFlags, - array_position: LatLngHeight, - dut1: Duration, chanblocks: &[Chanblock], - all_fine_chan_lambdas_m: &[f64], - low_res_freqs_hz: &[f64], low_res_lambdas_m: &[f64], apply_precession: bool, output_vis_params: Option<&OutputVisParams>, @@ -2726,7 +1869,8 @@ fn peel_thread( multi_progress: &MultiProgress, overall_peel_progress: &ProgressBar, ) -> Result<(), PeelError> { - let mut baseline_weights = None; + let array_position = obs_context.array_position; + let dut1 = obs_context.dut1.unwrap_or_default(); for (i, (mut vis_residual_tfb, vis_weights_tfb, timeblock)) in rx_full_residual.iter().enumerate() @@ -2736,34 +1880,18 @@ fn peel_thread( return Ok(()); } - let baseline_weights = baseline_weights.get_or_insert_with(|| { - let mut baseline_weights = Vec1::try_from_vec(vec![ - 1.0; - tile_baseline_flags - .unflagged_cross_baseline_to_tile_map - .len() - ]) - .expect("not possible to have no unflagged tiles here"); - let uvws = xyzs_to_cross_uvws( - unflagged_tile_xyzs, - obs_context.phase_centre.to_hadec(get_lmst( - array_position.longitude_rad, - *timeblock.timestamps.first(), - dut1, - )), - ); - assert_eq!(baseline_weights.len(), uvws.len()); - for (UVW { u, v, w }, baseline_weight) in - uvws.into_iter().zip(baseline_weights.iter_mut()) - { - let uvw_length = (u.powi(2) + v.powi(2) + w.powi(2)).sqrt(); - if uvw_length < uvw_min_metres || uvw_length > uvw_max_metres { - *baseline_weight = 0.0; - } - } + // /////// // + // WEIGHTS // + // /////// // - baseline_weights - }); + let vis_weights_tfb = peel_weight_params.apply_tfb( + vis_weights_tfb, + obs_context, + timeblock, + apply_precession, + chanblocks, + tile_baseline_flags, + ); let all_fine_chan_freqs_hz = chanblocks.iter().map(|c| c.freq).collect::>(); let mut iono_consts = vec![IonoConsts::default(); num_sources_to_iono_subtract]; @@ -2792,16 +1920,12 @@ fn peel_thread( source_weighted_positions, num_passes.get(), num_loops.get(), - short_baseline_sigma, convergence, - low_res_freqs_hz, - all_fine_chan_lambdas_m, + chanblocks, low_res_lambdas_m, obs_context, - array_position, - unflagged_tile_xyzs, + tile_baseline_flags, &mut high_res_modeller, - dut1, !apply_precession, multi_progress, )?; @@ -2831,17 +1955,12 @@ fn peel_thread( source_weighted_positions, num_passes.get(), num_loops.get(), - short_baseline_sigma, convergence, chanblocks, - all_fine_chan_lambdas_m, low_res_lambdas_m, obs_context, - array_position, - unflagged_tile_xyzs, - baseline_weights, + tile_baseline_flags, &mut high_res_modeller, - dut1, !apply_precession, multi_progress, )?; diff --git a/src/params/peel/tests.rs b/src/params/peel/tests.rs index b70c373e..1417564a 100644 --- a/src/params/peel/tests.rs +++ b/src/params/peel/tests.rs @@ -1432,7 +1432,7 @@ fn test_peel_single_source(peel_type: PeelType) { let num_tiles = obs_context.get_total_num_tiles(); let num_times = obs_context.timestamps.len(); let num_baselines = (num_tiles * (num_tiles - 1)) / 2; - let flagged_tiles = HashSet::new(); + let flagged_tiles: HashSet<_> = obs_context.flagged_tiles.iter().cloned().collect(); let num_chans = obs_context.fine_chan_freqs.len(); let chanblocks = obs_context @@ -1501,18 +1501,34 @@ fn test_peel_single_source(peel_type: PeelType) { let vis_shape = vis_residual_obs_tfb.dim(); let vis_weights = Array3::::ones(vis_shape); let source_weighted_positions = [source_radec]; - let baseline_weights = vec![1.0; vis_model_obs_tfb.len_of(Axis(2))]; let multi_progress = MultiProgress::with_draw_target(ProgressDrawTarget::hidden()); + let peel_weight_params = PeelWeightParams { + uvw_min_metres: 0.0, + uvw_max_metres: f64::MAX, + short_baseline_sigma: SHORT_BASELINE_SIGMA, + }; + + let tile_baseline_flags = TileBaselineFlags::new(num_tiles, flagged_tiles); + for apply_precession in [false, true] { + let vis_weights = peel_weight_params.apply_tfb( + vis_weights.clone(), + &obs_context, + &timeblock, + apply_precession, + &chanblocks, + &tile_baseline_flags, + ); + let mut high_res_modeller = new_sky_modeller( &beam, &source_list, Polarisations::default(), &obs_context.tile_xyzs, &fine_chan_freqs_hz, - &flagged_tiles, + &tile_baseline_flags.flagged_tiles, obs_context.phase_centre, array_pos.longitude_rad, array_pos.latitude_rad, @@ -1585,16 +1601,12 @@ fn test_peel_single_source(peel_type: PeelType) { &source_weighted_positions, NUM_PASSES, NUM_LOOPS, - SHORT_BASELINE_SIGMA, CONVERGENCE, - &fine_chan_freqs_hz, - &lambdas_m, + &chanblocks, &low_res_lambdas_m, &obs_context, - obs_context.array_position, - &obs_context.tile_xyzs, + &tile_baseline_flags, &mut *high_res_modeller, - obs_context.dut1.unwrap_or_default(), !apply_precession, &multi_progress, ) @@ -1608,7 +1620,7 @@ fn test_peel_single_source(peel_type: PeelType) { Polarisations::default(), &obs_context.tile_xyzs, &fine_chan_freqs_hz, - &flagged_tiles, + &tile_baseline_flags.flagged_tiles, obs_context.phase_centre, array_pos.longitude_rad, array_pos.latitude_rad, @@ -1626,17 +1638,12 @@ fn test_peel_single_source(peel_type: PeelType) { &source_weighted_positions, NUM_PASSES, NUM_LOOPS, - SHORT_BASELINE_SIGMA, CONVERGENCE, &chanblocks, - &lambdas_m, &low_res_lambdas_m, &obs_context, - obs_context.array_position, - &obs_context.tile_xyzs, - &baseline_weights, + &tile_baseline_flags, &mut high_res_modeller, - obs_context.dut1.unwrap_or_default(), !apply_precession, &multi_progress, ) @@ -1698,7 +1705,15 @@ fn test_peel_multi_source(peel_type: PeelType) { let num_tiles = obs_context.get_total_num_tiles(); let num_times = obs_context.timestamps.len(); let num_baselines = (num_tiles * (num_tiles - 1)) / 2; - let flagged_tiles = HashSet::new(); + let flagged_tiles: HashSet<_> = obs_context.flagged_tiles.iter().cloned().collect(); + + let tile_baseline_flags = TileBaselineFlags::new(num_tiles, flagged_tiles); + + let peel_weight_params = PeelWeightParams { + uvw_min_metres: 0.0, + uvw_max_metres: f64::MAX, + short_baseline_sigma: SHORT_BASELINE_SIGMA, + }; let num_chans = obs_context.fine_chan_freqs.len(); let chanblocks = obs_context @@ -1740,19 +1755,6 @@ fn test_peel_multi_source(peel_type: PeelType) { let source_midpoint = RADec::from_hadec(HADec::from_radians(0., array_pos.latitude_rad), lst_0h_rad); - let baseline_weights = { - let uvw_lengths = xyzs_to_cross_uvws( - obs_context.tile_xyzs.as_slice(), - obs_context.phase_centre.to_hadec(lst_0h_rad), - ) - .iter() - .map(|&UVW { u, v, .. }| (u.powi(2) + v.powi(2)).sqrt()) - .collect_vec(); - // let uvw_max = uvw_lengths.iter().copied().fold(f64::NAN, f64::max); - // println!("uvw_max: {}", uvw_max); - uvw_lengths.iter().map(|_| 1.).collect_vec() - }; - // observation: this test passes with sources 30 degrees apart, and failes with them 0.05 degrees apart let source_list = SourceList::from(indexmap! { // remember, radec is in radians @@ -1818,6 +1820,15 @@ fn test_peel_multi_source(peel_type: PeelType) { let multi_progress = MultiProgress::with_draw_target(ProgressDrawTarget::hidden()); for apply_precession in [true, false] { + let vis_weights = peel_weight_params.apply_tfb( + vis_weights.clone(), + &obs_context, + &timeblock, + apply_precession, + &chanblocks, + &tile_baseline_flags, + ); + let (average_lmst, _average_latitude) = if !apply_precession { let average_timestamp = timeblock.median; ( @@ -1849,7 +1860,7 @@ fn test_peel_multi_source(peel_type: PeelType) { Polarisations::default(), &obs_context.tile_xyzs, &fine_chan_freqs_hz, - &flagged_tiles, + &tile_baseline_flags.flagged_tiles, obs_context.phase_centre, array_pos.longitude_rad, array_pos.latitude_rad, @@ -1929,16 +1940,12 @@ fn test_peel_multi_source(peel_type: PeelType) { &source_weighted_positions, NUM_PASSES, NUM_LOOPS, - SHORT_BASELINE_SIGMA, CONVERGENCE, - &fine_chan_freqs_hz, - &lambdas_m, + &chanblocks, &low_res_lambdas_m, &obs_context, - obs_context.array_position, - &obs_context.tile_xyzs, + &tile_baseline_flags, &mut *high_res_modeller, - obs_context.dut1.unwrap_or_default(), !apply_precession, &multi_progress, ) @@ -1952,7 +1959,7 @@ fn test_peel_multi_source(peel_type: PeelType) { Polarisations::default(), &obs_context.tile_xyzs, &fine_chan_freqs_hz, - &flagged_tiles, + &tile_baseline_flags.flagged_tiles, obs_context.phase_centre, array_pos.longitude_rad, array_pos.latitude_rad, @@ -1970,17 +1977,12 @@ fn test_peel_multi_source(peel_type: PeelType) { &source_weighted_positions, NUM_PASSES, NUM_LOOPS, - SHORT_BASELINE_SIGMA, CONVERGENCE, &chanblocks, - &lambdas_m, &low_res_lambdas_m, &obs_context, - obs_context.array_position, - &obs_context.tile_xyzs, - baseline_weights.as_slice(), + &tile_baseline_flags, &mut high_res_modeller, - obs_context.dut1.unwrap_or_default(), !apply_precession, &multi_progress, ) @@ -2070,7 +2072,7 @@ mod gpu_tests { use super::*; use crate::{ - gpu::{self, DevicePointer, GpuFloat}, + gpu::{self, gpu_kernel_call, DevicePointer, GpuError, GpuFloat}, model::SkyModellerGpu, };