Skip to content

Commit

Permalink
Detect and use the CRAM tile with a custom beam response.
Browse files Browse the repository at this point in the history
The way this works is to augment the existing de-duplication code by saying that
the CRAM tile is a unique tile, then writing its beam responses to the array of
all responses.

One limitation of this approach is the number of frequencies used in the
responses; the FEE beam typically only has 24 unique frequencies in an
observation, meaning the array of all beam responses only has a length of 24 for
the frequency dimension. But the CRAM uses an analytic beam, which defines a
different response for every frequency. Therefore using the FEE beam with a CRAM
tile will make the CRAM beam responses much coarser than they could be.

This work isn't thoroughly tested, as I've run out of time.
  • Loading branch information
cjordan committed Oct 31, 2023
1 parent 42b9cf7 commit bdc1c64
Show file tree
Hide file tree
Showing 22 changed files with 600 additions and 124 deletions.
17 changes: 17 additions & 0 deletions mdbook/src/defs/beam.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,20 @@ Beam code usually does not error, but if it does it's likely because:
2. There aren't exactly 16 or 32 dipole gains per tile; or
3. There's something wrong with the FEE HDF5 file. The official file is well
tested.

## CRAM tile

[The CRAM tile](https://www.mwatelescope.org/science/eor/cram/) is a special
64-bowtie tile, as opposed to the usual 16-bowtie tiles. `hyperdrive` attempts
to detect and simulate the CRAM's beam response using the RTS-flavoured analytic
beam (using 64 bowties).

Currently, it is anticipated that the CRAM does not ever point away from zenith,
so no bowtie/dipole delays are accepted. Metafits files also have no way of
specifying dead dipoles in the CRAM (to my knowledge), so `hyperdrive` allows
users to specify bowtie gains on the command line with `--cram-dipole-gains`
(64 values, 1 for "alive", 0 for "dead", see [dead dipoles](mwa/dead_dipoles.md)
for more info). `hyperdrive` will automatically detect a tile named "CRAM"
and use the special beam responses for it, but a different tile can be selected
with `--cram-tile`. Finally, the presence of the CRAM tile can be ignored with
`--ignore-cram`, but this is not likely to do anything useful.
170 changes: 144 additions & 26 deletions src/beam/analytic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,18 @@ use ndarray::prelude::*;
use super::{partial_to_full, validate_delays, Beam, BeamError, BeamType, Delays};

#[cfg(any(feature = "cuda", feature = "hip"))]
use super::{BeamGpu, DevicePointer, GpuFloat};
use super::{BeamGpu, DevicePointer, GpuFloat, GpuJones};

#[cfg(feature = "cuda")]
use cuda_runtime_sys::{
cudaMemcpy as gpuMemcpy, cudaMemcpyKind::cudaMemcpyDeviceToHost as gpuMemcpyDeviceToHost,
cudaMemcpyKind::cudaMemcpyHostToDevice as gpuMemcpyHostToDevice,
};
#[cfg(feature = "hip")]
use hip_sys::hiprt::{
hipMemcpy as gpuMemcpy, hipMemcpyKind::hipMemcpyDeviceToHost as gpuMemcpyDeviceToHost,
hipMemcpyKind::hipMemcpyHostToDevice as gpuMemcpyHostToDevice,
};

/// A wrapper of the `AnalyticBeam` struct in hyperbeam that implements the
/// [`Beam`] trait.
Expand All @@ -21,31 +32,37 @@ pub(crate) struct AnalyticBeam {
analytic_type: AnalyticType,
delays: Array2<u32>,
gains: Array2<f64>,
ideal_delays: [u32; 16],
ideal_delays: Box<[u32; 16]>,

/// If there's CRAM tile info, this is the tile index and dipole gains.
cram_tile: Option<(usize, Box<[f64; 64]>)>,
}

impl AnalyticBeam {
pub(crate) fn new_mwa_pb(
num_tiles: usize,
delays: Delays,
gains: Option<Array2<f64>>,
cram_tile: Option<(usize, Box<[f64; 64]>)>,
) -> Result<AnalyticBeam, BeamError> {
Self::new_inner(AnalyticType::MwaPb, num_tiles, delays, gains)
Self::new_inner(AnalyticType::MwaPb, num_tiles, delays, gains, cram_tile)
}

pub(crate) fn new_rts(
num_tiles: usize,
delays: Delays,
gains: Option<Array2<f64>>,
cram_tile: Option<(usize, Box<[f64; 64]>)>,
) -> Result<AnalyticBeam, BeamError> {
Self::new_inner(AnalyticType::Rts, num_tiles, delays, gains)
Self::new_inner(AnalyticType::Rts, num_tiles, delays, gains, cram_tile)
}

fn new_inner(
at: AnalyticType,
num_tiles: usize,
delays: Delays,
gains: Option<Array2<f64>>,
cram_tile: Option<(usize, Box<[f64; 64]>)>,
) -> Result<AnalyticBeam, BeamError> {
// Check that the delays are sensible.
validate_delays(&delays, num_tiles)?;
Expand Down Expand Up @@ -89,7 +106,8 @@ impl AnalyticBeam {
analytic_type: at,
delays,
gains,
ideal_delays,
ideal_delays: Box::new(ideal_delays),
cram_tile,
})
}

Expand Down Expand Up @@ -163,8 +181,8 @@ impl Beam for AnalyticBeam {
self.delays.len_of(Axis(0))
}

fn get_ideal_dipole_delays(&self) -> Option<[u32; 16]> {
Some(self.ideal_delays)
fn get_ideal_dipole_delays(&self) -> Option<&[u32; 16]> {
Some(&self.ideal_delays)
}

fn get_dipole_delays(&self) -> Option<ArcArray<u32, Dim<[usize; 2]>>> {
Expand Down Expand Up @@ -206,7 +224,7 @@ impl Beam for AnalyticBeam {
} else {
let delays = &self.ideal_delays;
let amps = [1.0; 32];
let j = self.calc_jones_inner(azel, freq_hz, delays, &amps, latitude_rad)?;
let j = self.calc_jones_inner(azel, freq_hz, delays.as_slice(), &amps, latitude_rad)?;
Ok(j)
}
}
Expand Down Expand Up @@ -251,7 +269,14 @@ impl Beam for AnalyticBeam {
} else {
let delays = &self.ideal_delays;
let amps = [1.0; 32];
self.calc_jones_array_inner(azels, freq_hz, delays, &amps, latitude_rad, results)?;
self.calc_jones_array_inner(
azels,
freq_hz,
delays.as_slice(),
&amps,
latitude_rad,
results,
)?;
}
Ok(())
}
Expand All @@ -262,6 +287,10 @@ impl Beam for AnalyticBeam {

fn empty_coeff_cache(&self) {}

/// Returns the CRAM tile's index and its 64 bowtie gains, if a CRAM
/// tile was supplied when this beam object was created.
fn get_cram_tile(&self) -> Option<(usize, &[f64; 64])> {
    match &self.cram_tile {
        Some((tile_index, bowtie_gains)) => Some((*tile_index, bowtie_gains.as_ref())),
        None => None,
    }
}

#[cfg(any(feature = "cuda", feature = "hip"))]
fn prepare_gpu_beam(&self, freqs_hz: &[u32]) -> Result<Box<dyn BeamGpu>, BeamError> {
let gpu_beam = unsafe {
Expand All @@ -270,45 +299,134 @@ impl Beam for AnalyticBeam {
};
let freq_map = (0..freqs_hz.len()).map(|i| i as i32).collect::<Vec<_>>();
let d_freq_map = DevicePointer::copy_to_device(&freq_map)?;

// hyperbeam only knows about normal tiles, not the CRAM tile. The
// current approach of dealing with the CRAM tile is to consider it
// as the last tile. This means that the tile map out of hyperbeam is
// wrong. Copy and edit hyperbeam's tile map and use our own.
let total_num_tiles = gpu_beam.get_total_num_tiles();
let (gpu_cram_beam, tile_map) =
if let Some((i_cram_tile, cram_amps)) = self.cram_tile.as_ref() {
let d_hyperbeam_tile_map = gpu_beam.get_device_tile_map();
let mut hyperbeam_tile_map = vec![0; total_num_tiles];
unsafe {
gpuMemcpy(
hyperbeam_tile_map.as_mut_ptr().cast(),
d_hyperbeam_tile_map.cast(),
total_num_tiles * std::mem::size_of::<i32>(),
gpuMemcpyDeviceToHost,
);
}

// Adjust the map for the CRAM tile.
hyperbeam_tile_map[*i_cram_tile] =
hyperbeam_tile_map.iter().copied().max().expect("not empty") + 1;
let tile_map = DevicePointer::copy_to_device(&hyperbeam_tile_map)?;

// Set up a new beam object, RTS style because mwa_pb doesn't
// look as good.
use mwa_hyperbeam::analytic::AnalyticBeam;
let at = AnalyticType::Rts;
let cram_beam = AnalyticBeam::new_custom(at, at.get_default_dipole_height(), 8);
let gpu_cram_beam = unsafe {
cram_beam.gpu_prepare(
Array2::zeros((1, 64)).view(),
ArrayView2::from_shape((1, 64), cram_amps.as_slice()).expect("valid"),
)?
};

(Some(gpu_cram_beam), tile_map)
} else {
// No adjustment needed, but we need to own the tile map, so copy
// it.
let our_map = vec![0; gpu_beam.get_total_num_tiles()];
let mut d_our_map = DevicePointer::copy_to_device(&our_map)?;
unsafe {
gpuMemcpy(
d_our_map.get_mut().cast(),
our_map.as_ptr().cast(),
total_num_tiles * std::mem::size_of::<i32>(),
gpuMemcpyHostToDevice,
);
}

(None, d_our_map)
};

Ok(Box::new(AnalyticBeamGpu {
hyperbeam_object: gpu_beam,
d_freqs_hz: DevicePointer::copy_to_device(freqs_hz)?,
d_freq_map,
freqs_hz: DevicePointer::copy_to_device(freqs_hz)?,
freq_map: d_freq_map,
tile_map,
cram_beam: gpu_cram_beam,
}))
}
}

#[cfg(any(feature = "cuda", feature = "hip"))]
struct AnalyticBeamGpu {
hyperbeam_object: mwa_hyperbeam::analytic::AnalyticBeamGpu,
d_freqs_hz: DevicePointer<u32>,
d_freq_map: DevicePointer<i32>,
freqs_hz: DevicePointer<u32>,
freq_map: DevicePointer<i32>,
tile_map: DevicePointer<i32>,

/// If this is set, then there is a CRAM tile present, and this object will
/// be used to supply beam responses.
cram_beam: Option<mwa_hyperbeam::analytic::AnalyticBeamGpu>,
}

#[cfg(any(feature = "cuda", feature = "hip"))]
impl BeamGpu for AnalyticBeamGpu {
unsafe fn calc_jones_pair(
&self,
az_rad: &[GpuFloat],
za_rad: &[GpuFloat],
d_az_rad: &DevicePointer<GpuFloat>,
d_za_rad: &DevicePointer<GpuFloat>,
latitude_rad: f64,
d_jones: *mut std::ffi::c_void,
d_jones: &mut DevicePointer<GpuJones>,
) -> Result<(), BeamError> {
let d_az_rad = DevicePointer::copy_to_device(az_rad)?;
let d_za_rad = DevicePointer::copy_to_device(za_rad)?;
let num_directions = d_az_rad
.get_num_elements()
.try_into()
.expect("not bigger than i32::MAX");
self.hyperbeam_object.calc_jones_device_pair_inner(
d_az_rad.get(),
d_za_rad.get(),
az_rad.len().try_into().expect("not bigger than i32::MAX"),
self.d_freqs_hz.get(),
self.d_freqs_hz
num_directions,
self.freqs_hz.get(),
self.freqs_hz
.get_num_elements()
.try_into()
.expect("not bigger than i32::MAX"),
latitude_rad as GpuFloat,
true,
d_jones,
d_jones.get_mut().cast(),
)?;

// Overwrite beam responses for the tile corresponding to the CRAM tile,
// if we have that info.
if let Some(cram_beam) = self.cram_beam.as_ref() {
if !matches!(self.get_beam_type(), BeamType::None) {
cram_beam.calc_jones_device_pair_inner(
d_az_rad.get(),
d_za_rad.get(),
num_directions,
self.freqs_hz.get(),
self.freqs_hz.get_num_elements().try_into().expect("valid"),
latitude_rad as GpuFloat,
true,
// The CRAM tile always comes last.
d_jones
.get_mut()
.add(
self.hyperbeam_object.get_num_unique_tiles() as usize
* self.freqs_hz.get_num_elements()
* d_az_rad.get_num_elements(),
)
.cast(),
)?;
}
}

Ok(())
}

Expand All @@ -317,19 +435,19 @@ impl BeamGpu for AnalyticBeamGpu {
}

fn get_tile_map(&self) -> *const i32 {
self.hyperbeam_object.get_device_tile_map()
self.tile_map.get()
}

fn get_freq_map(&self) -> *const i32 {
self.d_freq_map.get()
self.freq_map.get()
}

fn get_num_unique_tiles(&self) -> i32 {
self.hyperbeam_object.get_num_unique_tiles()
self.hyperbeam_object.get_num_unique_tiles() + if self.cram_beam.is_some() { 1 } else { 0 }
}

fn get_num_unique_freqs(&self) -> i32 {
self.d_freqs_hz
self.freqs_hz
.get_num_elements()
.try_into()
.expect("not bigger than i32::MAX")
Expand Down
14 changes: 14 additions & 0 deletions src/beam/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,20 @@ pub enum BeamError {
#[error("Got tile index {got}, but the biggest tile index is {max}")]
BadTileIndex { got: usize, max: usize },

#[error("Got tile index {got} for the CRAM tile, but the biggest tile index is {max}")]
BadCramTileIndex { got: usize, max: usize },

#[error(
"Got tile name '{0}' for the CRAM tile, but this name was not in the list of tile names"
)]
BadTileNameForCram(String),

#[error("Got tile name '{0}' for the CRAM tile, but no tile names were available to match against. Tile names are needed to determine the tile index")]
NoTileNamesForCram(String),

#[error("CRAM dipole gains were specified, but 64 values are needed")]
Not64CramDipoleGains,

#[error("hyperbeam FEE error: {0}")]
HyperbeamFee(#[from] mwa_hyperbeam::fee::FEEBeamError),

Expand Down
Loading

0 comments on commit bdc1c64

Please sign in to comment.