CIRA-Pulsars-and-Transients-Group · bwmeyers · Jun 6, 2024 · May 17, 2024 · May 17, 2024 · May 17, 2024
diff --git a/app/make_mwa_tied_array_beam.c b/app/make_mwa_tied_array_beam.c
@@ -54,7 +54,6 @@ struct make_tied_array_beam_opts {
     char              *synth_filter;     // Which synthesis filter to use
     bool               smart;            // Use legacy settings for PFB
     int                max_sec_per_file; // Number of seconds per fits files
-    //float              gpu_mem_GB;       // Default = 0.0. If 0.0 use all GPU mem
     int                nchunks;          // Split each second into this many processing chunks
 };
 
@@ -117,7 +116,7 @@ int main(int argc, char **argv)
     }
 
     vm->cal.metafits     = strdup( opts.cal_metafits );
-    vm->cal.ref_ant      = strdup( opts.ref_ant );
+    vm->cal.ref_ant      = opts.ref_ant ? strdup( opts.ref_ant ) : NULL;
     vm->cal.phase_offset = opts.phase_offset;
     vm->cal.phase_slope  = opts.phase_slope;
     vm->cal.custom_pq_correction = opts.custom_pq_correction;
@@ -138,9 +137,10 @@ int main(int argc, char **argv)
 
     // Calculate the actual start time of the dataset to be processed
     unsigned int p;
-    double mjd, sec_offset;
+    double mjd, mjd_start, sec_offset;
     sec_offset = vm->gps_seconds_to_process[0] - (vm->obs_metadata->sched_start_gps_time_ms / 1000.0);
-    mjd = vm->obs_metadata->sched_start_mjd + (sec_offset / 86400.0);
+    mjd_start = vm->obs_metadata->sched_start_mjd;
+    mjd = mjd_start + (sec_offset / 86400.0);
 
     for (p = 0; p < vm->npointing; p++)
         calc_beam_geom( vm->ras_hours[p], vm->decs_degs[p], mjd, &beam_geom_vals[p] );
@@ -150,7 +150,7 @@ int main(int argc, char **argv)
     uintptr_t npols          = vm->obs_metadata->num_ant_pols;
     unsigned int nsamples    = vm->fine_sample_rate;
 
-    cuDoubleComplex  ****detected_beam;
+    cuDoubleComplex  *data_buffer_fine;
 
     if (vm->do_inverse_pfb)
     {
@@ -161,8 +161,10 @@ int main(int argc, char **argv)
         vmLoadFilter( vm, opts.synth_filter, SYNTHESIS_FILTER, nchans );
         vmScaleFilterCoeffs( vm, SYNTHESIS_FILTER, 15.0/7.2 ); // (1/7.2) = 16384/117964.8
 
-        // Allocate memory for various data products
-        detected_beam = create_detected_beam( vm->npointing, 2*nsamples, nchans, npols );
+        // Allocate memory for voltage buffer that will be used to perform the IPFB
+        // The buffer is 2 seconds long to allow the synthesis filter to operate over
+        // the transition between seconds
+        data_buffer_fine = create_data_buffer_fine( vm->npointing, 2*nsamples, nchans, npols );
     }
 
     /*********************
@@ -171,6 +173,8 @@ int main(int argc, char **argv)
 
     /* Allocate host and device memory for the use of the cu_form_beam function */
     // Declaring pointers to the structs so the memory can be alternated
+    vmSetMaxGPUMem( vm, opts.nchunks );
+
     vmMallocVDevice( vm );
     vmMallocJVDevice( vm );
     vmMallocEHost( vm );
@@ -232,7 +236,7 @@ int main(int argc, char **argv)
     // ------------------
 
     // Populate the relevant header structs
-    vmPopulateVDIFHeader( vm, beam_geom_vals );
+    vmPopulateVDIFHeader( vm, beam_geom_vals, mjd_start, sec_offset );
 
     // Begin the main loop: go through data one second at a time
 
@@ -274,8 +278,11 @@ int main(int argc, char **argv)
 
             vmPullE( vm );
 
-            prepare_detected_beam( detected_beam, mpfs, vm );
-            cu_invert_pfb( detected_beam, timestep_idx, vm->npointing,
+            // Load the voltage data into the buffer
+            prepare_data_buffer_fine( data_buffer_fine, vm, timestep_idx );
+
+            // Run the iPFB kernel
+            cu_invert_pfb( data_buffer_fine, timestep_idx, vm->npointing,
                     nsamples, nchans, npols, vm->vf->sizeof_buffer,
                     &gi, data_buffer_vdif );
 
@@ -327,7 +334,7 @@ int main(int argc, char **argv)
 
     if (vm->do_inverse_pfb)
     {
-        destroy_detected_beam( detected_beam, vm->npointing, 2*nsamples, nchans );
+        free( data_buffer_fine );
     }
 
     if (vm->do_inverse_pfb)
@@ -454,8 +461,6 @@ void usage()
             "\t-h, --help                 Print this help and exit\n"
             "\t-V, --version              Print version number and exit\n\n"
           );
-// This option is currently too problematic to deal with:
-//            "\t-g, --gpu-mem=N            The maximum amount of GPU memory you want make_beam to use in GB [default: -1]\n"
 }
 
 
@@ -476,7 +481,6 @@ void make_tied_array_beam_parse_cmdline(
     opts->analysis_filter      = NULL;
     opts->synth_filter         = NULL;
     opts->max_sec_per_file     = 200;   // Number of seconds per fits files
-    //opts->gpu_mem_GB           = 0.0;
     opts->custom_flags         = NULL;
     opts->nchunks              = 1;
     opts->smart                = false;
@@ -517,7 +521,6 @@ void make_tied_array_beam_parse_cmdline(
                 {"cross-terms",     no_argument,       0, 'X'},
                 {"PQ-phase",        required_argument, 0, 'U'},
                 {"offringa",        no_argument      , 0, 'O'},
-                //{"gpu-mem",         required_argument, 0, 'g'},
                 {"nchunks",         required_argument, 0, 'n'},
                 {"smart",           no_argument,       0, 's'},
                 {"help",            required_argument, 0, 'h'},
@@ -526,7 +529,7 @@ void make_tied_array_beam_parse_cmdline(
 
             int option_index = 0;
             c = getopt_long( argc, argv,
-                             "A:b:Bc:C:d:e:f:F:hm:nN:OpP:R:sS:t:T:U:vVX",
+                             "A:b:Bc:C:d:e:f:F:hm:n:N:OpP:R:sS:t:T:U:vVX",
                              long_options, &option_index);
             if (c == -1)
                 break;
@@ -564,9 +567,6 @@ void make_tied_array_beam_parse_cmdline(
                     opts->custom_flags = (char *)malloc( strlen(optarg) + 1 );
                     strcpy( opts->custom_flags, optarg );
                     break;
-                //case 'g':
-                //    opts->gpu_mem_GB = atof(optarg);
-                //    break;
                 case 'h':
                     usage();
                     exit(EXIT_SUCCESS);

diff --git a/include/vcsbeam.h.in b/include/vcsbeam.h.in
@@ -757,7 +757,7 @@ void vmFreePQIdxsHost( vcsbeam_context *vm );
 void vmFreePQIdxsDevice( vcsbeam_context *vm );
 
 
-void vmSetMaxGPUMem( vcsbeam_context *vm, uintptr_t max_gpu_mem_bytes );
+void vmSetMaxGPUMem( vcsbeam_context *vm, int nchunks );
 void vmPushChunk( vcsbeam_context *vm );
 void vmPushJ( vcsbeam_context *vm );
 
@@ -968,7 +968,7 @@ struct gpu_ipfb_arrays
 extern "C" {
 #endif
 
-void cu_invert_pfb( cuDoubleComplex ****detected_beam, int file_no,
+void cu_invert_pfb( cuDoubleComplex *data_buffer_fine, int file_no,
                         int npointing, int nsamples, int nchan, int npol, int sizeof_buffer,
                         struct gpu_ipfb_arrays *g, float *data_buffer_uvdif );
 
@@ -1026,8 +1026,8 @@ void vmBeamformSecond( vcsbeam_context *vm );
 void vmPullE( vcsbeam_context *vm );
 void vmPullS( vcsbeam_context *vm );
 
-void prepare_detected_beam( cuDoubleComplex ****detected_beam,
-                   mpi_psrfits *mpfs, vcsbeam_context *vm );
+void prepare_data_buffer_fine( cuDoubleComplex *data_buffer_fine, vcsbeam_context *vm,
+                    uintptr_t timestep_idx );
 void vmSendSToFits( vcsbeam_context *vm, mpi_psrfits *mpfs );
 
 float *create_pinned_data_buffer( size_t size );
@@ -1039,9 +1039,7 @@ vm_error vmReadNextSecond( vcsbeam_context *vm );
 cuDoubleComplex ****create_invJi( int nstation, int nchan, int pol );
 void              destroy_invJi( cuDoubleComplex ****array, int nstation, int nchan, int npol );
 
-cuDoubleComplex ****create_detected_beam( int npointing, int nsamples, int nchan, int npol );
-void              destroy_detected_beam( cuDoubleComplex ****array, int npointing,
-                                         int nsamples, int nchan );
+cuDoubleComplex *create_data_buffer_fine( int npointing, int nsamples, int nchan, int npol );
 
 void allocate_input_output_arrays( void **data, void **d_data, size_t size );
 void free_input_output_arrays( void *data, void *d_data );
@@ -1115,9 +1113,10 @@ struct vdifinfo
     size_t samples_per_frame; // Number of time samples per vdif frame
     int dataarraylength;      // The frame length minus the header
     int bits;                 // Bits per sample
-    int nchan;                // Channels per framme
+    int nchan;                // Channels per frame
     int chan_width;           // Channel width in hertz
     int sample_rate;          // Sample rate in hertz
+    int npol;                 // Number of polarisations
     int iscomplex;            // Complex sample flag
     int threadid;             // Which thread are we
     char stationid[3];        // Which station are we
@@ -1140,7 +1139,9 @@ struct vdifinfo
 
     double BW;
 
-    long double MJD_epoch;
+    double MJD_start;
+    double sec_offset;
+    double MJD_epoch;
 
     char date_obs[24];
     char basefilename[1024];
@@ -1159,7 +1160,9 @@ void vdif_write_second( struct vdifinfo *vf, vdif_header *vhdr,
 
 void vmPopulateVDIFHeader(
         vcsbeam_context  *vm,
-        beam_geom        *beam_geom_vals );
+        beam_geom        *beam_geom_vals,
+        double           mjd_start,
+        double           sec_offset );
 
 void to_offset_binary( int8_t *i, int n );
 

diff --git a/src/beam_vdif.c b/src/beam_vdif.c
@@ -115,17 +115,27 @@ void vdif_write_data( struct vdifinfo *vf, int8_t *output )
 
     // write a CPSR2 test header for DSPSR
     char ascii_header[MWA_HEADER_SIZE] = MWA_HEADER_INIT;
-    //ascii_header_set( ascii_header, "UTC_START", "%s", vf->date_obs  );
-    ascii_header_set( ascii_header, "DATAFILE",   "%s", filename      );
-    ascii_header_set( ascii_header, "INSTRUMENT", "%s", "VDIF"        );
-    ascii_header_set( ascii_header, "TELESCOPE",  "%s", vf->telescope );
-    ascii_header_set( ascii_header, "MODE",       "%s", vf->obs_mode  );
-    ascii_header_set( ascii_header, "FREQ",       "%f", vf->fctr      );
-
-    ascii_header_set( ascii_header, "BW",         "%f", vf->BW        );
-    ascii_header_set( ascii_header, "RA",         "%s", vf->ra_str    );
-    ascii_header_set( ascii_header, "DEC",        "%s", vf->dec_str   );
-    ascii_header_set( ascii_header, "SOURCE",     "%s", vf->source    );
+
+    ascii_header_set( ascii_header, "TELESCOPE",  "%s", vf->telescope   );
+    ascii_header_set( ascii_header, "MODE",       "%s", vf->obs_mode    );
+    ascii_header_set( ascii_header, "INSTRUMENT", "%s", "VDIF"          );
+    ascii_header_set( ascii_header, "DATAFILE",   "%s", filename        );
+
+    ascii_header_set( ascii_header, "MJD_START",  "%f", vf->MJD_start   );
+    ascii_header_set( ascii_header, "MJD_EPOCH",  "%f", vf->MJD_epoch   );
+    ascii_header_set( ascii_header, "SEC_OFFSET", "%f", vf->sec_offset  );
+
+    ascii_header_set( ascii_header, "SOURCE",     "%s", vf->source      );
+    ascii_header_set( ascii_header, "RA",         "%s", vf->ra_str      );
+    ascii_header_set( ascii_header, "DEC",        "%s", vf->dec_str     );
+
+    ascii_header_set( ascii_header, "FREQ",       "%f", vf->fctr        );
+    ascii_header_set( ascii_header, "BW",         "%f", vf->BW          );
+    ascii_header_set( ascii_header, "TSAMP",      "%f", 1e6/(float) vf->sample_rate );
+
+    ascii_header_set( ascii_header, "NBIT",       "%d", vf->bits        );
+    ascii_header_set( ascii_header, "NDIM",       "%d", vf->iscomplex+1 );
+    ascii_header_set( ascii_header, "NPOL",       "%d", vf->npol        );
 
     sprintf( filename, "%s.hdr", vf->basefilename );
     fs = fopen( filename,"w" );
@@ -139,10 +149,14 @@ void vdif_write_data( struct vdifinfo *vf, int8_t *output )
  *
  * @param vm The VCSBeam context struct
  * @param beam_geom_vals A `beam_geom` struct containing pointing information
+ * @param mjd_start The start MJD of the observation
+ * @param sec_offset The offset from the start of the observation in seconds
  */
 void vmPopulateVDIFHeader(
         vcsbeam_context  *vm,
-        beam_geom        *beam_geom_vals )
+        beam_geom        *beam_geom_vals,
+        double           mjd_start,
+        double           sec_offset )
 {
     // Write log message
     sprintf( vm->log_message, "Preparing headers for output (receiver channel %lu)",
@@ -215,8 +229,11 @@ void vmPopulateVDIFHeader(
 
         strncpy( vm->vf[p].date_obs, time_utc, sizeof(time_utc) );
 
-        vm->vf[p].MJD_epoch = beam_geom_vals->intmjd + beam_geom_vals->fracmjd;
-        vm->vf[p].fctr      = vm->obs_metadata->metafits_coarse_chans[coarse_chan_idx].chan_centre_hz / 1e6; // (MHz)
+        vm->vf[p].MJD_start  = mjd_start;
+        vm->vf[p].sec_offset = sec_offset;
+        vm->vf[p].MJD_epoch  = beam_geom_vals->intmjd + beam_geom_vals->fracmjd;
+        vm->vf[p].fctr       = vm->obs_metadata->metafits_coarse_chans[coarse_chan_idx].chan_centre_hz / 1e6; // (MHz)
+        vm->vf[p].npol       = 2;
         strncpy( vm->vf[p].source, "unset", 24 );
 
         // The output file basename

diff --git a/src/form_beam.cu b/src/form_beam.cu
@@ -645,41 +645,6 @@ void vmPullS( vcsbeam_context *vm )
     cudaCheckErrors( "vmPullE: cudaMemcpyAsync failed" );
 }
 
-/**
- * Copies the beamformed voltages into a different array to prepare for
- * inverting the PFB.
- *
- * @todo Remove prepare_detected_beam() and use a "host buffer" instead.
- */
-void prepare_detected_beam( cuDoubleComplex ****detected_beam,
-                   mpi_psrfits *mpfs, vcsbeam_context *vm )
-{
-    // Get shortcut variables
-    uintptr_t nchan  = vm->nfine_chan;
-    uintptr_t npol   = vm->obs_metadata->num_ant_pols; // = 2
-    int file_no = vm->chunk_to_load / vm->chunks_per_second;
-
-    // Copy the data back from e back into the detected_beam array
-    // Make sure we put it back into the correct half of the array, depending
-    // on whether this is an even or odd second.
-    int offset, i;
-    offset = file_no % 2 * vm->fine_sample_rate;
-
-    // TODO: turn detected_beam into a 1D array
-    int p, ch, s, pol;
-    for (p   = 0; p   < vm->npointing  ; p++  )
-    for (s   = 0; s   < vm->fine_sample_rate; s++  )
-    for (ch  = 0; ch  < nchan      ; ch++ )
-    for (pol = 0; pol < npol       ; pol++)
-    {
-        i = p  * (npol*nchan*vm->fine_sample_rate) +
-            s  * (npol*nchan)                   +
-            ch * (npol)                         +
-            pol;
-
-        detected_beam[p][s+offset][ch][pol] = vm->e[i];
-    }
-}
 
 /**
  * Renormalises the detected Stokes parameters and copies them into PSRFITS
@@ -768,28 +733,56 @@ cuDoubleComplex ****create_detected_beam( int npointing, int nsamples, int nchan
 }
 
 
-/**
- * (Deprecated) Frees memory on the CPU.
- *
- * @todo Remove the function destroy_detected_beam().
- */
-void destroy_detected_beam( cuDoubleComplex ****array, int npointing, int nsamples, int nchan )
+/*
+* Create a 1-D host buffer containing the fine-channelised complex voltage samples which
+* will be given to the IPFB.
+*/
+cuDoubleComplex *create_data_buffer_fine( int npointing, int nsamples, int nchan, int npol )
 {
-    int p, s, ch;
-    for (p = 0; p < npointing; p++)
+    int buffer_size = npointing * nsamples * nchan * npol * sizeof(cuDoubleComplex);
+
+    // Allocate host memory for buffer
+    cuDoubleComplex *data_buffer_fine;
+    data_buffer_fine = (cuDoubleComplex *)malloc( buffer_size );
+
+    // Initialise buffer to zeros
+    memset( data_buffer_fine, 0, buffer_size );
+
+    return data_buffer_fine;
+}
+
+
+/*
+* Copy the fine-channelised beamformed voltages into a data buffer so that it can
+* be given to the IPFB.
+*/
+void prepare_data_buffer_fine( cuDoubleComplex *data_buffer_fine, vcsbeam_context *vm,
+                    uintptr_t timestep_idx )
+{
+    // Get shortcut variables
+    uintptr_t nchan = vm->nfine_chan;
+    uintptr_t npol  = vm->obs_metadata->num_ant_pols; // = 2
+    int file_no = timestep_idx % 2;
+
+    // Copy the beamformed data from e into the data buffer
+    // Make sure we put it back into the correct half of the array, depending
+    // on whether this is an even or odd second.
+    int offset = file_no % 2 * vm->fine_sample_rate;
+
+    int p,s,ch,pol,i,j;
+    for (p   = 0; p   < vm->npointing;        p++   )
+    for (s   = 0; s   < vm->fine_sample_rate; s++   )
+    for (ch  = 0; ch  < nchan;                ch++  )
+    for (pol = 0; pol < npol;                 pol++ )
     {
-        for (s = 0; s < nsamples; s++)
-        {
-            for (ch = 0; ch < nchan; ch++)
-                free( array[p][s][ch] );
+        // Calculate index for e
+        i = B_IDX(p,s,ch,pol,vm->fine_sample_rate,nchan,npol);
 
-            free( array[p][s] );
-        }
+        // Calculate index for data_buffer_fine
+        j = B_IDX(p,s+offset,ch,pol,vm->fine_sample_rate,nchan,npol);
 
-        free( array[p] );
+        data_buffer_fine[j] = vm->e[i];
     }
-
-    free( array );
 }
 
 /**