# CASA Next Generation Infrastructure
# Copyright (C) 2021 AUI, Inc. Washington DC, USA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
This module is included in the public CNGI API.
"""
################################################
def time_average(mxds, vis, bin=1, width=None, span='state', maxuvwdistance=None):
    """
    Average data across the time axis.

    Parameters
    ----------
    mxds : xarray.core.dataset.Dataset
        input multi-xarray Dataset with global data
    vis : str
        visibility partition in the mxds to use
    bin : int
        number of adjacent times to average, used when width is None. Default=1 (no change).
        NOTE: shadows the ``bin`` builtin, kept for backward API compatibility.
    width : str
        resample to width freq (i.e. '10s') and produce uniform time steps over span. Ignores bin. Default None uses bin value.
        see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html.
    span : str
        span of the binning. Allowed values are 'scan', 'state' or 'both'. Default is 'state' (meaning all states in a scan)
    maxuvwdistance (future) : float
        NOT IMPLEMENTED. maximum separation of start-to-end baselines that can be included in an average. (meters)

    Returns
    -------
    xarray.core.dataset.Dataset
        New output multi-xarray Dataset with global data
    """
    import numpy as np
    from cngi._utils._io import mxds_copier

    xds = mxds.attrs[vis]

    # Sentinel used to detect padded/missing group rows after unstacking:
    # np.nan cast to int32 (typically the most negative int32 value, but
    # platform-defined). Rows whose group key equals or falls below this
    # sentinel are dropped below.
    intnan = np.full((1), np.nan, dtype=np.int32)[0]

    # drop vars that don't have time so they don't get stacked later on;
    # they are re-attached unchanged just before returning
    notime_vars = [cc for cc in list(xds.data_vars) if 'time' not in xds[cc].dims]
    xds = xds.drop_vars(notime_vars)

    #######
    # mapped out over groups
    def timebin(gxds, stacked=True):
        """Bin one group along time: mean for metadata, sum for weights, weighted mean for data."""
        if stacked: gxds = gxds.unstack('stb')

        # mean coarsen/resample everything but data and weight
        dvs = [dv for dv in gxds.data_vars if dv not in ['DATA', 'CORRECTED_DATA', 'DATA_WEIGHT', 'CORRECTED_DATA_WEIGHT']] + list(gxds.coords)
        if width is None:
            # boundary='pad' keeps a partial final bin rather than truncating
            nxds = gxds[dvs].coarsen(time=bin, boundary='pad').mean()
        else:
            nxds = gxds[dvs].resample(time=width).mean()

        # sum coarsen/resample weight (weights accumulate, they don't average)
        for wt in ['DATA_WEIGHT', 'CORRECTED_DATA_WEIGHT']:
            if wt in gxds.data_vars:
                if width is None:
                    nxds[wt] = gxds[wt].coarsen(time=bin, boundary='pad').sum()
                else:
                    nxds[wt] = gxds[wt].resample(time=width).sum()

        # use weight in coarsening/resampling data cols:
        # weighted mean = sum(data * weight) / sum(weight)
        for col in ['DATA', 'CORRECTED_DATA']:
            if (col in gxds.data_vars) and (col+'_WEIGHT' in gxds.data_vars):
                if width is None:
                    xda = (gxds[col] * gxds[col+'_WEIGHT']).coarsen(time=bin, boundary='pad').sum()
                else:
                    xda = (gxds[col] * gxds[col+'_WEIGHT']).resample(time=width).sum()
                nxds[col] = xda / nxds[col+'_WEIGHT']

        if stacked: nxds = nxds.stack({'stb': ('time', 'baseline')})
        return nxds

    #############
    # span across state by grouping on scans (keeps scans separate)
    if span == 'state':
        txds = xds.stack({'stb': ('time', 'baseline')})
        txds = txds.groupby('SCAN_NUMBER').map(timebin)
        # drop padded rows marked by the intnan sentinel, then restore 2-D (time, baseline)
        txds = txds.where(txds.SCAN_NUMBER.notnull() & (txds.SCAN_NUMBER > intnan), drop=True).unstack('stb')
        txds = txds.transpose('time', 'baseline', 'chan', 'pol', 'uvw_index', 'spw_id', 'pol_id')

    # span across scans by grouping on states (keeps states separate)
    elif span == 'scan':
        txds = xds.stack({'stb': ('time', 'baseline')})
        txds = txds.groupby('STATE_ID').map(timebin)
        txds = txds.where(txds.STATE_ID.notnull() & (txds.STATE_ID > intnan), drop=True).unstack('stb')
        txds = txds.transpose('time', 'baseline', 'chan', 'pol', 'uvw_index', 'spw_id', 'pol_id')

    # span across both scan and state: bin the whole (unstacked) dataset at once
    else:
        txds = timebin(xds, stacked=False)

    # coarsen can change int/bool dtypes to float, so they need to be manually set back
    for dv in txds.data_vars:
        txds[dv] = txds[dv].astype(xds[dv].dtype)

    # put the attributes and dropped (time-less) data vars back in
    txds = txds.assign_attrs(xds.attrs).assign(dict([(dv, mxds.attrs[vis][dv]) for dv in notime_vars]))

    return mxds_copier(mxds, vis, txds)