Source code for cngi.conversion.convert_table

#  CASA Next Generation Infrastructure
#  Copyright (C) 2021 AUI, Inc. Washington DC, USA
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""
this module will be included in the api
"""



[docs]def convert_table(infile, outfile=None, subtable=None, keys=None, timecols=None, ignorecols=None, compressor=None, chunks=(10000, -1), append=False, nofile=False): """ Convert casacore table format to xarray Dataset and zarr storage format. This function requires CASA6 casatools module. Table rows may be renamed or expanded to n-dim arrays based on column values specified in keys. Parameters ---------- infile : str Input table filename outfile : str Output zarr filename. If None, will use infile name with .tbl.zarr extension subtable : str Name of the subtable to process. If None, main table will be used keys : dict or str Source column mappings to dimensions. Can be a dict mapping source columns to target dims, use a tuple when combining cols (ie {('ANTENNA1','ANTENNA2'):'baseline'} or a string to rename the row axis dimension to the specified value. Default of None timecols : list list of strings specifying column names to convert to datetime format from casacore time. Default is None ignorecols : list list of column names to ignore. This is useful if a particular column is causing errors. Default is None compressor : numcodecs.blosc.Blosc The blosc compressor to use when saving the converted data to disk using zarr. If None the zstd compression algorithm used with compression level 2. chunks : int Shape of desired chunking in the form of (dim0, dim1, ..., dimN), use -1 for entire axis in one chunk. Default is (80000, 10). Chunking is applied per column / data variable. If too few dimensions are specified, last chunk size is reused as necessary. Note: chunk size is the product of the four numbers, and data is batch processed by the first axis, so that will drive memory needed for conversion. append : bool Append an xarray dataset as a new partition to an existing zarr directory. False will overwrite zarr directory with a single new partition nofile : bool Allows legacy table to be directly read without file conversion. If set to true, no output file will be written and entire table will be held in memory. Requires ~4x the memory of the table size. Default is False Returns ------- New xarray.core.dataset.Dataset New xarray Dataset of table data contents. One element in list per DDI plus the metadata global. """ import os from numcodecs import Blosc import importlib_metadata import cngi._utils._table_conversion as tblconv # parse filename to use infile = os.path.expanduser(infile) prefix = infile[:infile.rindex('.')] if outfile is None: outfile = prefix + '.tbl.zarr' outfile = os.path.expanduser(outfile) if not infile.endswith('/'): infile = infile + '/' if not outfile.endswith('/'): outfile = outfile + '/' if subtable is None: subtable = '' if compressor is None: compressor = Blosc(cname='zstd', clevel=2, shuffle=0) print('processing %s to %s' % (infile+subtable, outfile+subtable)) # need to manually remove existing zarr file (if any) if (not nofile) and (not append): os.system("rm -fr " + outfile) os.system("mkdir " + outfile) if (keys is None) or (type(keys) is str): xds = tblconv.convert_simple_table(infile, outfile, subtable=subtable, #rowdim='d0' if keys is None else keys, timecols=[] if timecols is None else timecols, ignore= [] if ignorecols is None else ignorecols, compressor=compressor, chunks=chunks, nofile=nofile) else: xds = tblconv.convert_expanded_table(infile, outfile, keys=keys, subtable=subtable, subsel=None, timecols=[] if timecols is None else timecols, dimnames={}, ignore=[] if ignorecols is None else ignorecols, compressor=compressor, chunks=chunks, nofile=nofile) # write sw version that did this conversion to zarr directory with open(outfile + '/.version', 'w') as fid: fid.write('cngi-protoype ' + importlib_metadata.version('cngi-prototype') + '\n') return xds