Source code for cngi.dio.read_image

#  CASA Next Generation Infrastructure
#  Copyright (C) 2021 AUI, Inc. Washington DC, USA
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""
This module will be included in the API.
"""


#############################################
def read_image(
    infile, chunks=None, consolidated=True, overwrite_encoded_chunks=True, **kwargs
):
    """
    Read xarray zarr format image from disk

    Parameters
    ----------
    infile : str
        input zarr image filename
    chunks : dict
        sets specified chunk size per dimension. Dict is in the form of
        'dim':chunk_size, for example {'d0':100, 'd1':100, 'chan':32, 'pol':1}.
        Default None uses the original zarr chunking.
    consolidated : bool
        use zarr consolidated metadata capability. Only works for stores that
        have already been consolidated. Default True works with datasets
        produced by convert_image which automatically consolidates metadata.
    overwrite_encoded_chunks : bool
        drop the zarr chunks encoded for each variable when a dataset is
        loaded with specified chunk sizes. Default True, only applies when
        chunks is not None.
    s3_key : string, optional
        optional support for explicit authentication if infile is provided as
        S3 URL. If S3 url is passed as input but this argument is not
        specified then only publicly-available, read-only buckets are
        accessible (so output dataset will be read-only).
    s3_secret : string, optional
        optional support for explicit authentication if infile is provided as
        S3 URL. If S3 url is passed as input but this argument is not
        specified then only publicly-available, read-only buckets are
        accessible (so output dataset will be read-only).

    Returns
    -------
    xarray.core.dataset.Dataset
        New xarray Dataset of image contents
    """
    import os

    from xarray import open_zarr

    if chunks is None:
        # let zarr supply the chunking stored in its own metadata; the
        # encoded chunks are then authoritative, so do not drop them
        chunks = "auto"
        overwrite_encoded_chunks = False

    if infile.lower().startswith("s3"):
        # for treating AWS object storage as a "file system"
        import s3fs

        # BUGFIX: original condition was `"s3_key" and "s3_secret" in kwargs`,
        # where the literal "s3_key" is always truthy, so only "s3_secret" was
        # actually checked and a lone s3_secret caused a KeyError below.
        if "s3_key" in kwargs and "s3_secret" in kwargs:
            # NOTE(security): plaintext authentication is a hazard that should
            # be replaced; boto3 credential chains can be used instead, see
            # https://s3fs.readthedocs.io/en/latest/#credentials
            # If this approach is extended, inputs should be sanitized.
            s3 = s3fs.S3FileSystem(
                anon=False,
                requester_pays=False,
                key=kwargs["s3_key"],
                secret=kwargs["s3_secret"],
            )
        else:
            # only publicly-available, read-only buckets will work.
            # Should probably catch the authentication exception here...
            s3 = s3fs.S3FileSystem(anon=True, requester_pays=False)

        # expect a path style URI to file link, e.g.,
        # 's3://cngi-prototype-test-data/2017.1.00271.S/member.uid___A001_X1273_X2e3_split_cal_concat_target_regrid.vis.zarr/xds0/'
        # BUGFIX: keep the FULL key path after the scheme; the original
        # re-joined only the first two components (bucket/name), which
        # truncated nested store paths like the '.../vis.zarr/xds0/' example.
        s3_url = infile.split(sep="//", maxsplit=1)[1]
        ds_path = s3_url.rstrip("/")

        INPUT = s3fs.S3Map(root="/" + ds_path, s3=s3, check=False)
        xds = open_zarr(
            INPUT,
            chunks=chunks,
            consolidated=consolidated,
            overwrite_encoded_chunks=overwrite_encoded_chunks,
        )
    else:
        # assume infile exists on local disk; expand '~' so user-relative
        # paths work
        infile = os.path.expanduser(infile)
        xds = open_zarr(
            infile,
            chunks=chunks,
            consolidated=consolidated,
            overwrite_encoded_chunks=overwrite_encoded_chunks,
        )

    return xds