Source code for deepdisc.data_format.conversions

import os

import astropy.io.fits as fits
import h5py
import numpy as np
import scarlet
from detectron2.utils.file_io import PathManager
from iopath.common.file_io import file_lock
import os, json, shutil
import logging
# Module-level logger named after this module; convert_to_json uses it to
# warn when a previously cached annotation file is reused.
logger = logging.getLogger(__name__)
def fitsim_to_numpy(img_files, outdir, start_index=0):
    """Converts a list of single-band FITS images to multi-band numpy arrays

    For every entry of ``img_files`` the primary HDU of each listed FITS file
    is read, the bands are stacked with ``np.array`` and the result is saved as
    one ``.npy`` file.  The output name is derived from the basename of the
    last file of the entry: everything from the first ``.fits`` onwards is
    dropped and the remainder is sliced with ``[start_index:]``.

    Parameters
    ----------
    img_files: list[list[str]]
        A nested list of the FITS image files.  The first index is the image
        and the second index is the filter
    outdir: str
        The directory to output the numpy arrays.  It is created if it does
        not exist.  A falsy value (``None`` or ``""``) falls back to the
        directory of the source FITS file.
    start_index: int
        Index in the image filename string (extension removed) from which the
        unique identifier used as the output name starts

    Raises
    ------
    ValueError
        If an entry of ``img_files`` contains no FITS files, since no output
        name can be derived for it.
    """
    for images in img_files:
        bands = []
        last_img = None
        for img in images:
            # Read the pixel data while the file is still open.
            with fits.open(img, memmap=False, lazy_load_hdus=False) as hdul:
                bands.append(hdul[0].data)
            last_img = img

        if last_img is None:
            # Without this guard the filename of the previous entry would be
            # silently reused and its output overwritten.
            raise ValueError(
                "each entry of img_files must list at least one FITS file"
            )

        full_im = np.array(bands)
        bn = os.path.basename(last_img).split(".fits")[0]

        if outdir:
            os.makedirs(outdir, exist_ok=True)
            target_dir = outdir
        else:
            target_dir = os.path.dirname(last_img)

        fn = os.path.join(target_dir, bn[start_index:]) + ".npy"
        np.save(fn, full_im)

    return
def fitsim_to_hdf5(img_files, outname, dset="train"):
    """Writes flattened multi-band images built from single-band FITS files to an hdf5 file

    For every entry of ``img_files`` the primary HDU of each listed FITS file
    is read, the bands are stacked with ``np.array`` and flattened to 1-D.
    All rows are then stored in a dataset named ``"images"`` inside a file
    opened in ``"w"`` mode.

    Parameters
    ----------
    img_files: list[list[str]]
        A nested list of the FITS image files.  The first index is the image
        and the second index is the filter
    outname: str
        The name of the output file
    dset: str
        Not used by this function
    """

    def _load_band(path):
        # Pull the pixel data out while the FITS file is still open.
        with fits.open(path, memmap=False, lazy_load_hdus=False) as hdul:
            return hdul[0].data

    flattened = []
    for band_paths in img_files:
        cube = np.array([_load_band(path) for path in band_paths])
        flattened.append(cube.flatten())

    stacked = np.array(flattened)
    with h5py.File(outname, "w") as h5file:
        h5file.create_dataset("images", data=stacked)
    return
def ddict_to_hdf5(dataset_dicts, outname):
    """Stores dataset dictionaries as JSON strings in an hdf5 file (for RAIL usage)

    Each dictionary is serialised with ``json.dumps`` and the resulting strings
    are written to a variable-length string dataset named
    ``"metadata_dicts"``.

    Parameters
    ----------
    dataset_dicts: list[dict]
        The dataset dicts
    outname: str
        The name of the output file
    """
    with h5py.File(outname, "w") as h5file:
        serialised = list(map(json.dumps, dataset_dicts))
        vlen_str = h5py.special_dtype(vlen=str)
        h5file.create_dataset("metadata_dicts", data=serialised, dtype=vlen_str)
    return
def numpyim_to_hdf5(img_files, outname):
    """Collects saved numpy images as flattened rows of an hdf5 file

    Every file is read with ``np.load`` and flattened to 1-D; the rows are
    stored in a dataset named ``"images"`` inside a file opened in ``"w"``
    mode.

    Parameters
    ----------
    img_files: list[str]
        The numpy files to load, one per image
    outname: str
        The name of the output file
    """
    rows = np.array([np.load(path).flatten() for path in img_files])
    with h5py.File(outname, "w") as h5file:
        h5file.create_dataset("images", data=rows)
    return
def convert_to_json(dict_list, output_file, allow_cached=True):
    """
    Dumps a list of dataset dicts to a json file, reusing an existing file if allowed.

    The parent directory of ``output_file`` is created first.  While holding a
    file lock on ``output_file``, an already existing file is kept (with a
    warning) when ``allow_cached`` is true; otherwise the data is written to
    ``output_file + ".tmp"`` and then moved into place.

    Args:
        dict_list: the json-serialisable object (list of dataset dicts) to save
        output_file: path of json file that will be saved to
        allow_cached: if json file is already present then skip conversion
    """
    PathManager.mkdirs(os.path.dirname(output_file))
    with file_lock(output_file):
        already_present = PathManager.exists(output_file)
        if already_present and allow_cached:
            logger.warning(
                f"Using previously cached COCO format annotations at '{output_file}'. "
                "You need to clear the cache file if your dataset has been modified."
            )
            return

        print(f"Caching COCO format annotations at '{output_file}' ...")
        # Write to a sibling temp file first, then move it over the target.
        staging_path = output_file + ".tmp"
        with PathManager.open(staging_path, "w") as handle:
            json.dump(dict_list, handle)
        shutil.move(staging_path, output_file)