# Source code for pynpoint.readwrite.hdf5reading

"""
Module for reading HDF5 files that were created with the
:class:`~pynpoint.readwrite.hdf5writing.Hdf5WritingModule`.
"""

import os
import time
import warnings

from typing import Optional

import h5py
import numpy as np

from typeguard import typechecked

from pynpoint.core.processing import ReadingModule
from pynpoint.util.module import progress


class Hdf5ReadingModule(ReadingModule):
    """
    Reads an HDF5 file from the given *input_dir* or the default directory of the Pypeline. A tag
    dictionary has to be set in order to choose the datasets which will be imported into the
    database. Also the static and non-static attributes are read from the HDF5 file and stored
    in the database with the corresponding data set. This module should only be used for reading
    HDF5 files that are created with the Hdf5WritingModule. Reading different type of HDF5 files
    may lead to inconsistencies in the central database.
    """

    __author__ = 'Markus Bonse, Tomas Stolker'

    @typechecked
    def __init__(self,
                 name_in: str,
                 input_filename: Optional[str] = None,
                 input_dir: Optional[str] = None,
                 tag_dictionary: Optional[dict] = None) -> None:
        """
        Parameters
        ----------
        name_in : str
            Unique name of the module instance.
        input_filename : str, None
            The file name of the HDF5 input file. All files inside the input location will be
            imported if no filename is provided.
        input_dir : str, None
            The directory of the input HDF5 file. If no location is given, the default input
            location of the Pypeline is used.
        tag_dictionary : dict, None
            Dictionary of all data sets that will be imported. The dictionary format is
            {*tag_name_in_input_file*:*tag_name_in_database*, }. All data sets in the input HDF5
            file that match one of the *tag_name_in_input_file* will be imported. The tag name
            inside the internal Pypeline database will be changed to *tag_name_in_database*.

        Returns
        -------
        NoneType
            None
        """

        super().__init__(name_in, input_dir=input_dir)

        # A fresh dictionary is created per instance instead of using a mutable default
        if tag_dictionary is None:
            tag_dictionary = {}

        # One output port per database tag that will be written
        for out_tag in tag_dictionary.values():
            self.add_output_port(out_tag)

        self.m_filename = input_filename
        self._m_tag_dictionary = tag_dictionary

    @typechecked
    def read_single_hdf5(self,
                         file_in: str) -> None:
        """
        Function which reads a single HDF5 file. For each tag of the tag dictionary that is
        present in the file, the dataset, its static attributes, and (if a ``header_<tag>``
        entry exists) its non-static attributes are written to the corresponding output port.
        A warning is issued for each tag that is not found in the file.

        Parameters
        ----------
        file_in : str
            Path and name of the HDF5 file.

        Returns
        -------
        NoneType
            None
        """

        # The context manager closes the file handle, also when reading a dataset fails
        with h5py.File(file_in, mode='r') as hdf5_file:

            # Iterate over the items such that the lookup of the output tag does not
            # depend on the string conversion of the key
            for tag_in, tag_out in self._m_tag_dictionary.items():
                tag_in = str(tag_in)  # unicode keys cause errors

                if tag_in not in hdf5_file:
                    warnings.warn(f'The dataset with tag name \'{tag_in}\' is not found in the HDF5 '
                                  f'file.')
                    continue

                dataset = hdf5_file[tag_in]

                # add data
                port_out = self._m_output_ports[tag_out]
                port_out.set_all(np.asarray(dataset[...]))

                # add static attributes
                for attr_name, attr_value in dataset.attrs.items():
                    port_out.add_attribute(name=attr_name, value=attr_value)

                # add non-static attributes
                header_tag = 'header_' + tag_in

                if header_tag in hdf5_file:
                    for attr_name in hdf5_file[header_tag]:
                        attr_val = hdf5_file[header_tag + '/' + attr_name][...]
                        port_out.add_attribute(name=attr_name, value=attr_val, static=False)

    @typechecked
    def run(self) -> None:
        """
        Run method of the module. Looks for all HDF5 files in the input directory and reads the
        datasets that are provided in the tag dictionary.

        Returns
        -------
        NoneType
            None

        Raises
        ------
        AssertionError
            If an input filename was provided but the file does not exist in the input location.
        """

        # create list of files to be read
        files = []

        # joining with an empty string appends the trailing path separator
        tmp_dir = os.path.join(self.m_input_location, '')

        # check if a single input file is given
        if self.m_filename is not None:
            # create file path + filename
            file_path = tmp_dir + str(self.m_filename)

            # Raised explicitly (instead of an assert statement) so that the check is
            # not removed when Python runs with the -O flag
            if not os.path.isfile(file_path):
                raise AssertionError(f'Error: Input file does not exist. Input requested: '
                                     f'{self.m_filename}')

            files.append(file_path)

        else:
            # look for all HDF5 files in the directory
            for tmp_file in os.listdir(self.m_input_location):
                if tmp_file.endswith(('.hdf5', '.h5')):
                    files.append(tmp_dir + str(tmp_file))

        start_time = time.time()

        for i, tmp_file in enumerate(files):
            progress(i, len(files), 'Reading HDF5 file...', start_time)
            self.read_single_hdf5(tmp_file)