# Source code for ytree.frontends.consistent_trees_hdf5.io

"""
ConsistentTreesHDF5Arbor io classes and member functions



"""

#-----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

from collections import defaultdict
import h5py
import numpy as np

from yt.funcs import \
    get_pbar

from ytree.data_structures.io import \
    DataFile, \
    DefaultRootFieldIO, \
    TreeFieldIO

class ChunkStore:
    """
    Cache sequential reads of on-disk, array-like datasets.

    Instead of slicing the underlying dataset once per request, read a
    chunk of ``chunk_size`` elements at a time and serve subsequent
    requests that fall inside the cached range from memory.  One chunk
    is kept per field name.

    Parameters
    ----------
    chunk_size : int, optional
        Number of elements to read per chunk.  Default: 262144 (256k).
    """

    def __init__(self, chunk_size=262144):
        self.chunk_size = chunk_size
        self.reset()

    def reset(self):
        """Drop all cached chunks and their index ranges."""
        # data: field name -> cached array slice
        # ind: field name -> (start, end) global indices the slice covers
        self.data = {}
        self.ind = {}

    def get(self, fh, field, index):
        """
        Return ``fh[field][start:end]``, served from the cache when possible.

        Parameters
        ----------
        fh : mapping of field name to sliceable dataset
            Open data handle (e.g., an h5py group); only ``fh[field][a:b]``
            is required of it.
        field : str
            Name of the dataset to read.
        index : tuple of (int, int)
            Half-open (start, end) range of elements to return.

        Returns
        -------
        The requested slice of the cached chunk.  Note this may be a view
        into the cache; copy it if it must outlive the next ``get`` call.
        """
        start, end = index
        si, ei = self.ind.get(field, (0, 0))

        # Reload when the field is uncached or the request falls outside
        # the cached [si, ei) window.
        if field not in self.data or ei < end or si > start:
            si = start
            # Extend past chunk_size if needed so a single oversized
            # request is never truncated.
            ei = max(start + self.chunk_size, end)
            self.ind[field] = (si, ei)
            self.data[field] = fh[field][si:ei]

        return self.data[field][start - si:end - si]

class ConsistentTreesHDF5DataFile(DataFile):
    """
    A consistent-trees HDF5 data file.

    Wraps an h5py.File handle, optionally descending into a named
    subgroup (``linkname``), and carries a per-file ChunkStore used
    to cache field reads.
    """

    def __init__(self, filename, linkname):
        super().__init__(filename)
        self.linkname = linkname
        self.real_fh = None
        self._field_cache = ChunkStore()

    def open(self):
        """Open the file read-only and point fh at the working group."""
        self.real_fh = h5py.File(self.filename, mode="r")
        # With no linkname, operate on the file root itself.
        if self.linkname is None:
            self.fh = self.real_fh
        else:
            self.fh = self.real_fh[self.linkname]

    def close(self):
        """Close the underlying file handle and invalidate fh."""
        self.real_fh.close()
        self.fh = None
class ConsistentTreesHDF5TreeFieldIO(TreeFieldIO):
    def _read_fields(self, root_node, fields, dtypes=None, root_only=False):
        """
        Read fields from disk for a single tree.

        Data for the tree occupies the contiguous index range
        [root_node._si, root_node._ei) within its data file; with
        root_only=True only the first element is read.
        """
        data_file = self.arbor.data_files[root_node._fi]

        # Open the file ourselves only if it isn't already open, and
        # remember to close it again afterwards.
        opened_here = data_file.fh is None
        if opened_here:
            data_file.open()

        fh = data_file.fh['Forests']
        # Array-of-structs layout nests the fields one group deeper.
        if self.arbor._aos:
            fh = fh['halos']

        if root_only:
            span = (root_node._si, root_node._si + 1)
        else:
            span = (root_node._si, root_node._ei)

        cache = data_file._field_cache
        field_data = {field: cache.get(fh, field, span)
                      for field in fields}

        if opened_here:
            data_file.close()

        # The cache hands back views into its chunks; copy so the data
        # survives future cache reloads.
        for field in fields:
            field_data[field] = field_data[field].copy()

        self._apply_units(fields, field_data)

        return field_data
class ConsistentTreesHDF5RootFieldIO(DefaultRootFieldIO):
    """
    Read in fields for first node in all trees/forest.

    This function is optimized for the struct of arrays layout.
    It will work for array of structs layout, but field access
    will be 1 to 2 orders of magnitude slower.
    """

    def _read_fields(self, storage_object, fields, dtypes=None):
        if dtypes is None:
            dtypes = {}

        arbor = self.arbor
        arbor._plant_trees()

        # Use the _node_io_loop machinery to get the data files
        # and corresponding tree indices.
        data_files, index_list, _ = arbor._node_io_loop_prepare(None)

        # Per-file slice boundaries into the flat node-info arrays.
        file_end = arbor._file_count.cumsum()
        file_start = file_end - arbor._file_count

        collected = defaultdict(list)
        done = 0
        pbar = get_pbar('Reading root fields', arbor.size)
        for ifile, (data_file, nodes) in enumerate(
                zip(data_files, index_list)):
            root_indices = \
                arbor._node_info['_si'][file_start[ifile]:file_end[ifile]]

            arbor._node_io_loop_start(data_file)

            fh = data_file.fh['Forests']
            # Array-of-structs layout nests fields one group deeper.
            if self.arbor._aos:
                fh = fh['halos']

            for field in fields:
                # Read the full dataset, then gather only the root rows.
                whole = fh[field][()]
                collected[field].append(whole[root_indices])

            arbor._node_io_loop_finish(data_file)

            done += root_indices.size
            pbar.update(done)
        pbar.finish()

        field_data = {}
        for field in fields:
            values = np.concatenate(collected[field])
            # Apply any caller-requested dtype conversion.
            dtype = dtypes.get(field)
            if dtype is not None:
                values = values.astype(dtype)
            field_data[field] = values

        self._apply_units(fields, field_data)

        return field_data