Source code for ytree.frontends.lhalotree.arbor

"""
LHaloTreeArbor class and member functions



"""

# -----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------

import numpy as np
import glob

from yt.funcs import get_pbar
from unyt.exceptions import UnitParseError

from ytree.data_structures.arbor import Arbor

from ytree.frontends.lhalotree.fields import LHaloTreeFieldInfo
from ytree.frontends.lhalotree.io import LHaloTreeTreeFieldIO, LHaloTreeRootFieldIO
from ytree.frontends.lhalotree.utils import LHaloTreeReader
from ytree.utilities.logger import ytreeLogger



[docs]
class LHaloTreeArbor(Arbor):
    """
    Arbors for LHaloTree data.
    """

    _field_info_class = LHaloTreeFieldInfo
    _tree_field_io_class = LHaloTreeTreeFieldIO
    _root_field_io_class = LHaloTreeRootFieldIO
    _default_dtype = np.float32
    _node_io_attrs = ("_lht", "_index_in_lht")


[docs]
    def __init__(self, *args, **kwargs):
        r"""Added reader class to allow fast access of header info."""
        reader_keys = [
            "parameters",
            "parameter_file",
            "scale_factors",
            "scale_factor_file",
            "header_size",
            "nhalos_per_tree",
            "read_header_func",
            "item_dtype",
        ]
        reader_kwargs = dict()
        for k in reader_keys:
            if k in kwargs:
                reader_kwargs[k] = kwargs.pop(k)
        self._lht0 = LHaloTreeReader(args[0], **reader_kwargs)
        super().__init__(*args, **kwargs)
        kwargs.update(**reader_kwargs)
        lht0 = self._lht0
        files = sorted(glob.glob(lht0.filepattern))
        self._lhtfiles = [None for _ in files]
        self._lhtfiles[lht0.fileindex] = lht0
        if len(files) > 1:
            if (
                ("header_size" in reader_kwargs)  # pragma: no cover
                or ("nhalos_per_tree" in reader_kwargs)
            ):
                raise RuntimeError(
                    "Cannot use 'header_size' or 'nhalos_per_tree' "
                    + "for trees split across multiple files. Use "
                    + "'read_header_func' instead."
                )
            reader_kwargs.update(
                parameters=lht0.parameters,
                scale_factors=lht0.scale_factors,
                item_dtype=lht0.item_dtype,
                silent=True,
            )
            for f in files:
                if f == lht0.filename:
                    continue
                ilht = LHaloTreeReader(f, **reader_kwargs)
                self._lhtfiles[ilht.fileindex] = ilht
        # Assert files are there
        for f in self._lhtfiles:
            if f is None:  # pragma: no cover
                raise RuntimeError("Not all files were read.")


    # NOTE: LHaloTree is currently using np.memmap so it dosn't need
    # fd to be passed. If this causes memory issues for larger trees,
    # the below functions allow a file descriptor to be checked and updated
    # as needed to prevent unnecessary open/close operations when accessing
    # nodes in the same file. Alternatively, see io.py for an option that
    # caches the file descriptor any time a field is read.

    # def _func_update_file(self, node, *args, **kwargs):
    #     """Call a file making sure that the correct file is open."""
    #     func = kwargs.pop("_func", None)
    #     if func is None:
    #         raise RuntimeError("No function passed.")
    #     fd = self._node_io_fd
    #     if fd is None or (node._lht.filename != fd.name):
    #         if fd is not None:
    #             fd.close()
    #         self._node_io_fd = open(node._lht.filename, 'rb')
    #     kwargs["f"] = self._node_io_fd
    #     return func(node, *args, **kwargs)

    # def _node_io_loop(self, func, *args, **kwargs):
    #     """
    #     Since LHaloTrees can be split across multiple files, this
    #     optimization only works if the nodes are all in the same file.
    #     It's a small optimization that keeps the file open
    #     when doing io for multiple trees in the same file.
    #     """
    #     self._node_io_fd = None
    #     kwargs["_func"] = func
    #     super()._node_io_loop(
    #         self._func_update_file, *args, **kwargs)
    #     if self._node_io_fd is not None:
    #         self._node_io_fd.close()

    def _parse_parameter_file(self):
        """
        Parse the file header, get things like:
        - cosmological parameters
        - box size
        - list of fields
        """

        for u in ["mass", "vel", "len"]:
            setattr(self, "_lht_units_" + u, getattr(self._lht0, "units_" + u))
            # v, s = getattr(self._lht0, 'units_' + u).split()
            # setattr(self, '_lht_units_' + u, self.quan(float(v), s))

        self.hubble_constant = self._lht0.hubble_constant
        self.omega_matter = self._lht0.omega_matter
        self.omega_lambda = self._lht0.omega_lambda
        self.box_size = self.quan(self._lht0.box_size, self._lht_units_len)
        # self.box_size = self._lht0.box_size * self._lht_units_len

        # a list of all fields on disk
        fields = self._lht0.fields
        # a dictionary of information for each field
        # this can have specialized information for reading the field
        fi = {}
        # for example:
        # fi["mass"] = {"column": 4, "units": "Msun/h", ...}
        none_keys = [
            "Descendant",
            "FirstProgenitor",
            "NextProgenitor",
            "FirstHaloInFOFgroup",
            "NextHaloInFOFgroup",
            "Len",
            "MostBoundID",
            "SnapNum",
            "FileNr",
            "SubhaloIndex",
            "uid",
            "desc_uid",
            "scale_factor",
            "Jx",
            "Jy",
            "Jz",
        ]
        mass_keys = ["M_Mean200", "Mvir", "M_TopHat", "SubHalfMass"]
        dist_keys = ["x", "y", "z"]
        velo_keys = ["VelDisp", "Vmax", "vx", "vy", "vz"]
        all_keys = [none_keys, mass_keys, dist_keys, velo_keys]
        all_units = ["", self._lht_units_mass, self._lht_units_len, self._lht_units_vel]
        for keylist, unit in zip(all_keys, all_units):
            try:
                self.quan(1, unit)
                punit = unit
            except UnitParseError:  # pragma: no cover
                ytreeLogger.warning(f"Could not parse unit: {unit}")
                punit = ""
            for k in keylist:
                fi[k] = {"units": punit}

        self.field_list = fields
        self.field_info.update(fi)

    def _plant_trees(self):
        """
        This is where we figure out how many trees there are,
        create the array to hold them, and instantiate a root
        tree_node for each tree.
        """

        if self.is_planted:
            return

        # open file, count trees
        ntrees_tot = 0
        for lht in self._lhtfiles:
            ntrees_tot += lht.ntrees
        self._size = ntrees_tot

        pbar = get_pbar("Loading tree roots", ntrees_tot)
        self._node_info["_lht"] = np.empty(ntrees_tot, dtype=object)

        itot = 0
        for ifile, lht in enumerate(self._lhtfiles):
            ntrees = lht.ntrees
            root_uids = lht.all_uids[lht.nhalos_before_tree]
            for i in range(ntrees):
                self._node_info["uid"][itot] = root_uids[i]
                self._node_info["_lht"][itot] = lht
                self._node_info["_index_in_lht"][itot] = i
                itot += 1
                pbar.update(itot)

        pbar.finish()

    @classmethod
    def _is_valid(self, *args, **kwargs):
        """
        Return True if we are able to initialize a reader.
        """
        try:
            kwargs.update(silent=True, validate=True)
            LHaloTreeReader(*args, **kwargs)
        except (IOError, TypeError):
            return False
        return True