Source code for ytree.frontends.ytree.arbor

"""
YTreeArbor class and member functions



"""

#-----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

import h5py
import json
import numpy as np
import os

from unyt.unit_registry import \
    UnitRegistry

from yt.data_objects.data_containers import \
    YTDataContainer
from yt.utilities.logger import \
    ytLogger

from ytree.data_structures.arbor import \
    Arbor
from ytree.frontends.ytree.io import \
    YTreeDataFile, \
    YTreeRootFieldIO, \
    YTreeTreeFieldIO
from ytree.frontends.ytree.utilities import \
    get_about, \
    get_conditional
from ytree.utilities.io import \
    _hdf5_yt_attr, \
    parse_h5_attr
from ytree.utilities.logger import \
    log_level
from ytree.yt_frontend import \
    YTreeDataset

[docs]class YTreeArbor(Arbor): """ Class for Arbors created from the :func:`~ytree.data_structures.arbor.Arbor.save_arbor` or :func:`~ytree.data_structures.tree_node.TreeNode.save_tree` functions. """ _root_field_io_class = YTreeRootFieldIO _tree_field_io_class = YTreeTreeFieldIO _suffix = ".h5" _node_io_attrs = ('_ai',) def _node_io_loop_prepare(self, nodes): if nodes is None: nodes = np.arange(self.size) ai = self._node_info['_ai'] elif nodes.dtype == object: ai = np.array( [node._ai if node.is_root else node.root._ai for node in nodes]) else: # assume an array of indices ai = self._node_info['_ai'][nodes] # the order they will be processed io_order = np.argsort(ai) ai = ai[io_order] # array to return them to original order return_order = np.empty_like(io_order) return_order[io_order] = np.arange(io_order.size) dfi = np.digitize(ai, self._node_io._ei) udfi = np.unique(dfi) data_files = [self.data_files[i] for i in udfi] index_list = [io_order[dfi == i] for i in udfi] return data_files, index_list, return_order def _node_io_loop_start(self, data_file): data_file._field_cache = {} data_file.open() def _node_io_loop_finish(self, data_file): data_file._field_cache = {} data_file.close() def _parse_parameter_file(self): self._prefix = \ self.filename[:self.filename.rfind(self._suffix)] fh = h5py.File(self.filename, mode="r") for attr in ["hubble_constant", "omega_matter", "omega_lambda"]: setattr(self, attr, fh.attrs.get(attr, None)) if "unit_registry_json" in fh.attrs: self.unit_registry = \ UnitRegistry.from_json( parse_h5_attr(fh, "unit_registry_json")) if "box_size" in fh.attrs: self.box_size = _hdf5_yt_attr( fh, "box_size", unit_registry=self.unit_registry) self.field_info.update( json.loads(parse_h5_attr(fh, "field_info"))) self._size = fh.attrs["total_trees"] fh.close() # analysis fields in sidecar files analysis_filename = f"{self._prefix}-analysis{self._suffix}" if os.path.exists(analysis_filename): self.analysis_filename = analysis_filename fh = h5py.File(analysis_filename, mode="r") analysis_fi = json.loads(parse_h5_attr(fh, "field_info")) fh.close() for field in analysis_fi: analysis_fi[field]["type"] = "analysis_saved" self.field_info.update(analysis_fi) else: self.analysis_filename = None self.field_list = list(self.field_info.keys()) def _plant_trees(self): if self.is_planted: return fh = h5py.File(self.filename, "r") self._node_info['uid'][:] = fh["data"]["uid"][()].astype(np.int64) self._node_io._si = fh["index"]["tree_start_index"][()] self._node_io._ei = fh["index"]["tree_end_index"][()] fh.close() self._node_info['_ai'][:] = np.arange(self.size) self.data_files = \ [YTreeDataFile(f"{self._prefix}_{i:04d}{self._suffix}") for i in range(self._node_io._si.size)] if self.analysis_filename is not None: for i, df in enumerate(self.data_files): df.analysis_filename = \ f"{self._prefix}_{i:04d}-analysis{self._suffix}"
[docs] def get_yt_selection(self, above=None, below=None, equal=None, about=None, conditionals=None, data_source=None): """ Get a selection of halos meeting given criteria. This function can be used to create database-like queries to search for halos meeting various criteria. It will return a :class:`~yt.data_objects.selection_objects.cut_region.YTCutRegion` that can be queried to get field values for all halos meeting the selection criteria. The :class:`~yt.data_objects.selection_objects.cut_region.YTCutRegion` can then be passed to :func:`~ytree.frontends.ytree.arbor.YTreeArbor.get_nodes_from_selection` to get all the :class:`~ytree.data_structures.tree_node.TreeNode` objects that meet the criteria. If multiple criteria are provided, selected halos must meet all criteria. To specify a custom data container, use the ``ytds`` attribute associated with the arbor to access the merger tree data as a yt dataset. For example: >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> ds = a.ytds Parameters ---------- above : optional, list of tuples with (field, value, <units>) Halos meeting a given criterion must have field values at or above the provided limiting value. Each entry in the list must contain the field name, limiting value, and (optionally) units. below : optional, list of tuples with (field, value, <units>) Halos meeting a given criterion must have field values at or below the provided limiting value. Each entry in the list must contain the field name, limiting value, and (optionally) units. equal : optional, list of tuples with (field, value, <units>) Halos meeting a given criterion must have field values equal to the provided value. Each entry in the list must contain the field name, value, and (optionally) units. about : optional, list of tuples with (field, value, tolerance, <units>) Halos meeting a given criterion must have field values within the tolerance of the provided value. Each entry in the list must contain the field name, value, tolerance, and (optionally) units. conditionals : optional, list of strings A list of conditionals for constructing a custom :class:`~yt.data_objects.selection_objects.cut_region.YTCutRegion`. This can be used instead of above/below/equal/about to create more complex selection criteria. See the Cut Regions section in the yt documentation for more information. The conditionals keyword can only be used if none of the first for selection keywords are given. data_source : optional, :class:`~yt.data_objects.data_containers.YTDataContainer` The source yt data container to be used to make the cut region. If none given, the :class:`~yt.data_objects.static_output.Dataset.all_data` container (i.e., the full dataset) is used. Returns ------- cr : :class:`~yt.data_objects.selection_objects.cut_region.YTCutRegion` The cut region associated with the provided selection criteria. Examples -------- >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> # select halos above 1e12 Msun at redshift > 0.5 >>> sel = a.get_yt_selection( ... above=[("mass", 1e13, "Msun"), ... ("redshift", 0.5)]) >>> print (sel["halos", "mass"]) >>> print (sel["halos", "virial_radius"]) >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> # select halos below 1e13 Msun at redshift > 1 >>> sel = a.get_yt_selection( ... below=[("mass", 1e13, "Msun")], ... above=[("redshift", 1)]) >>> print (sel["halos", "mass"]) >>> print (sel["halos", "virial_radius"]) >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> # select phantom halos (a consistent-trees field) >>> sel = a.get_yt_selection(equal=[("phantom", 1)]) >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> # select halos with vmax of 200 +-10 km/s (i.e., 5%) >>> sel = a.get_yt_selection(about=[("vmax", 200, "km/s", 0.05)]) >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> # use a yt conditional >>> sel = a.get_yt_selection( ... conditionals=['obj["halos", "mass"] > 1e12']) >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> # select halos only within a sphere >>> ds = a.ytds >>> sphere = ds.sphere(ds.domain_center, (10, Mpc)) >>> sel = a.get_yt_selection( ... above=[("mass", 1e13)], ... data_source=sphere) >>> # get the TreeNodes for the selection >>> for node in a.get_nodes_from_selection(sel): ... print (node["mass"]) See Also -------- select_halos, get_nodes_from_selection """ if above is None: above = [] if below is None: below = [] if equal is None: equal = [] if about is None: about = [] if conditionals is None: conditionals = [] if not (bool(conditionals) ^ any([above, below, equal, about])): raise ValueError( "Must specify either conditionals or above/below/equal/about, not both." f"\nconditionals: {conditionals}" f"\nabove: {above}" f"\nbelow: {below}" f"\nequal: {equal}" f"\nabout: {about}") if data_source is None: data_source = self.ytds.all_data() if not isinstance(data_source, YTDataContainer): raise ValueError( f"data_source must be a YTDataContainer: {data_source}.") for criterion in above: condition = get_conditional("above", criterion) conditionals.append(condition) for criterion in below: condition = get_conditional("below", criterion) conditionals.append(condition) for criterion in equal: condition = get_conditional("equal", criterion) conditionals.append(condition) for criterion in about: conditions = get_about(criterion) conditionals.extend(conditions) cr = data_source.cut_region(conditionals) return cr
[docs] def get_nodes_from_selection(self, container): """ Generate TreeNodes from a yt data container. All halos contained within the data container will be returned as TreeNode objects. This returns a generator that can be iterated over or cast as a list. Parameters ---------- container : :class:`~yt.data_objects.data_containers.YTDataContainer` Data container, such as a sphere or region, from which nodes will be generated. Returns ------- nodes : generator The :class:`~ytree.data_structures.tree_node.TreeNode` objects contained within the container. Examples -------- >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> c = a.arr([0.5, 0.5, 0.5], "unitary") >>> sphere = a.ytds.sphere(c, (0.1, "unitary")) >>> for node in a.get_nodes_from_selection(sphere): ... print (node["mass"]) >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> # select halos above 1e12 Msun at redshift > 0.5 >>> sel = a.get_yt_selection( ... above=[("mass", 1e13, "Msun"), ... ("redshift", 0.5)]) >>> my_nodes = list(a.get_nodes_from_selection(sel)) """ self._plant_trees() container.get_data([('halos', 'file_number'), ('halos', 'file_root_index'), ('halos', 'tree_index')]) file_number = container['halos', 'file_number'].d.astype(int) file_root_index = container['halos', 'file_root_index'].d.astype(int) tree_index = container['halos', 'tree_index'].d.astype(int) arbor_index = self._node_io._si[file_number] + file_root_index for ai, ti in zip(arbor_index, tree_index): root_node = self._generate_root_node(ai) if ti == 0: yield root_node else: yield root_node.get_node("forest", ti)
_ytds = None @property def ytds(self): """ Load as a yt dataset. Merger tree data is loaded as a yt dataset, providing full access to yt functionality. Fields are accessed with the naming convention, ("halos", <field name>). Examples -------- >>> import ytree >>> a = ytree.load("arbor/arbor.h5") >>> >>> ds = a.ytds >>> sphere = ds.sphere(ds.domain_center, (5, "Mpc")) >>> print (sphere["halos", "mass"]) >>> >>> for node in a.get_nodes_from_selection(sphere): ... print (node["position"]) """ if self._ytds is not None: return self._ytds with log_level(40, mylog=ytLogger): self._ytds = YTreeDataset(self.filename) return self._ytds @classmethod def _is_valid(self, *args, **kwargs): """ File should end in .h5, be loadable as an hdf5 file, and have "arbor_type" attribute. """ fn = args[0] if not fn.endswith(self._suffix): return False try: with h5py.File(fn, "r") as f: if "arbor_type" not in f.attrs: return False atype = f.attrs["arbor_type"] if hasattr(atype, "astype"): atype = atype.astype(str) if atype != "YTreeArbor": return False except BaseException: return False return True