
"""
Arbor class and member functions



"""

#-----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

from collections import \
    defaultdict
import functools
import json
import numpy as np
import os

from yt.extern.six import \
    add_metaclass, \
    string_types
from yt.frontends.ytdata.utilities import \
    save_as_dataset
from yt.funcs import \
    get_pbar, \
    TqdmProgressBar
from yt.units.dimensions import \
    length
from yt.units.unit_registry import \
    UnitRegistry
from yt.units.yt_array import \
    YTArray, \
    YTQuantity
from yt.utilities.cosmology import \
    Cosmology

from ytree.data_structures.fields import \
    FakeFieldContainer, \
    FieldContainer, \
    FieldInfoContainer
from ytree.data_structures.misc import \
    _determine_output_filename
from ytree.data_structures.io import \
    DefaultRootFieldIO, \
    TreeFieldIO
from ytree.data_structures.tree_node import \
    TreeNode
from ytree.data_structures.tree_node_selector import \
    tree_node_selector_registry
from ytree.utilities.exceptions import \
    ArborFieldAlreadyExists, \
    ArborFieldDependencyNotFound
from ytree.utilities.logger import \
    fake_pbar, \
    ytreeLogger as mylog

arbor_registry = {}

class RegisteredArbor(type):
    """
    Add to the registry of known Arbor classes to cycle
    through in the load function.
    """
    def __init__(cls, name, b, d):
        type.__init__(cls, name, b, d)
        arbor_type = name[:name.rfind("Arbor")]
        if arbor_type:
            arbor_registry[arbor_type] = cls
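
# A hedged illustration of the metaclass above: any subclass whose name
# ends in "Arbor" is registered automatically under the portion of the
# name preceding "Arbor" (the subclass below is hypothetical):
#
#   >>> class MyFormatArbor(Arbor):
#   ...     pass
#   >>> "MyFormat" in arbor_registry
#   True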

@add_metaclass(RegisteredArbor)
class Arbor(object):
    """
    Base class for all Arbor classes.

    Loads a merger-tree output file or a series of halo catalogs
    and creates trees, stored in an array in
    :func:`~ytree.data_structures.arbor.Arbor.trees`.
    Arbors can be saved in a universal format with
    :func:`~ytree.data_structures.arbor.Arbor.save_arbor`. It also
    provides convenience functions for creating YTArrays and
    YTQuantities and a cosmology calculator.
    """

    _field_info_class = FieldInfoContainer
    _root_field_io_class = DefaultRootFieldIO
    _tree_field_io_class = TreeFieldIO

    def __init__(self, filename):
        """
        Initialize an Arbor given an input file.
        """
        self.filename = filename
        self.basename = os.path.basename(filename)
        self._parse_parameter_file()
        self._set_units()
        self._field_data = FieldContainer(self)
        self._node_io = self._tree_field_io_class(self)
        self._root_io = self._root_field_io_class(self)
        self._get_data_files()
        self._setup_fields()
        self._set_default_selector()
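
    # Note: Arbors are normally created through ``ytree.load`` rather than
    # by instantiating this class directly; a minimal sketch (the filename
    # is hypothetical):
    #
    #   >>> import ytree
    #   >>> a = ytree.load("rockstar_halos/trees/tree_0_0_0.dat")
    #   >>> print(a.trees)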

    def _get_data_files(self):
        """
        Get all files that hold field data and make them known
        to the i/o system.
        """
        pass

    def _parse_parameter_file(self):
        """
        Read relevant parameters from the parameter file or file
        header and detect fields.
        """
        raise NotImplementedError

    def _plant_trees(self):
        """
        Create the list of root tree nodes.
        """
        raise NotImplementedError

    def is_setup(self, tree_node):
        """
        Return True if arrays of uids and descendent uids have
        been read in. Setup is also complete if the tree has
        already been grown.
        """
        return self.is_grown(tree_node) or \
            tree_node._uids is not None

    def _setup_tree(self, tree_node, **kwargs):
        """
        Create arrays of uids and desc_uids and attach them to the
        root node.
        """
        # skip if this is not a root or if already setup
        if self.is_setup(tree_node):
            return

        idtype = np.int64
        fields, _ = \
            self.field_info.resolve_field_dependencies(["uid", "desc_uid"])
        halo_id_f, desc_id_f = fields
        dtypes = {halo_id_f: idtype, desc_id_f: idtype}
        field_data = self._node_io._read_fields(
            tree_node, fields, dtypes=dtypes, **kwargs)
        tree_node._uids = field_data[halo_id_f]
        tree_node._desc_uids = field_data[desc_id_f]
        tree_node._tree_size = tree_node._uids.size

    def is_grown(self, tree_node):
        """
        Return True if a tree has been fully assembled, i.e.,
        the hierarchy of ancestor tree nodes has been built.
        """
        return tree_node.root != -1

    def _grow_tree(self, tree_node, **kwargs):
        """
        Create an array of TreeNodes hanging off the root node
        and assemble the tree structure.
        """
        # skip this if not a root or if already grown
        if self.is_grown(tree_node):
            return

        self._setup_tree(tree_node, **kwargs)
        nhalos = tree_node.uids.size
        # np.object is deprecated in modern NumPy; use the builtin object
        nodes = np.empty(nhalos, dtype=object)
        nodes[0] = tree_node
        for i in range(1, nhalos):
            nodes[i] = TreeNode(tree_node.uids[i], arbor=self)
        tree_node._nodes = nodes

        # Add tree information to nodes
        uidmap = {}
        for i, node in enumerate(nodes):
            node.treeid = i
            node.root = tree_node
            uidmap[tree_node.uids[i]] = i

        # Link ancestors and descendents.
        # Separate loop for trees like lhalotree where the descendent
        # can follow in order.
        for i, node in enumerate(nodes):
            descid = tree_node.desc_uids[i]
            if descid != -1:
                desc = nodes[uidmap[descid]]
                desc.add_ancestor(node)
                node.descendent = desc

    def _node_io_loop(self, func, *args, **kwargs):
        """
        Call the provided function over a list of nodes.

        If possible, group nodes by common data files to speed
        things up. This should work like __iter__, except we call
        a function instead of yielding.

        Parameters
        ----------
        func : function
            Function to be called on an array of nodes.
        pbar : optional, string or yt.funcs.TqdmProgressBar
            A progress bar to be updated with each iteration.
            If a string, a progress bar will be created and the
            finish function will be called. If a progress bar is
            provided, the finish function will not be called.
            Default: None (no progress bar).
        root_nodes : optional, array of root TreeNodes
            Array of nodes over which the function will be called.
            If None, the list will be self.trees (i.e., all
            root nodes).
            Default: None.
        store : optional, string
            If not None, any return value captured from the function
            will be stored in an attribute with this name associated
            with the TreeNode.
            Default: None.
""" pbar = kwargs.pop("pbar", None) root_nodes = kwargs.pop("root_nodes", None) if root_nodes is None: root_nodes = self.trees store = kwargs.pop("store", None) data_files, node_list = self._node_io_loop_prepare(root_nodes) nnodes = sum([nodes.size for nodes in node_list]) finish = True if pbar is None: pbar = fake_pbar("", nnodes) elif not isinstance(pbar, TqdmProgressBar): pbar = get_pbar(pbar, nnodes) else: finish = False for data_file, nodes in zip(data_files, node_list): self._node_io_loop_start(data_file) for node in nodes: rval = func(node, *args, **kwargs) if store is not None: setattr(node, store, rval) pbar.update(1) self._node_io_loop_finish(data_file) if finish: pbar.finish() def _node_io_loop_start(self, data_file): pass def _node_io_loop_finish(self, data_file): pass def _node_io_loop_prepare(self, root_nodes): """ This is called at the beginning of _node_io_loop. In different frontends, this can be used to group nodes by common data files. Return ------ list of data files and a list of node arrays Each data file corresponds to an array of nodes. """ return [None], [root_nodes] def __iter__(self): """ Iterate over all items in the tree list. If possible, group nodes by common data files to speed things up. """ data_files, node_list = self._node_io_loop_prepare(self.trees) for data_file, nodes in zip(data_files, node_list): self._node_io_loop_start(data_file) for node in nodes: yield node self._node_io_loop_finish(data_file) _trees = None @property def trees(self): """ Array containing all trees in the arbor. """ if self._trees is None: self._plant_trees() return self._trees def __repr__(self): return self.basename def __getitem__(self, key): return self.query(key) def query(self, key): """ If given a string, return an array of field values for the roots of all trees. If given an integer, return a tree from the list of trees. """ if isinstance(key, string_types): if key in ("tree", "prog"): raise SyntaxError("Argument must be a field or integer.") self._root_io.get_fields(self, fields=[key]) if self.field_info[key].get("type") == "analysis": return self._field_data.pop(key) return self._field_data[key] return self.trees[key] def __len__(self): """ Return length of tree list. """ return self.trees.size _field_info = None @property def field_info(self): """ A dictionary containing information for each available field. """ if self._field_info is None and \ self._field_info_class is not None: self._field_info = self._field_info_class(self) return self._field_info @property def size(self): """ Return length of tree list. """ return self.trees.size _unit_registry = None @property def unit_registry(self): """ Unit system registry. """ if self._unit_registry is None: self._unit_registry = UnitRegistry() return self._unit_registry @unit_registry.setter def unit_registry(self, value): self._unit_registry = value self._arr = None self._quan = None _hubble_constant = None @property def hubble_constant(self): """ Value of the Hubble parameter. """ return self._hubble_constant @hubble_constant.setter def hubble_constant(self, value): self._hubble_constant = value # reset the unit registry lut while preserving other changes self.unit_registry = UnitRegistry.from_json( self.unit_registry.to_json()) self.unit_registry.modify("h", self.hubble_constant) _box_size = None @property def box_size(self): """ The simulation box size. 
""" return self._box_size @box_size.setter def box_size(self, value): self._box_size = value # set unitary as soon as we know the box size self.unit_registry.add( "unitary", float(self.box_size.in_base()), length) def _setup_fields(self): self.derived_field_list = [] self.analysis_field_list = [] self.field_info.setup_known_fields() self.field_info.setup_aliases() self.field_info.setup_derived_fields() self.field_info.setup_vector_fields() def _set_units(self): """ Set "cm" units for explicitly comoving. Note, we are using comoving units all the time since we are dealing with data at multiple redshifts. """ for my_unit in ["m", "pc", "AU", "au"]: new_unit = "%scm" % my_unit self._unit_registry.add( new_unit, self._unit_registry.lut[my_unit][0], length, self._unit_registry.lut[my_unit][3]) self.cosmology = Cosmology( hubble_constant=self.hubble_constant, omega_matter=self.omega_matter, omega_lambda=self.omega_lambda, unit_registry=self.unit_registry)

    def set_selector(self, selector, *args, **kwargs):
        r"""
        Sets the tree node selector to be used.

        This sets the manner in which halo progenitors are
        chosen from a list of ancestors. The most obvious example
        is to select the most massive ancestor.

        Parameters
        ----------
        selector : string
            Name of the selector to be used.

        Any additional arguments and keywords to be provided to
        the selector function should follow.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("rockstar_halos/trees/tree_0_0_0.dat")
        >>> a.set_selector("max_field_value", "mass")

        """
        self.selector = tree_node_selector_registry.find(
            selector, *args, **kwargs)

    _arr = None
    @property
    def arr(self):
        """
        Create a YTArray using the Arbor's unit registry.
        """
        if self._arr is not None:
            return self._arr
        self._arr = functools.partial(
            YTArray, registry=self.unit_registry)
        return self._arr

    _quan = None
    @property
    def quan(self):
        """
        Create a YTQuantity using the Arbor's unit registry.
        """
        if self._quan is not None:
            return self._quan
        self._quan = functools.partial(
            YTQuantity, registry=self.unit_registry)
        return self._quan

    def _set_default_selector(self):
        """
        Set the default tree node selector as maximum mass.
        """
        self.set_selector("max_field_value", "mass")
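
    # Hedged usage sketch for the ``arr`` and ``quan`` factories above
    # (the values are hypothetical):
    #
    #   >>> m = a.quan(1e12, "Msun")
    #   >>> pos = a.arr([0.1, 0.2, 0.3], "unitary")
    #
    # Quantities created this way carry the arbor's unit registry, so
    # comoving and "unitary" units resolve against this simulation.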

    def select_halos(self, criteria, trees=None, select_from="tree",
                     fields=None):
        """
        Select halos from the arbor based on a set of criteria
        given as a string.

        Parameters
        ----------
        criteria : string
            A string that will eval to a Numpy-like selection
            operation performed on a TreeNode object called "tree".
            Example: 'tree["tree", "redshift"] > 1'
        trees : optional, list or array of TreeNodes
            A list or array of TreeNode objects in which to search.
            If none given, the search is performed over the full
            arbor.
        select_from : optional, "tree" or "prog"
            Determines whether to perform the search over the full
            tree or just the main progenitors. Note, the value given
            must be consistent with what appears in the criteria
            string. For example, a criteria string of
            'tree["tree", "redshift"] > 1' cannot be used when
            setting select_from to "prog".
            Default: "tree".
        fields : optional, list of strings
            Use to provide a list of fields required by the criteria
            evaluation. If given, fields will be preloaded in an
            optimized way and the search will go faster.
            Default: None.

        Returns
        -------
        halos : array of TreeNodes
            A flat array of all TreeNodes meeting the criteria.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("tree_0_0_0.dat")
        >>> halos = a.select_halos('tree["tree", "redshift"] > 1',
        ...                        fields=["redshift"])
        >>>
        >>> halos = a.select_halos('tree["prog", "mass"].to("Msun") >= 1e10',
        ...                        select_from="prog", fields=["mass"])

        """
        if select_from not in ["tree", "prog"]:
            raise SyntaxError(
                "Keyword \"select_from\" must be either \"tree\" or \"prog\".")

        if trees is None:
            trees = self.trees

        if fields is None:
            fields = []

        self._node_io_loop(self._setup_tree, root_nodes=trees,
                           pbar="Setting up trees")
        if fields:
            self._node_io_loop(
                self._node_io.get_fields,
                pbar="Getting fields",
                root_nodes=trees, fields=fields, root_only=False)

        halos = []
        # use len(trees) so the progress bar total is correct when a
        # subset of trees is given
        pbar = get_pbar("Selecting halos", len(trees))
        for tree in trees:
            my_filter = np.asarray(eval(criteria))
            if my_filter.size != tree[select_from].size:
                raise RuntimeError(
                    ("Filter array and tree array sizes do not match. " +
                     "Make sure select_from (\"%s\") matches criteria "
                     "(\"%s\").") % (select_from, criteria))
            halos.extend(tree[select_from][my_filter])
            pbar.update(1)
        pbar.finish()
        return np.array(halos)

    def add_analysis_field(self, name, units):
        r"""
        Add an empty field to be filled by analysis
        operations.

        Parameters
        ----------
        name : string
            Field name.
        units : string
            Field units.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("tree_0_0_0.dat")
        >>> a.add_analysis_field("robots", "Msun * kpc")
        >>> # Set field for some halo.
        >>> a[0]["tree"][7]["robots"] = 1979.816

        """
        if name in self.field_info:
            raise ArborFieldAlreadyExists(name, arbor=self)

        self.analysis_field_list.append(name)
        self.field_info[name] = {"type": "analysis",
                                 "units": units}

    def add_alias_field(self, alias, field, units=None,
                        force_add=True):
        r"""
        Add a field as an alias to another field.

        Parameters
        ----------
        alias : string
            Alias name.
        field : string
            The field to be aliased.
        units : optional, string
            Units in which the field will be returned.
        force_add : optional, bool
            If True, add the field even if it already exists,
            warning the user, and raise an exception if the
            dependencies do not exist. If False, silently do
            nothing in both cases.
            Default: True.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("tree_0_0_0.dat")
        >>> # "Mvir" exists on disk
        >>> a.add_alias_field("mass", "Mvir", units="Msun")
        >>> print (a["mass"])

        """
        if alias in self.field_info:
            if force_add:
                ftype = self.field_info[alias].get("type", "on-disk")
                if ftype in ["alias", "derived"]:
                    fl = self.derived_field_list
                else:
                    fl = self.field_list
                mylog.warning(
                    ("Overriding field \"%s\" that already " +
                     "exists as %s field.") % (alias, ftype))
                fl.pop(fl.index(alias))
            else:
                return

        if field not in self.field_info:
            if force_add:
                raise ArborFieldDependencyNotFound(
                    field, alias, arbor=self)
            else:
                return

        if units is None:
            units = self.field_info[field].get("units")
        self.derived_field_list.append(alias)
        self.field_info[alias] = \
            {"type": "alias", "units": units,
             "dependencies": [field]}
        if "aliases" not in self.field_info[field]:
            self.field_info[field]["aliases"] = []
        self.field_info[field]["aliases"].append(alias)

    def add_derived_field(self, name, function,
                          units=None, description=None,
                          vector_field=False, force_add=True):
        r"""
        Add a field that is a function of other fields.

        Parameters
        ----------
        name : string
            Field name.
        function : callable
            The function to be called to generate the field.
            This function should take two arguments, the arbor
            and the data structure containing the dependent
            fields. See below for an example.
        units : optional, string
            The units in which the field will be returned.
        description : optional, string
            A short description of the field.
        vector_field : optional, bool
            If True, field is an xyz vector.
            Default: False.
        force_add : optional, bool
            If True, add the field even if it already exists,
            warning the user, and raise an exception if the
            dependencies do not exist. If False, silently do
            nothing in both cases.
            Default: True.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("tree_0_0_0.dat")
        >>> def _redshift(field, data):
        ...     return 1. / data["scale"] - 1
        ...
        >>> a.add_derived_field("redshift", _redshift)
        >>> print (a["redshift"])

        """
        if name in self.field_info:
            if force_add:
                ftype = self.field_info[name].get("type", "on-disk")
                if ftype in ["alias", "derived"]:
                    fl = self.derived_field_list
                else:
                    fl = self.field_list
                mylog.warning(
                    ("Overriding field \"%s\" that already " +
                     "exists as %s field.") % (name, ftype))
                fl.pop(fl.index(name))
            else:
                return

        if units is None:
            units = ""
        info = {"name": name,
                "type": "derived",
                "function": function,
                "units": units,
                "vector_field": vector_field,
                "description": description}

        fc = FakeFieldContainer(self, name=name)
        try:
            rv = function(info, fc)
        except TypeError:
            # raising inside the except block implicitly chains the
            # original TypeError in Python 3
            raise RuntimeError(
"""
Field function syntax in ytree has changed. Field functions must
now take two arguments, as in the following:

    def my_field(field, data):
        return data['mass']

Check the TypeError exception above for more details.
""")
        except ArborFieldDependencyNotFound as e:
            if force_add:
                raise e
            else:
                return
        rv.convert_to_units(units)
        info["dependencies"] = list(fc.keys())

        self.derived_field_list.append(name)
        self.field_info[name] = info

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        Check if the input file works with this Arbor class.
        This is used by the
        :func:`~ytree.data_structures.arbor.load` function.
        """
        return False
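
    # A minimal sketch of how a frontend might override ``_is_valid``
    # (the extension check below is hypothetical; real frontends inspect
    # file names and/or file contents):
    #
    #   @classmethod
    #   def _is_valid(cls, *args, **kwargs):
    #       fn = args[0]
    #       return isinstance(fn, str) and fn.endswith(".my_format")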

    def save_arbor(self, filename="arbor", fields=None, trees=None,
                   max_file_size=524288):
        r"""
        Save the arbor to a file.

        The saved arbor can be re-loaded as an arbor.

        Parameters
        ----------
        filename : optional, string
            Output file keyword. If filename ends in ".h5",
            the main header file will be just that. If not,
            filename will be <filename>/<basename>.h5.
            Default: "arbor".
        fields : optional, list of strings
            The fields to be saved. If not given, all
            fields will be saved.
        trees : optional, list or array of TreeNodes
            If given, only trees stemming from these nodes will
            be saved. If not given, all trees will be saved.
        max_file_size : optional, int
            The maximum number of nodes saved to a single file.
            Smaller numbers will result in more files.
            Default: 524288 (2^19).

        Returns
        -------
        header_filename : string
            The filename of the saved arbor.

        Examples
        --------

        >>> import ytree
        >>> a = ytree.load("rockstar_halos/trees/tree_0_0_0.dat")
        >>> fn = a.save_arbor()
        >>> # reload it
        >>> a2 = ytree.load(fn)

        """
        if trees is None:
            all_trees = True
            trees = self.trees
            roots = trees
        else:
            all_trees = False
            # assemble unique tree roots for getting fields
            trees = np.asarray(trees)
            roots = []
            root_uids = []
            for tree in trees:
                if tree.root == -1:
                    my_root = tree
                else:
                    my_root = tree.root
                if my_root.uid not in root_uids:
                    roots.append(my_root)
                    root_uids.append(my_root.uid)
            roots = np.array(roots)
            del root_uids

        if fields in [None, "all"]:
            # If a field has an alias, get that instead.
            fields = []
            for field in self.field_list + self.analysis_field_list:
                fields.extend(
                    self.field_info[field].get("aliases", [field]))
        else:
            fields.extend([f for f in ["uid", "desc_uid"]
                           if f not in fields])

        ds = {}
        for attr in ["hubble_constant",
                     "omega_matter",
                     "omega_lambda"]:
            if hasattr(self, attr):
                ds[attr] = getattr(self, attr)
        extra_attrs = {"box_size": self.box_size,
                       "arbor_type": "YTreeArbor",
                       "unit_registry_json": self.unit_registry.to_json()}

        self._node_io_loop(self._setup_tree, root_nodes=roots,
                           pbar="Setting up trees")

        if all_trees:
            self._root_io.get_fields(self, fields=fields)

        # determine file layout
        nn = 0  # node count
        nt = 0  # tree count
        nnodes = []
        ntrees = []
        tree_size = np.array([tree.tree_size for tree in trees])
        for ts in tree_size:
            nn += ts
            nt += 1
            if nn > max_file_size:
                nnodes.append(nn-ts)
                ntrees.append(nt-1)
                nn = ts
                nt = 1
        if nn > 0:
            nnodes.append(nn)
            ntrees.append(nt)
        nfiles = len(nnodes)
        nnodes = np.array(nnodes)
        ntrees = np.array(ntrees)
        tree_end_index = ntrees.cumsum()
        tree_start_index = tree_end_index - ntrees

        # write header file
        fieldnames = [field.replace("/", "_") for field in fields]
        myfi = {}
        rdata = {}
        rtypes = {}
        for field, fieldname in zip(fields, fieldnames):
            fi = self.field_info[field]
            myfi[fieldname] = \
                dict((key, fi[key])
                     for key in ["units", "description"]
                     if key in fi)
            if all_trees:
                rdata[fieldname] = self._field_data[field]
            else:
                rdata[fieldname] = self.arr([t[field] for t in trees])
            rtypes[fieldname] = "data"
        # all saved trees will be roots
        if not all_trees:
            rdata["desc_uid"][:] = -1
        extra_attrs["field_info"] = json.dumps(myfi)
        extra_attrs["total_files"] = nfiles
        extra_attrs["total_trees"] = trees.size
        extra_attrs["total_nodes"] = tree_size.sum()
        hdata = {"tree_start_index": tree_start_index,
                 "tree_end_index": tree_end_index,
                 "tree_size": ntrees}
        hdata.update(rdata)
        htypes = dict((f, "index") for f in hdata)
        htypes.update(rtypes)

        filename = _determine_output_filename(filename, ".h5")
        header_filename = "%s.h5" % filename
        save_as_dataset(ds, header_filename, hdata,
                        field_types=htypes, extra_attrs=extra_attrs)

        # write data files
        ftypes = dict((f, "data") for f in fieldnames)
        for i in range(nfiles):
            my_nodes = trees[tree_start_index[i]:tree_end_index[i]]
            self._node_io_loop(
                self._node_io.get_fields,
                pbar="Getting fields [%d/%d]" % (i+1, nfiles),
                root_nodes=my_nodes, fields=fields,
                root_only=False)
            fdata = dict((field, np.empty(nnodes[i]))
                         for field in fieldnames)
            my_tree_size = tree_size[tree_start_index[i]:tree_end_index[i]]
            my_tree_end = my_tree_size.cumsum()
            my_tree_start = my_tree_end - my_tree_size
            pbar = get_pbar("Creating field arrays [%d/%d]" %
                            (i+1, nfiles), len(fields)*nnodes[i])
            c = 0
            for field, fieldname in zip(fields, fieldnames):
                for di, node in enumerate(my_nodes):
                    if node.is_root:
                        ndata = node._field_data[field]
                    else:
                        ndata = node["tree", field]
                        if field == "desc_uid":
                            # make sure it's a root when loaded
                            ndata[0] = -1
                    fdata[fieldname][
                        my_tree_start[di]:my_tree_end[di]] = ndata
                    c += my_tree_size[di]
                    pbar.update(c)
            pbar.finish()
            fdata["tree_start_index"] = my_tree_start
            fdata["tree_end_index"] = my_tree_end
            fdata["tree_size"] = my_tree_size
            for ft in ["tree_start_index",
                       "tree_end_index",
                       "tree_size"]:
                ftypes[ft] = "index"
            my_filename = "%s_%04d.h5" % (filename, i)
            save_as_dataset({}, my_filename, fdata,
                            field_types=ftypes)

        return header_filename
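
# Hedged usage sketch for ``save_arbor`` with a subset of trees; the
# slice and field names below are hypothetical:
#
#   >>> import ytree
#   >>> a = ytree.load("rockstar_halos/trees/tree_0_0_0.dat")
#   >>> fn = a.save_arbor(trees=a[:8], fields=["mass", "redshift"])
#   >>> a2 = ytree.load(fn)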

class CatalogArbor(Arbor):
    """
    Base class for Arbors created from a series of halo catalog
    files where the descendent ID for each halo has been
    pre-determined.
    """

    _prefix = None
    _data_file_class = None

    def __init__(self, filename):
        super(CatalogArbor, self).__init__(filename)
        if "uid" not in self.field_list:
            for field in "uid", "desc_uid":
                self.field_list.append(field)
                self.field_info[field] = {"units": "", "source": "arbor"}

    def _get_data_files(self):
        raise NotImplementedError

    def _plant_trees(self):
        # this can be called once with the list, but fields are
        # not guaranteed to be returned in order
        fields = \
            [self.field_info.resolve_field_dependencies([field])[0][0]
             for field in ["halo_id", "desc_id"]]
        halo_id_f, desc_id_f = fields
        dtypes = dict((field, np.int64) for field in fields)
        uid = 0
        trees = []
        nfiles = len(self.data_files)
        descs = lastids = None
        pbar = get_pbar("Planting trees", len(self.data_files))
        for i, dfl in enumerate(self.data_files):
            if not isinstance(dfl, list):
                dfl = [dfl]

            batches = []
            bsize = []
            hids = []
            ancs = defaultdict(list)
            for data_file in dfl:
                data = data_file._read_fields(fields, dtypes=dtypes)
                nhalos = len(data[halo_id_f])
                batch = np.empty(nhalos, dtype=object)

                for it in range(nhalos):
                    descid = data[desc_id_f][it]
                    root = i == 0 or descid == -1
                    # The data says a descendent exists, but it's not
                    # there. This shouldn't happen, but it does sometimes.
                    if not root and descid not in lastids:
                        root = True
                        descid = data[desc_id_f][it] = -1
                    tree_node = TreeNode(uid, arbor=self, root=root)
                    tree_node._fi = it
                    tree_node.data_file = data_file
                    batch[it] = tree_node
                    if root:
                        trees.append(tree_node)
                    else:
                        ancs[descid].append(tree_node)
                    uid += 1
                data_file.trees = batch
                batches.append(batch)
                bsize.append(batch.size)
                hids.append(data[halo_id_f])

            if i > 0:
                for descid, ancestors in ancs.items():
                    # this will not be fast
                    descendent = descs[descid == lastids][0]
                    descendent._ancestors = ancestors
                    for ancestor in ancestors:
                        ancestor.descendent = descendent

            if i < nfiles - 1:
                descs = np.empty(sum(bsize), dtype=object)
                lastids = np.empty(descs.size, dtype=np.int64)
                ib = 0
                for batch, hid, bs in zip(batches, hids, bsize):
                    descs[ib:ib+bs] = batch
                    lastids[ib:ib+bs] = hid
                    ib += bs
            pbar.update(i)
        pbar.finish()

        self._trees = np.array(trees)

    def _setup_tree(self, tree_node):
        if self.is_setup(tree_node):
            return

        nodes = []
        uids = []
        desc_uids = [-1]
        for i, node in enumerate(tree_node.twalk()):
            node.treeid = i
            node.root = tree_node
            nodes.append(node)
            uids.append(node.uid)
            if i > 0:
                desc_uids.append(node.descendent.uid)
        tree_node._nodes = np.array(nodes)
        tree_node._uids = np.array(uids)
        tree_node._desc_uids = np.array(desc_uids)
        tree_node._tree_size = tree_node._uids.size

        # This should bypass any attempt to get this field in
        # the conventional way.
        if self.field_info["uid"]["source"] == "arbor":
            tree_node._field_data["uid"] = tree_node._uids
            tree_node._field_data["desc_uid"] = tree_node._desc_uids

    def _grow_tree(self, tree_node):
        pass

# module-level flag used by load to print the save_arbor suggestion
# only once; the "global" statement is unnecessary at module scope
load_warn = True

def load(filename, method=None, **kwargs):
    """
    Load an Arbor, determining the type automatically.

    Parameters
    ----------
    filename : string
        Input filename.
    method : optional, string
        The type of Arbor to be loaded. Existing types are:
        ConsistentTrees, Rockstar, TreeFarm, YTree. If not
        given, the type will be determined based on
        characteristics of the input file.
    kwargs : optional, dict
        Additional keyword arguments are passed to _is_valid and
        the determined type.

    Returns
    -------
    Arbor

    Examples
    --------

    >>> import ytree
    >>> # saved Arbor
    >>> a = ytree.load("arbor/arbor.h5")
    >>> # consistent-trees output
    >>> a = ytree.load("rockstar_halos/trees/tree_0_0_0.dat")
    >>> # Rockstar catalogs
    >>> a = ytree.load("rockstar_halos/out_0.list")
    >>> # TreeFarm catalogs
    >>> a = ytree.load("my_halos/fof_subhalo_tab_025.0.h5")
    >>> # LHaloTree catalogs
    >>> a = ytree.load("my_halos/trees_063.0")
    >>> # Amiga Halo Finder
    >>> a = ytree.load("ahf_halos/snap_N64L16_000.parameter",
    ...                hubble_constant=0.7)

    """
    if not os.path.exists(filename):
        raise IOError("file does not exist: %s." % filename)
    if method is None:
        candidates = []
        for candidate, c in arbor_registry.items():
            if c._is_valid(filename, **kwargs):
                candidates.append(candidate)
        if len(candidates) == 0:
            raise IOError("Could not determine arbor type for %s." % filename)
        elif len(candidates) > 1:
            errmsg = "Could not distinguish between these arbor types:\n"
            for candidate in candidates:
                errmsg += "Possible: %s.\n" % candidate
            errmsg += "Provide one of these types using the \'method\' keyword."
            raise IOError(errmsg)
        else:
            method = candidates[0]
    else:
        if method not in arbor_registry:
            raise IOError("Invalid method: %s. Available: %s." %
                          (method, arbor_registry.keys()))

    global load_warn
    if method not in ["YTree", "LHaloTree"] and load_warn:
        print(
            ("Performance will be improved by saving this arbor with " +
             "\"save_arbor\" and reloading:\n" +
             "\t>>> a = ytree.load(\"%s\")\n" +
             "\t>>> fn = a.save_arbor()\n" +
             "\t>>> a = ytree.load(fn)") % filename)
        load_warn = False
    return arbor_registry[method](filename, **kwargs)
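
# A hedged example of forcing a frontend with the ``method`` keyword
# (the filename is hypothetical):
#
#   >>> import ytree
#   >>> a = ytree.load("rockstar_halos/out_0.list", method="Rockstar")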