# Source code for ytree.frontends.consistent_trees.arbor

"""
ConsistentTreesArbor class and member functions



"""

#-----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

import glob
import numpy as np
import operator
import os

from yt.funcs import \
    get_pbar

from ytree.data_structures.arbor import \
    SegmentedArbor

from ytree.frontends.consistent_trees.fields import \
    ConsistentTreesFieldInfo
from ytree.frontends.consistent_trees.io import \
    ConsistentTreesDataFile, \
    ConsistentTreesTreeFieldIO, \
    ConsistentTreesHlistDataFile
from ytree.frontends.consistent_trees.utilities import \
    parse_ctrees_header
from ytree.frontends.rockstar.arbor import \
    RockstarArbor

from ytree.utilities.exceptions import \
    ArborDataFileEmpty
from ytree.utilities.io import \
    f_text_block

class ConsistentTreesArbor(SegmentedArbor):
    """
    Arbors loaded from consistent-trees tree_*.dat files.
    """

    _field_info_class = ConsistentTreesFieldInfo
    _tree_field_io_class = ConsistentTreesTreeFieldIO
    _default_dtype = np.float32
    # Per-tree attributes filled in by _plant_trees:
    # file index, start offset, and end offset of the tree's data.
    _node_io_attrs = ('_fi', '_si', '_ei')

    def _get_data_files(self):
        """
        Register the tree file itself as the only data file.
        """
        self.data_files = [ConsistentTreesDataFile(self.filename)]

    def _parse_parameter_file(self, filename=None, ntrees_in_file=True):
        """
        Parse the consistent-trees header and register the fields found.

        Parameters
        ----------
        filename : str, optional
            File whose header is parsed. Defaults to ``self.filename``.
        ntrees_in_file : bool, optional
            Forwarded unchanged to ``parse_ctrees_header``.
        """
        if filename is None:
            filename = self.filename

        fi = parse_ctrees_header(
            self, filename, ntrees_in_file=ntrees_in_file)
        self.field_list = list(fi.keys())
        self.field_info.update(fi)

    def _plant_trees(self):
        """
        Scan the tree file for "#tree <uid>" lines and record, for every
        tree, its uid, file index (always 0), and the start/end offsets
        of its data within the file.

        The file is read in fixed-size blocks. Does nothing if the trees
        are already planted or the arbor is empty.
        """
        if self.is_planted or self._size == 0:
            return

        # Number of characters from the "#" to the first digit of the uid
        # in a "#tree <uid>" line.
        lkey = len("tree ")+1
        block_size = 4096

        data_file = self.data_files[0]
        data_file.open()
        pbar = None
        # Make sure the file (and progress bar) are released even if
        # a malformed file makes the parsing below raise.
        try:
            fh = data_file.fh
            fh.seek(0, 2)
            file_size = fh.tell()
            pbar = get_pbar("Loading tree roots", file_size)
            fh.seek(self._hoffset)

            offset = self._hoffset
            itree = 0
            nblocks = np.ceil(float(file_size-self._hoffset) /
                              block_size).astype(np.int64)
            for _ in range(nblocks):
                my_block = min(block_size, file_size - offset)
                if my_block <= 0:
                    break
                buff = fh.read(my_block)
                lihash = -1
                for _ih in range(buff.count("#")):
                    ihash = buff.find("#", lihash+1)
                    inl = buff.find("\n", ihash+1)
                    if inl < 0:
                        # The header line straddles the block boundary:
                        # pull in the rest of the line and locate its
                        # newline again so that inl is the index of the
                        # newline, exactly as in the non-straddling case.
                        buff += fh.readline()
                        inl = buff.find("\n", ihash+1)
                        if inl < 0:
                            # Header is the unterminated last line.
                            inl = len(buff)
                    uid = int(buff[ihash+lkey:inl])
                    self._node_info['uid'][itree] = uid
                    lihash = ihash
                    # Tree data starts right after the header's newline.
                    self._node_info['_si'][itree] = offset + inl + 1
                    self._node_info['_fi'][itree] = 0
                    if itree > 0:
                        # Previous tree ends at the newline preceding
                        # this header.
                        self._node_info['_ei'][itree-1] = offset + ihash - 1
                    itree += 1
                offset = fh.tell()
                pbar.update(offset)
            # The last tree runs to the end of what was read.
            self._node_info['_ei'][-1] = offset
        finally:
            data_file.close()
            if pbar is not None:
                pbar.finish()

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        File should end in .dat and have a line in the header
        with the string, "Consistent Trees".
        """
        fn = args[0]
        if not fn.endswith(".dat"):
            return False
        with open(fn, "r") as f:
            # Only the leading block of "#" comment lines is inspected.
            for line in f:
                if not line.startswith("#"):
                    break
                if "Consistent Trees" in line:
                    return True
        return False
class ConsistentTreesGroupArbor(ConsistentTreesArbor):
    """
    Arbors loaded from consistent-trees locations.dat files.
    """

    def _get_data_files(self):
        """
        Intentionally empty: self.data_files is assigned in _plant_trees
        once the locations file has been read.
        """
        pass

    def _parse_parameter_file(self):
        """
        Record the offset of the first data line of the locations file
        and parse the header of the tree file named on that line.

        Raises
        ------
        ArborDataFileEmpty
            If the locations file has no line after its first line.
        """
        with open(self.filename, 'r') as f:
            # Skip the first line; entries start right after it.
            f.readline()
            self._hoffset = f.tell()
            line = f.readline()

        if not line:
            raise ArborDataFileEmpty(self.filename)

        # Fourth column of an entry is the tree file name.
        fn = os.path.join(self.directory, line.split()[3])
        super()._parse_parameter_file(filename=fn, ntrees_in_file=False)

    def _plant_trees(self):
        """
        Read all entries of the locations file and record, for every
        tree, its uid, file index, and start/end offsets. Also build
        self.data_files, indexed by file id.

        Does nothing if the trees are already planted.
        """
        if self.is_planted:
            return

        # Each entry becomes
        # [root uid, file id, offset, file name, len(uid string)].
        with open(self.filename, 'r') as f:
            f.seek(self._hoffset)
            ldata = [
                [int(x[0]), int(x[1]), int(x[2]), x[3], len(x[0])]
                for x in (line.split() for line, _ in
                          f_text_block(f, pbar_string='Reading locations'))
            ]

        self._size = len(ldata)

        # Sorted file ids; these line up with ldata once it has been
        # sorted by (file id, offset) below.
        fids = np.array([datum[1] for datum in ldata])
        fids.sort()

        # Some data files may be empty and so unlisted.
        # Take the file id -> name pairing straight from the entries so
        # it does not depend on how file names happen to sort.
        fid_names = {datum[1]: datum[3] for datum in ldata}
        data_files = [None]*(max(fid_names)+1)
        for fid, fn in fid_names.items():
            data_files[fid] = fn

        self.data_files = \
          [None if fn is None
           else ConsistentTreesDataFile(os.path.join(self.directory, fn))
           for fn in data_files]

        ldata.sort(key=operator.itemgetter(1, 2))

        pbar = get_pbar("Loading tree roots", self._size)

        # Set end offsets for each tree.
        # We don't get them from the location file.
        # Characters between the end of one tree and the start of the
        # next, not counting the uid itself: "#tree " (6), the newline
        # ending that line (1), and the newline ending the previous
        # tree's last line (1).
        lkey = len("tree ")+3
        # True where the next entry belongs to the same file.
        same_file = np.diff(fids, append=fids[-1]+1) == 0

        for i, tdata in enumerate(ldata):
            self._node_info['uid'][i] = tdata[0]
            self._node_info['_fi'][i] = tdata[1]
            self._node_info['_si'][i] = tdata[2]
            # Get end index from next tree.
            if same_file[i]:
                # The separator line carries the *next* tree's uid, so
                # it is that uid's length that must be subtracted.
                next_tree = ldata[i+1]
                self._node_info['_ei'][i] = \
                  next_tree[2] - lkey - next_tree[4]
            pbar.update(i+1)
        pbar.finish()

        # Get end index for last trees in files.
        for i in np.where(~same_file)[0]:
            data_file = self.data_files[fids[i]]
            data_file.open()
            try:
                data_file.fh.seek(0, 2)
                self._node_info['_ei'][i] = data_file.fh.tell()
            finally:
                data_file.close()

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        File should be named locations.dat and have a line in the
        header with the string, "TreeRootID FileID Offset Filename".
        """
        fn = args[0]
        if os.path.basename(fn) != 'locations.dat':
            return False
        with open(fn, "r") as f:
            # Only the leading block of "#" comment lines is inspected.
            for line in f:
                if not line.startswith("#"):
                    break
                if "TreeRootID FileID Offset Filename" in line:
                    return True
        return False
class ConsistentTreesHlistArbor(RockstarArbor):
    """
    Class for Arbors created from consistent-trees hlist_*.list files.
    This is a hybrid type with multiple catalog files like the rockstar
    frontend, but with headers structured like consistent-trees.
    """

    _has_uids = True
    _field_info_class = ConsistentTreesFieldInfo
    _data_file_class = ConsistentTreesHlistDataFile

    def _parse_parameter_file(self):
        """
        Parse the header with the consistent-trees header parser.
        """
        # Called through the class because this arbor does not inherit
        # from ConsistentTreesArbor.
        ConsistentTreesArbor._parse_parameter_file(
            self, ntrees_in_file=False)

    def _get_data_files(self):
        """
        Get all hlist_*.list files in the directory of self.filename
        and sort them in reverse order of the number in their names.
        """
        prefix = os.path.join(os.path.dirname(self.filename), "hlist_")
        suffix = ".list"
        # Escape the prefix so that glob metacharacters in the directory
        # name are matched literally; only the "*" is a wildcard.
        my_files = glob.glob(f"{glob.escape(prefix)}*{suffix}")

        # sort by catalog number
        my_files.sort(
            key=lambda x: self._get_file_index(x, prefix, suffix),
            reverse=True)
        self.data_files = \
          [self._data_file_class(f, self) for f in my_files]

    def _get_file_index(self, f, prefix, suffix):
        """
        Return, as a float, the part of the file name f found between
        prefix and the last occurrence of suffix.
        """
        return float(f[f.find(prefix)+len(prefix):f.rfind(suffix)])

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        File name should start with "hlist" and end in .list.
        """
        fn = args[0]
        return os.path.basename(fn).startswith("hlist") and \
            fn.endswith(".list")