Source code for ytree.frontends.ahf.arbor

"""
AHFArbor class and member functions



"""

# -----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------

from collections import defaultdict
import glob
import os
import re

from ytree.data_structures.arbor import CatalogArbor
from ytree.frontends.ahf.fields import AHFFieldInfo, AHFCRMFieldInfo
from ytree.frontends.ahf.io import AHFDataFile, AHFCRMDataFile
from ytree.frontends.ahf.misc import parse_AHF_file
from unyt.unit_registry import UnitRegistry
from ytree.utilities.io import f_text_block


class AHFArbor(CatalogArbor):
    """
    Arbor for Amiga Halo Finder data without a CRMratio2 file.

    In this instance, we will use the mtree files to assemble the tree.
    """

    # Naming conventions. Any "*_prefix"/"*_suffix" entry below can be
    # overridden per instance through the name_config argument.
    _ahf_prefix = None
    _mtree_prefix = None
    _data_suffix = ".AHF_halos"
    _mtree_suffix = ".AHF_mtree"
    _par_suffix = ".parameter"
    _crm_prefix = "MergerTree_"
    _crm_suffix = "-CRMratio2"
    crm_filename = None
    # Strings tried between the file key and the crm suffix when guessing
    # the name of a CRMratio2 file.
    _frequent_crm_midfixes = (".txt", "")
    _field_info_class = AHFFieldInfo
    _data_file_class = AHFDataFile

    def __init__(
        self,
        filename,
        log_filename=None,
        parameter_filename=None,
        crm_filename=None,
        hubble_constant=1.0,
        box_size=None,
        omega_matter=None,
        omega_lambda=None,
        name_config=None,
    ):
        """
        Store the user-supplied settings and hand off to the base class.

        Parameters
        ----------
        filename : str
            File to load; passed on to the base class constructor.
        log_filename : str, optional
            AHF log file to read cosmological parameters from. If None,
            a "<filekey>*.log" file is searched for when the parameter
            file is parsed.
        parameter_filename : str, optional
            Path to a parameter file.
        crm_filename : str, optional
            Path to a CRMratio2 file.
        hubble_constant : float, optional
            Stored as self.hubble_constant.
        box_size : float, optional
            Box size in Mpc/h. When given, it takes precedence over any
            value found in the log file.
        omega_matter, omega_lambda : float, optional
            Cosmological parameters. Values found in the log file
            replace these.
        name_config : dict, optional
            Overrides for the naming conventions, keyed by attribute
            name without the leading underscore (e.g., "par_suffix").

        Raises
        ------
        ValueError
            If a name_config key is malformed or unknown.
        """
        self.unit_registry = UnitRegistry()
        self.hubble_constant = hubble_constant
        self.omega_matter = omega_matter
        self.omega_lambda = omega_lambda
        self._box_size_user = box_size
        self.log_filename = log_filename
        self.parameter_filename = parameter_filename
        self.crm_filename = crm_filename

        if name_config is None:
            name_config = {}
        self._set_naming_conventions(name_config)

        # Built after the naming conventions are applied so that a
        # user-supplied par_suffix is honored. The suffix is escaped
        # because it is a literal string (it is also used with glob and
        # str.endswith), not a regular expression.
        self._file_pattern = re.compile(
            rf"(^.+[^0-9a-zA-Z]+)(\d+).*{re.escape(self._par_suffix)}$"
        )
        super().__init__(filename)

    def _set_naming_conventions(self, config):
        """
        Set some filename conventions.

        Each key of config must be of the form "<word>_prefix" or
        "<word>_suffix" and correspond to an existing "_<key>" attribute,
        which is then set on this instance.

        Raises
        ------
        ValueError
            If a key has the wrong form or names an unknown attribute.
        """
        for k, v in config.items():
            # fullmatch so the key must really *end* in prefix/suffix,
            # as promised by the error message.
            if not re.fullmatch(r"\w+_(?:prefix|suffix)", k):
                raise ValueError(
                    f'name_config entry must end in either prefix or suffix: "{k}"'
                )
            if not hasattr(self, f"_{k}"):
                raise ValueError(
                    f'name_config entry not associated with a known attribute: "{k}"'
                )
            setattr(self, f"_{k}", v)

    @classmethod
    def _is_crm_file(cls, filename):
        """
        Checking if this has the proper crm prefix and suffix.
        """
        return os.path.basename(filename).startswith(
            cls._crm_prefix
        ) and filename.endswith(cls._crm_suffix)

    def _guess_crm_filename(self, filename):
        """
        Take an educated guess at the crm filename.

        Return None if our guess does not exist.

        Note: _is_valid calls this with the class itself in place of an
        instance, so only attributes that also exist on the class may be
        used here.
        """
        # An explicitly provided crm file always wins.
        if self.crm_filename is not None:
            if os.path.exists(self.crm_filename):
                return self.crm_filename
            return None

        # The given file may already be the crm file.
        if AHFArbor._is_crm_file(filename):
            if os.path.exists(filename):
                return filename
            return None

        # Searching for <keyword>.something.<suffix>
        res = re.search(
            rf"([^\.]+)\.[^\.]+{re.escape(self._par_suffix)}$", filename
        )
        if not res:
            return None

        filekey = res.groups()[0]
        ddir = os.path.dirname(filekey)
        bkey = os.path.basename(filekey)
        for midfix in self._frequent_crm_midfixes:
            bname = bkey + midfix
            fname = os.path.join(ddir, f"{self._crm_prefix}{bname}{self._crm_suffix}")
            if os.path.exists(fname):
                return fname
        return None

    @classmethod
    def _is_parameter_file(cls, filename):
        """
        Check if file has the proper suffix.
        """
        return filename.endswith(cls._par_suffix)

    def _parse_parameter_file(self):
        """
        Read cosmological parameters and build the field list.

        Cosmological parameters and the box size come from the AHF log
        file, if one can be found. The field list comes from the header
        line of the halo catalog associated with the parameter file,
        plus the "redshift" and "desc_id" fields.
        """
        df = AHFDataFile(self.parameter_filename, self)

        # log file key -> attribute name
        pars = {
            "simu.omega0": "omega_matter",
            "simu.lambda0": "omega_lambda",
            "simu.boxsize": "box_size",
        }

        log_filename = self.log_filename
        if log_filename is None:
            fns = glob.glob(df.filekey + "*.log")
            log_filename = fns[0] if fns else None

        if log_filename is not None and os.path.exists(log_filename):
            vals = parse_AHF_file(log_filename, pars, sep=":")
            for attr in ["omega_matter", "omega_lambda"]:
                # Only take what the log actually provides, so a value
                # given to the constructor is not replaced by None.
                if attr in vals:
                    setattr(self, attr, vals[attr])
            if "box_size" in vals:
                self.box_size = self.quan(vals["box_size"], "Mpc/h")

        # A box size given by the user takes precedence over the log file.
        if self._box_size_user is not None:
            self.box_size = self.quan(self._box_size_user, "Mpc/h")

        # fields from the .AHF_halos files
        with open(f"{df.data_filekey}{self._data_suffix}") as f:
            line = f.readline()

        # The first character of the header line is skipped and each
        # remaining entry has everything from its last "(" onward
        # removed. Entries without a "(" are kept whole.
        fields = []
        for key in line[1:].strip().split():
            paren = key.rfind("(")
            fields.append(key if paren == -1 else key[:paren])
        fi = {
            field: {"column": i, "file": "halos"} for i, field in enumerate(fields)
        }

        # the scale factor comes from the catalog file header
        fields.append("redshift")
        fi["redshift"] = {"file": "header", "units": ""}

        # the descendent ids come from the .AHF_mtree files
        fields.append("desc_id")
        fi["desc_id"] = {"file": "mtree", "units": ""}

        self.field_list = fields
        self.field_info.update(fi)

    # Cache for the _prefix property.
    _fprefix = None

    @property
    def _prefix(self):
        """
        Leading part of the parameter filename, up to the catalog number.

        Raises
        ------
        RuntimeError
            If the parameter filename does not follow the expected
            naming pattern.
        """
        if self._fprefix is None:
            # Match a pattern of any characters, followed by some sort of
            # separator (e.g., "." or "_"), then a number, and eventually
            # the suffix.
            reg = self._file_pattern.search(self.parameter_filename)
            if not reg:
                # Raise explicitly: an AttributeError escaping from a
                # property produces very confusing error messages.
                raise RuntimeError(
                    f"Could not determine file prefix from: {self.parameter_filename}."
                )
            self._fprefix = reg.groups()[0]
        return self._fprefix

    def _get_data_files(self):
        """
        Get all *.parameter files and sort them in reverse order.
        """
        my_files = glob.glob(f"{self._prefix}*{self._par_suffix}")

        # sort by catalog number
        my_files.sort(key=self._get_file_index)
        self.data_files = [self._data_file_class(f, self) for f in my_files]

        # With the files in ascending order, give each file the mtree
        # file of the next one, since AHF thinks in terms of progenitors
        # and not descendents. The last file has no mtree file.
        for current, following in zip(self.data_files, self.data_files[1:]):
            current.mtree_filename = following.mtree_filename
        self.data_files[-1].mtree_filename = None

        self.data_files.reverse()

    def _get_file_index(self, f):
        """
        Return the catalog number embedded in a parameter filename.

        Raises
        ------
        RuntimeError
            If the filename does not follow the expected naming pattern.
        """
        reg = self._file_pattern.search(f)
        if not reg:
            raise RuntimeError(f"Could not locate index within file: {f}.")
        return int(reg.groups()[1])

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        File must end in .parameter.

        Data for which a crm file was given, or can be guessed, is not
        handled by this class.
        """
        fn = args[0]
        if not AHFArbor._is_parameter_file(fn):
            return False
        if "crm_filename" in kwargs:
            return False
        # No instance exists yet, so the class stands in for self.
        if cls._guess_crm_filename(cls, fn) is not None:
            return False
        return True
class AHFCRMArbor(AHFArbor):
    """
    Arbor for AHF data that includes a CRM file.

    The CRM file contains all the halo links, so we don't have to
    assemble them from the mtree files.
    """

    _has_uids = True
    _field_info_class = AHFCRMFieldInfo
    _data_file_class = AHFCRMDataFile

    def _set_paths(self, filename):
        """
        Get both a CRM file and a parameter file.

        Raise an exception if we can't get both.

        Raises
        ------
        RuntimeError
            If a specified crm or parameter file does not exist, or if
            either one cannot be determined.
        """
        super()._set_paths(filename)

        if self.crm_filename is None:
            self.crm_filename = self._guess_crm_filename(filename)
        elif not os.path.exists(self.crm_filename):
            raise RuntimeError(
                f"Specified crm_filename does not exist: {self.crm_filename}"
            )
        if self.crm_filename is None:
            raise RuntimeError(
                f"crm_filename is None for {type(self)}. This shouldn't be."
            )

        if self.parameter_filename is None:
            self.parameter_filename = self._guess_parameter_filename(filename)
        elif not os.path.exists(self.parameter_filename):
            raise RuntimeError(
                "Specified parameter_filename does not exist: "
                f"{self.parameter_filename}"
            )
        if self.parameter_filename is None:
            raise RuntimeError(
                f"parameter_filename is None for {type(self)}. This shouldn't be."
            )

    def _set_parameter_filename(self, filename):
        """
        Do nothing; _set_paths determines the parameter filename.

        NOTE(review): presumably this overrides a base-class hook; confirm
        against the parent class.
        """
        pass

    def _guess_parameter_filename(self, filename):
        """
        Guess parameter filename from the crm filename.

        Returns
        -------
        str or None
            The candidate with the highest catalog number, or None if a
            specified parameter file does not exist or the candidates do
            not follow the expected naming pattern.

        Raises
        ------
        RuntimeError
            If no file with the parameter suffix can be found at all.
        """
        if self.parameter_filename is not None:
            if os.path.exists(self.parameter_filename):
                return self.parameter_filename
            return None

        # Strip the crm prefix and suffix. The end index is computed
        # from the length because a negative slice bound of -0 would
        # yield an empty string for an empty suffix.
        basename = os.path.basename(filename)
        filekey = basename[
            len(self._crm_prefix) : len(basename) - len(self._crm_suffix)
        ]

        # try a couple guesses at naming conventions,
        # but don't work too hard.
        pfns = []
        for midfix in self._frequent_crm_midfixes:
            # Only strip a midfix that is actually there.
            if not filekey.endswith(midfix):
                continue
            stem = filekey[: len(filekey) - len(midfix)]
            pfns = glob.glob(
                os.path.join(self.directory, stem) + f"*{self._par_suffix}"
            )
            if pfns:
                break

        # just look for all files with the right suffix and hope for the
        # best, first next to the data and then in the current directory
        if not pfns:
            pfns = glob.glob(os.path.join(self.directory, f"*{self._par_suffix}"))
        if not pfns:
            pfns = glob.glob(f"*{self._par_suffix}")
        if not pfns:
            raise RuntimeError(
                f"Could not find any files ending in {self._par_suffix} "
                "from which to get parameters. Put some of those in here."
            )

        try:
            pfns.sort(key=self._get_file_index)
        except RuntimeError:
            # A candidate without a catalog number in its name means we
            # guessed wrong. Report that as "no guess".
            return None
        return pfns[-1]

    def _plant_trees(self):
        """
        Compute the halo links, then plant trees as the parent class does.
        """
        if self.is_planted:
            return

        self._compute_links()
        super()._plant_trees()

    def _compute_links(self):
        """
        Read the CRMratio2 file and hand out a dictionary of
        uid: desc_uid for each data file.

        Raises
        ------
        RuntimeError
            If a progenitor entry appears before any descendent entry.
        """
        # catalog index -> {uid: desc_uid}
        links = defaultdict(dict)

        with open(self.crm_filename, mode="r") as f:
            # Skip the first three lines (presumably a header).
            for _ in range(3):
                f.readline()

            my_descid = None
            for line, _ in f_text_block(f, pbar_string="Computing links"):
                if line.startswith("END"):
                    break

                online = line.split()
                if not online:
                    # nothing to parse on a blank line
                    continue
                thing = online[0]

                # A line with exactly two entries starts a new
                # descendent; its first entry is the descendent id.
                if len(online) == 2:
                    my_descid = int(thing)
                    continue

                # Any other line is a progenitor of the current descendent.
                if my_descid is None:
                    raise RuntimeError(
                        "Found a progenitor entry before any descendent "
                        f"entry in {self.crm_filename}."
                    )
                my_id = int(thing)
                # Everything but the last 12 digits of the id is the
                # catalog index.
                cid = int(thing[:-12])
                links[cid][my_id] = my_descid

        for df in self.data_files:
            df._links = links[df._catalog_index]

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        Filename must end in .parameter or match the CRM naming
        convention.
        """
        fn = args[0]

        # if it is a crm file, then we are good to go
        if AHFCRMArbor._is_crm_file(fn):
            return True

        # if it is not a parameter file, then it can't be of this type
        if not AHFCRMArbor._is_parameter_file(fn):
            return False

        # No instance exists yet, so the class stands in for self.
        mtree_fn = cls._guess_crm_filename(cls, fn)
        if mtree_fn is None or not os.path.exists(mtree_fn):
            return False
        return True