"""
AHFArbor class and member functions
"""
#-----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------
from collections import defaultdict
import glob
import os
import re
from ytree.data_structures.arbor import \
CatalogArbor
from ytree.frontends.ahf.fields import \
AHFFieldInfo, \
AHFCRMFieldInfo
from ytree.frontends.ahf.io import \
AHFDataFile, \
AHFCRMDataFile
from ytree.frontends.ahf.misc import \
parse_AHF_file
from unyt.unit_registry import \
UnitRegistry
from ytree.utilities.io import \
f_text_block
class AHFArbor(CatalogArbor):
    """
    Arbor for Amiga Halo Finder data without a CRMratio2 file.

    In this instance, we will use the mtree files to assemble the tree.
    """

    # Filename conventions. Each of these can be overridden per instance
    # through the name_config argument of __init__ (see
    # _set_naming_conventions).
    _ahf_prefix = None
    _mtree_prefix = None
    _data_suffix = ".AHF_halos"
    _mtree_suffix = ".AHF_mtree"
    _par_suffix = ".parameter"
    _crm_prefix = "MergerTree_"
    _crm_suffix = "-CRMratio2"
    crm_filename = None
    # Strings tried between the file key and the crm suffix when
    # guessing the name of the CRM file.
    _frequent_crm_midfixes = (".txt", "")
    _field_info_class = AHFFieldInfo
    _data_file_class = AHFDataFile
    # Cache for the _prefix property.
    _fprefix = None

    def __init__(self, filename,
                 log_filename=None,
                 parameter_filename=None,
                 crm_filename=None,
                 hubble_constant=1.0, box_size=None,
                 omega_matter=None, omega_lambda=None,
                 name_config=None):
        """
        Store user-supplied settings and hand off to the parent class.

        Parameters
        ----------
        filename : str
            File used to load the arbor; passed on to the parent
            constructor.
        log_filename : str, optional
            AHF log file to read cosmological parameters from. If None,
            _parse_parameter_file looks for a "*.log" file based on the
            data file's file key.
        parameter_filename : str, optional
            Explicit path to a parameter file.
        crm_filename : str, optional
            Explicit path to a CRM file.
        hubble_constant : float, optional
            Stored as self.hubble_constant. Default: 1.0.
        box_size : float, optional
            Box size in Mpc/h. When given, it takes precedence over
            any value read from the log file.
        omega_matter, omega_lambda : float, optional
            Cosmological parameters. Values found in the log file
            replace these.
        name_config : dict, optional
            Overrides for the filename conventions. Keys must end in
            "prefix" or "suffix" and correspond to an existing
            "_<key>" attribute (e.g., "data_suffix").
        """
        self.unit_registry = UnitRegistry()
        self.hubble_constant = hubble_constant
        self.omega_matter = omega_matter
        self.omega_lambda = omega_lambda
        self._box_size_user = box_size
        self.log_filename = log_filename
        self.parameter_filename = parameter_filename
        self.crm_filename = crm_filename

        if name_config is None:
            name_config = {}
        self._set_naming_conventions(name_config)

        # Group 1: everything up to and including the separator before
        # the catalog number. Group 2: the catalog number.
        # The suffix is escaped so that it is matched literally.
        self._file_pattern = re.compile(
            rf"(^.+[^0-9a-zA-Z]+)(\d+).*{re.escape(self._par_suffix)}$")

        super().__init__(filename)

    def _set_naming_conventions(self, config):
        """
        Set some filename conventions.

        Each key "k" of config is stored on the instance as attribute
        "_k". A ValueError is raised if a key does not end in "prefix"
        or "suffix" or does not correspond to an existing attribute.
        """
        for k, v in config.items():
            # fullmatch so that the key really has to *end* in
            # prefix/suffix, as the error message says.
            if not re.fullmatch(r"\w+_(?:prefix|suffix)", k):
                raise ValueError(
                    f"name_config entry must end in either prefix or suffix: \"{k}\"")
            if not hasattr(self, f"_{k}"):
                raise ValueError(
                    f"name_config entry not associated with a known attribute: \"{k}\"")
            setattr(self, f"_{k}", v)

    @classmethod
    def _is_crm_file(cls, filename):
        """
        Checking if this has the proper crm prefix and suffix.
        """
        return os.path.basename(filename).startswith(cls._crm_prefix) and \
            filename.endswith(cls._crm_suffix)

    def _guess_crm_filename(self, filename):
        """
        Take an educated guess at the crm filename.

        Return None if our guess does not exist.

        This is also called from the _is_valid classmethods with the
        class itself passed as "self", so it must only rely on
        attributes that exist at class level.
        """
        # An explicitly provided crm file always wins.
        if self.crm_filename is not None:
            if os.path.exists(self.crm_filename):
                return self.crm_filename
            return None

        # NOTE(review): _is_crm_file is a classmethod, so this check
        # uses the class-level prefix/suffix and does not see
        # per-instance name_config overrides.
        if AHFArbor._is_crm_file(filename):
            if os.path.exists(filename):
                return filename
            return None

        # Searching for <keyword>.something.<suffix>
        res = re.search(
            rf"([^\.]+)\.[^\.]+{re.escape(self._par_suffix)}$", filename)
        if not res:
            return None

        filekey = res.group(1)
        ddir, key_base = os.path.split(filekey)
        for midfix in self._frequent_crm_midfixes:
            fname = os.path.join(
                ddir,
                f"{self._crm_prefix}{key_base}{midfix}{self._crm_suffix}")
            if os.path.exists(fname):
                return fname

        return None

    @classmethod
    def _is_parameter_file(cls, filename):
        """
        Check if file has the proper suffix.
        """
        return filename.endswith(cls._par_suffix)

    def _parse_parameter_file(self):
        """
        Read cosmological parameters and build the field list.

        Cosmological parameters come from the AHF log file, if one is
        available. Field names come from the header (first line) of the
        halos file associated with the parameter file.
        """
        df = AHFDataFile(self.parameter_filename, self)

        # log file entry -> attribute name
        pars = {"simu.omega0": "omega_matter",
                "simu.lambda0": "omega_lambda",
                "simu.boxsize": "box_size"}

        log_filename = self.log_filename
        if log_filename is None:
            fns = glob.glob(df.filekey + "*.log")
            log_filename = fns[0] if fns else None

        if log_filename is not None and os.path.exists(log_filename):
            vals = parse_AHF_file(log_filename, pars, sep=":")
            # Only overwrite with values actually present in the log so
            # that user-supplied values are not clobbered with None.
            for attr in ("omega_matter", "omega_lambda"):
                if attr in vals:
                    setattr(self, attr, vals[attr])
            if "box_size" in vals:
                self.box_size = self.quan(vals["box_size"], "Mpc/h")

        # A user-supplied box size overrides the one from the log file.
        if self._box_size_user is not None:
            self.box_size = self.quan(self._box_size_user, "Mpc/h")

        # fields from the .AHF_halos files
        with open(f"{df.data_filekey}{self._data_suffix}") as f:
            line = f.readline()

        # Skip the first character of the header line and strip the
        # trailing "(...)" part from each column name. Names without
        # a "(" are kept whole.
        fields = [key[:key.rfind("(")] if "(" in key else key
                  for key in line[1:].strip().split()]
        fi = {field: {"column": i, "file": "halos"}
              for i, field in enumerate(fields)}

        # the scale factor comes from the catalog file header
        fields.append("redshift")
        fi["redshift"] = {"file": "header", "units": ""}

        # the descendent ids come from the .AHF_mtree files
        fields.append("desc_id")
        fi["desc_id"] = {"file": "mtree", "units": ""}

        self.field_list = fields
        self.field_info.update(fi)

    @property
    def _prefix(self):
        """
        The part of the parameter filename preceding the catalog number.

        Computed once from self.parameter_filename and cached.
        Raises a RuntimeError if the filename does not match the
        expected pattern.
        """
        if self._fprefix is None:
            # Match a pattern of any characters, followed by some sort
            # of separator (e.g., "." or "_"), then a number, and
            # eventually the suffix.
            reg = self._file_pattern.search(self.parameter_filename)
            if not reg:
                raise RuntimeError(
                    f"Could not locate index within file: {self.parameter_filename}.")
            self._fprefix = reg.group(1)
        return self._fprefix

    def _get_data_files(self):
        """
        Get all *.parameter files and sort them in reverse order.
        """
        my_files = glob.glob(f"{self._prefix}*{self._par_suffix}")

        # sort by catalog number
        my_files.sort(key=self._get_file_index)
        self.data_files = \
            [self._data_file_class(f, self) for f in my_files]

        # With files in ascending catalog order, give each file the
        # mtree file of the next catalog, since AHF thinks in terms of
        # progenitors and not descendents. The last file has no
        # following catalog and so gets no mtree file.
        for current, following in zip(self.data_files, self.data_files[1:]):
            current.mtree_filename = following.mtree_filename
        self.data_files[-1].mtree_filename = None

        self.data_files.reverse()

    def _get_file_index(self, f):
        """
        Return the catalog number embedded in filename f as an int.

        Raises a RuntimeError if f does not match the file pattern.
        """
        reg = self._file_pattern.search(f)
        if not reg:
            raise RuntimeError(
                f"Could not locate index within file: {f}.")
        return int(reg.group(2))

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        File must end in .parameter.

        Additionally, there must be no CRM file, either given
        explicitly through the crm_filename keyword or guessable from
        the filename.
        """
        fn = args[0]
        if not cls._is_parameter_file(fn):
            return False
        if "crm_filename" in kwargs:
            return False
        # _guess_crm_filename is an instance method; here the class
        # stands in for the instance.
        if cls._guess_crm_filename(cls, fn) is not None:
            return False
        return True
class AHFCRMArbor(AHFArbor):
    """
    Arbor for AHF data that includes a CRM file.

    The CRM file contains all the halo links, so we don't have to
    assemble them from the mtree files.
    """

    _has_uids = True
    _field_info_class = AHFCRMFieldInfo
    _data_file_class = AHFCRMDataFile

    def _set_paths(self, filename):
        """
        Get both a CRM file and a parameter file.

        Raise a RuntimeError if we can't get both.
        """
        super()._set_paths(filename)

        if self.crm_filename is None:
            self.crm_filename = self._guess_crm_filename(filename)
        elif not os.path.exists(self.crm_filename):
            raise RuntimeError(
                f"Specified crm_filename does not exist: {self.crm_filename}")
        if self.crm_filename is None:
            raise RuntimeError(
                f"crm_filename is None for {type(self)}. This shouldn't be.")

        if self.parameter_filename is None:
            self.parameter_filename = self._guess_parameter_filename(filename)
        elif not os.path.exists(self.parameter_filename):
            raise RuntimeError(
                "Specified parameter_filename does not exist: "
                f"{self.parameter_filename}")
        if self.parameter_filename is None:
            raise RuntimeError(
                f"parameter_filename is None for {type(self)}. This shouldn't be.")

    def _set_parameter_filename(self, filename):
        """
        Intentionally do nothing.

        The parameter filename for this class is resolved in
        _set_paths instead.
        NOTE(review): presumably this overrides a hook of the parent
        class that would otherwise set parameter_filename; confirm
        against CatalogArbor.
        """
        pass

    def _guess_parameter_filename(self, filename):
        """
        Guess parameter filename from the crm filename.

        Candidates are searched in this order:

        1. files in self.directory starting with the key extracted from
           the crm filename (with any frequent midfix removed),
        2. any parameter file in self.directory,
        3. any parameter file in the current working directory.

        Of the candidates found, the one with the highest catalog
        number is returned. Return None if an explicit
        parameter_filename does not exist or if the candidates cannot
        be ordered. Raise a RuntimeError if no candidate is found.
        """
        if self.parameter_filename is not None:
            if os.path.exists(self.parameter_filename):
                return self.parameter_filename
            return None

        # Remove the crm prefix and suffix, but only where they are
        # really there. Slicing with [:-len(x)] would empty the string
        # when x is "" and cut arbitrary characters when the string
        # does not end in x.
        filekey = os.path.basename(filename)
        if filekey.startswith(self._crm_prefix):
            filekey = filekey[len(self._crm_prefix):]
        if self._crm_suffix and filekey.endswith(self._crm_suffix):
            filekey = filekey[:-len(self._crm_suffix)]

        # try a couple guesses at naming conventions,
        # but don't work too hard.
        pfns = []
        for midfix in self._frequent_crm_midfixes:
            if midfix and filekey.endswith(midfix):
                stem = filekey[:-len(midfix)]
            else:
                stem = filekey
            pfns = glob.glob(
                os.path.join(self.directory, stem) +
                f"*{self._par_suffix}")
            if pfns:
                break

        # just look for all files with the right suffix and hope for the best
        if not pfns:
            pfns = glob.glob(
                os.path.join(self.directory, f"*{self._par_suffix}"))
        if not pfns:
            pfns = glob.glob(f"*{self._par_suffix}")
        if not pfns:
            raise RuntimeError(
                f"Could not find any files ending in {self._par_suffix} "
                "from which to get parameters. Put some of those in here.")

        # Best effort: if a candidate has no recognizable catalog
        # number, give up and let the caller report the failure.
        try:
            pfns.sort(key=self._get_file_index)
        except (RuntimeError, ValueError):
            return None
        return pfns[-1]

    def _plant_trees(self):
        """
        Compute the halo links from the CRM file, then plant the trees.

        Does nothing if the trees have already been planted.
        """
        if self.is_planted:
            return

        self._compute_links()
        super()._plant_trees()

    def _compute_links(self):
        """
        Read the CRMratio2 file and hand out a dictionary of
        uid: desc_uid for each data file.

        Raise a RuntimeError if a progenitor line is found before any
        descendent line.
        """
        links = defaultdict(dict)
        my_descid = None

        with open(self.crm_filename, mode="r") as f:
            # skip the first three lines of the file
            for _ in range(3):
                f.readline()

            for line, _ in f_text_block(f, pbar_string="Computing links"):
                if line.startswith("END"):
                    break

                online = line.split()
                thing = online[0]
                # A two-column line starts a new descendent; the lines
                # that follow hold the ids linked to it.
                if len(online) == 2:
                    my_descid = int(thing)
                    continue

                if my_descid is None:
                    raise RuntimeError(
                        "Found a progenitor entry before any descendent "
                        f"entry in {self.crm_filename}.")

                my_id = int(thing)
                # Everything but the last 12 digits of the id is used
                # as the index of the catalog the halo belongs to.
                cid = int(thing[:-12])
                links[cid][my_id] = my_descid

        for df in self.data_files:
            df._links = links[df._catalog_index]

    @classmethod
    def _is_valid(cls, *args, **kwargs):
        """
        Filename must end in .parameter or match the CRM naming
        convention.

        A parameter file is only accepted if an existing CRM file can
        be guessed from it.
        """
        fn = args[0]
        # if it is a crm file, then we are good to go
        if cls._is_crm_file(fn):
            return True
        # if it is not a parameter file, then it can't be of this type
        if not cls._is_parameter_file(fn):
            return False

        # _guess_crm_filename is an instance method; here the class
        # stands in for the instance.
        mtree_fn = cls._guess_crm_filename(cls, fn)
        if mtree_fn is None or not os.path.exists(mtree_fn):
            return False
        return True