"""
AHFArbor io classes and member functions
"""
#-----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------
from collections import defaultdict
import numpy as np
import os
import re
import weakref
from ytree.frontends.ahf.misc import \
parse_AHF_file
from ytree.data_structures.io import \
CatalogDataFile
from ytree.utilities.io import \
f_text_block
from ytree.utilities.misc import fround
class AHFDataFile(CatalogDataFile):
    # number of decimal digits in the redshift part of data filenames
    _redshift_precision = 3

    def __init__(self, filename, arbor):
        """
        Locate and open the data files associated with one AHF catalog.

        Parameters
        ----------
        filename : str
            Path to the .parameter file for this catalog.
        arbor : Arbor
            The arbor this data file belongs to (held as a weak proxy
            to avoid a reference cycle).

        Raises
        ------
        FileNotFoundError
            If no matching .AHF_halos data file can be found.
        """
        self.arbor = weakref.proxy(arbor)
        self.filename = filename
        self._catalog_index = arbor._get_file_index(filename)
        self.filekey = self.filename[:self.filename.rfind(self.arbor._par_suffix)]
        self._parse_header()

        # Data files are named <filekey>.z<redshift><data suffix>.
        # If the filekey already carries a redshift string, use it.
        rprec = self._redshift_precision
        # BUGFIX: braces must be unescaped so {N} acts as a repetition
        # count; r"\{%d\}" matched the literal text "{N}" and the fast
        # path could never succeed. Also allow multi-digit redshifts.
        rstr = r"\.z\d+\.\d{%d}" % rprec
        res = re.search(rstr, self.filekey)
        if res:
            self.data_filekey = self.filekey[:res.end()]
        else:
            # Otherwise, rebuild the redshift string from the header
            # value, trying both rounding directions to absorb
            # differences in rounding convention.
            zfmt = f".0{rprec}f"
            for inc in [0, -10**-rprec]:
                my_z = format(fround(self.redshift, decimals=rprec) + inc, zfmt)
                fkey = f"{self.filekey}.z{my_z}"
                if os.path.exists(fkey + self.arbor._data_suffix):
                    self.data_filekey = fkey
                    break

            if not hasattr(self, "data_filekey"):
                raise FileNotFoundError(
                    f"Cannot find data file: {fkey + self.arbor._data_suffix}.")

        self.halos_filename = self.data_filekey + self.arbor._data_suffix
        self.mtree_filename = self.data_filekey + self.arbor._mtree_suffix
        # A missing mtree file means no descendent information for
        # this catalog; downstream code treats None as "no links".
        if not os.path.exists(self.mtree_filename):
            self.mtree_filename = None

        self.fh = None
        self._parse_data_header()
        self.offsets = None
def _parse_data_header(self):
"""
Get header sizes from the two data files ending
in .AHF_halos and .AHF_mtree.
"""
self.open()
fh = self.fh
fh.seek(0, 2)
self.file_size = fh.tell()
fh.seek(0)
for line, loc in f_text_block(fh):
if not line.startswith("#"):
loc -= len(line) + 1
break
self._hoffset = loc + len(line) + 1
self.close()
def _parse_header(self):
"""
Get header information from the .log and .parameter
files. Use that to get the name of the data file.
"""
vals = parse_AHF_file(
self.filekey + ".parameter", {"z": "redshift"})
for par, val in vals.items():
setattr(self, par, val)
def open(self):
if self.fh is None:
self.fh = open(self.halos_filename, "r")
_links = None
@property
def links(self):
if self._links is None:
self._compute_links()
return self._links
def _compute_links(self):
"""
Compute the tree from the graph.
AHF computes a graph, where a given halo can
have both multiple progenitors and descendents.
Use the weight function to determine a unique
descendent for each halo.
descendent = max (M_ij = N_ij^2 / (N_i * N_j)),
where:
N_ij: number of shared particles
N_i : number of particles in halo i
N_j : number of particles in halo j
"""
data = self._read_mtree()
if data is None:
self._links = -1
return
m = data["shared"]**2 / (data["prog_part"] * data["desc_part"])
progids = np.unique(data["prog_id"])
descids = np.empty(progids.size, dtype=progids.dtype)
for i, progid in enumerate(progids):
prf = data["prog_id"] == progid
descids[i] = data["desc_id"][prf][m[prf].argmax()]
udata = {"prog_id": progids, "desc_id": descids}
self._links = udata
def _read_mtree(self):
"""
Read map of progenitors to descendents.
This is the ".AHF_mtree" file.
"""
if self.mtree_filename is None:
return None
data = defaultdict(list)
descid = descpart = None
f = open(self.mtree_filename, "r")
for line, offset in f_text_block(f):
if line.startswith("#"):
continue
if line[0].isdigit():
oline = line.split()
descid = int(oline[0])
descpart = int(oline[1])
else:
oline = line.split()
data["shared"].append(int(oline[0]))
data["prog_id"].append(int(oline[1]))
data["prog_part"].append(int(oline[2]))
data["desc_id"].append(descid)
data["desc_part"].append(descpart)
f.close()
if not data:
return None
for field in data:
data[field] = np.array(data[field])
return data
def _read_data_default(self, rfields, dtypes):
if not rfields:
return {}
fi = self.arbor.field_info
field_data = self._create_field_arrays(rfields, dtypes)
offsets = []
self.open()
f = self.fh
f.seek(self._hoffset)
file_size = self.file_size - self._hoffset
for line, offset in f_text_block(f, file_size=file_size):
offsets.append(offset)
sline = line.split()
for field in rfields:
dtype = dtypes[field]
field_data[field].append(dtype(sline[fi[field]["column"]]))
self.close()
if self.offsets is None:
self.offsets = np.array(offsets)
return field_data
def _read_data_select(self, rfields, tree_nodes, dtypes):
if not rfields:
return {}
fi = self.arbor.field_info
nt = len(tree_nodes)
field_data = self._create_field_arrays(
rfields, dtypes, size=nt)
self.open()
f = self.fh
for i in range(nt):
f.seek(self.offsets[tree_nodes[i]._fi])
line = f.readline()
sline = line.split()
for field in rfields:
dtype = dtypes[field]
field_data[field][i] = dtype(sline[fi[field]["column"]])
self.close()
return field_data
def _get_mtree_fields(self, tfields, dtypes, field_data):
"""
Use data from the mtree file to get descendent ids.
"""
if not tfields:
return
links = self.links
descids = np.empty(
len(field_data["ID"]),
dtype=dtypes['desc_id'])
if self.links == -1:
descids[:] = -1
else:
for i, hid in enumerate(field_data["ID"]):
inlink = hid == links["prog_id"]
if not inlink.any():
descids[i] = -1
else:
descids[i] = \
links["desc_id"][np.where(inlink)[0][0]]
field_data["desc_id"] = descids
def _read_fields(self, fields, tree_nodes=None, dtypes=None):
if dtypes is None:
dtypes = {}
fi = self.arbor.field_info
afields = [field for field in fields
if fi[field].get("source") == "arbor"]
my_fields = set(fields).difference(afields)
# Separate fields into one that come from the file header,
# the mtree file, and the halos file.
data_fields = defaultdict(list)
for field in my_fields:
source = fi[field]["file"]
data_fields[source].append(field)
hfields = data_fields.pop("header", [])
rfields = data_fields.pop("halos", [])
tfields = data_fields.pop("mtree", [])
# If we needs desc_ids, make sure to get IDs so
# we can link them.
if tfields:
if "ID" not in rfields:
rfields.append("ID")
dtypes.update(self.arbor._node_io._determine_dtypes(["ID"]))
field_data = {}
if tree_nodes is None:
field_data.update(
self._read_data_default(rfields, dtypes))
else:
# fields from the actual data
field_data.update(
self._read_data_select(rfields, tree_nodes, dtypes))
# fields from arbor-related info
field_data.update(
self._get_arbor_fields(afields, tree_nodes, dtypes))
# fields from the file header
field_data.update(
self._get_header_fields(
hfields, tree_nodes, dtypes))
# use data from the mtree file to get descendent ids
self._get_mtree_fields(tfields, dtypes, field_data)
return field_data
class AHFNewDataFile(AHFDataFile):
    """
    Data file class for AHF catalogs whose halo links come from a
    CRMratio2 file, which is parsed by the arbor itself.
    """
    def _compute_links(self):
        """
        Compute links by reading the CRMratio2 file.

        Delegated to the arbor, which reads the CRMratio2 file for
        all catalogs at once.  (The original source defined this
        method twice; the first, docstring-only definition was
        silently discarded — they are merged here.)
        """
        self.arbor._compute_links()

    def _get_mtree_fields(self, tfields, dtypes, field_data):
        """
        Assign descendent ids from the link map, defaulting to -1
        for halos with no linked descendent.
        """
        if not tfields:
            return

        my_ids = field_data["ID"]
        field_data["desc_id"] = descids = np.full(
            len(my_ids), -1, dtype=dtypes["desc_id"])

        # NOTE(review): assumes the links computed by
        # arbor._compute_links form a dict-like mapping of halo id
        # to descendent id — confirm against the arbor class.
        # Property access hoisted out of the loop (it is invariant
        # once computed).
        links = self.links
        if not links:
            return
        for i, hid in enumerate(my_ids):
            descids[i] = links.get(hid, -1)