Source code for ytree.frontends.ahf.io

"""
AHFArbor io classes and member functions



"""

# -----------------------------------------------------------------------------
# Copyright (c) ytree development team. All rights reserved.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------

from collections import defaultdict
import numpy as np
import os
import re
import weakref

from ytree.frontends.ahf.misc import parse_AHF_file
from ytree.data_structures.io import CatalogDataFile
from ytree.utilities.io import f_text_block
from ytree.utilities.misc import fround


[docs] class AHFDataFile(CatalogDataFile): _redshift_precision = 3
[docs] def __init__(self, filename, arbor): self.arbor = weakref.proxy(arbor) self.filename = filename self._catalog_index = arbor._get_file_index(filename) self.filekey = self.filename[: self.filename.rfind(self.arbor._par_suffix)] self._parse_header() rprec = self._redshift_precision rstr = r"\.z\d\.\d\{%d\}" % rprec res = re.search(rstr, self.filekey) if res: self.data_filekey = self.filekey[: res.end()] else: zfmt = f".0{rprec}f" for inc in [0, -(10**-rprec)]: my_z = format(fround(self.redshift, decimals=rprec) + inc, zfmt) fkey = f"{self.filekey}.z{my_z}" if os.path.exists(fkey + self.arbor._data_suffix): self.data_filekey = fkey break if not hasattr(self, "data_filekey"): raise FileNotFoundError( f"Cannot find data file: {fkey + self.arbor._data_suffix}." ) self._get_other_filenames() self.fh = None self._parse_data_header() self.offsets = None
def _get_other_filenames(self): """ Figure out the various additional filenames we're going to need here. If prefixes for the ahf and mtree files have been set, then we need to look out for this and adjust. """ self.halos_filename = self.data_filekey + self.arbor._data_suffix if self.arbor._ahf_prefix is None: mtree_key = self.data_filekey else: # First, strip of the directory mtree_key = re.sub( f"^{self.arbor.directory}{os.path.sep}", "", self.data_filekey ) # Now, substitute the prefixes mtree_key = re.sub( f"^{self.arbor._ahf_prefix}", f"{self.arbor._mtree_prefix}", mtree_key ) # Return the directory mtree_key = os.path.join(self.arbor.directory, mtree_key) self.mtree_filename = mtree_key + self.arbor._mtree_suffix if not os.path.exists(self.mtree_filename): self.mtree_filename = None def _parse_data_header(self): """ Get header sizes from the two data files ending in .AHF_halos and .AHF_mtree. """ self.open() fh = self.fh fh.seek(0, 2) self.file_size = fh.tell() fh.seek(0) for line, loc in f_text_block(fh): if not line.startswith("#"): loc -= len(line) + 1 break self._hoffset = loc + len(line) + 1 self.close() def _parse_header(self): """ Get header information from the .log and .parameter files. Use that to get the name of the data file. """ vals = parse_AHF_file(self.filekey + ".parameter", {"z": "redshift"}) for par, val in vals.items(): setattr(self, par, val) def open(self): if self.fh is None: self.fh = open(self.halos_filename, "r") _links = None @property def links(self): if self._links is None: self._compute_links() return self._links def _compute_links(self): """ Compute the tree from the graph. AHF computes a graph, where a given halo can have both multiple progenitors and descendents. Use the weight function to determine a unique descendent for each halo. descendent = max (M_ij = N_ij^2 / (N_i * N_j)), where: N_ij: number of shared particles N_i : number of particles in halo i N_j : number of particles in halo j """ data = self._read_mtree() if data is None: self._links = -1 return m = data["shared"] ** 2 / (data["prog_part"] * data["desc_part"]) progids = np.unique(data["prog_id"]) descids = np.empty(progids.size, dtype=progids.dtype) for i, progid in enumerate(progids): prf = data["prog_id"] == progid descids[i] = data["desc_id"][prf][m[prf].argmax()] udata = {"prog_id": progids, "desc_id": descids} self._links = udata def _read_mtree(self): """ Read map of progenitors to descendents. This is the ".AHF_mtree" file. """ if self.mtree_filename is None: return None data = defaultdict(list) descid = descpart = None f = open(self.mtree_filename, "r") for line, offset in f_text_block(f): if line.startswith("#"): continue if line[0].isdigit(): oline = line.split() descid = int(oline[0]) descpart = int(oline[1]) else: oline = line.split() data["shared"].append(int(oline[0])) data["prog_id"].append(int(oline[1])) data["prog_part"].append(int(oline[2])) data["desc_id"].append(descid) data["desc_part"].append(descpart) f.close() if not data: return None for field in data: data[field] = np.array(data[field]) return data def _read_data_default(self, rfields, dtypes): if not rfields: return {} fi = self.arbor.field_info field_data = self._create_field_arrays(rfields, dtypes) offsets = [] self.open() f = self.fh f.seek(self._hoffset) file_size = self.file_size - self._hoffset for line, offset in f_text_block(f, file_size=file_size): offsets.append(offset) sline = line.split() for field in rfields: dtype = dtypes[field] field_data[field].append(dtype(sline[fi[field]["column"]])) self.close() if self.offsets is None: self.offsets = np.array(offsets) return field_data def _read_data_select(self, rfields, tree_nodes, dtypes): if not rfields: return {} fi = self.arbor.field_info nt = len(tree_nodes) field_data = self._create_field_arrays(rfields, dtypes, size=nt) self.open() f = self.fh for i in range(nt): f.seek(self.offsets[tree_nodes[i]._fi]) line = f.readline() sline = line.split() for field in rfields: dtype = dtypes[field] field_data[field][i] = dtype(sline[fi[field]["column"]]) self.close() return field_data def _get_mtree_fields(self, tfields, dtypes, field_data): """ Use data from the mtree file to get descendent ids. """ if not tfields: return links = self.links descids = np.empty(len(field_data["ID"]), dtype=dtypes["desc_id"]) if self.links == -1: descids[:] = -1 else: for i, hid in enumerate(field_data["ID"]): inlink = hid == links["prog_id"] if not inlink.any(): descids[i] = -1 else: descids[i] = links["desc_id"][np.where(inlink)[0][0]] field_data["desc_id"] = descids def _read_fields(self, fields, tree_nodes=None, dtypes=None): if dtypes is None: dtypes = {} fi = self.arbor.field_info afields = [field for field in fields if fi[field].get("source") == "arbor"] my_fields = set(fields).difference(afields) # Separate fields into one that come from the file header, # the mtree file, and the halos file. data_fields = defaultdict(list) for field in my_fields: source = fi[field]["file"] data_fields[source].append(field) hfields = data_fields.pop("header", []) rfields = data_fields.pop("halos", []) tfields = data_fields.pop("mtree", []) # If we needs desc_ids, make sure to get IDs so # we can link them. if tfields: if "ID" not in rfields: rfields.append("ID") dtypes.update(self.arbor._node_io._determine_dtypes(["ID"])) field_data = {} if tree_nodes is None: field_data.update(self._read_data_default(rfields, dtypes)) else: # fields from the actual data field_data.update(self._read_data_select(rfields, tree_nodes, dtypes)) # fields from arbor-related info field_data.update(self._get_arbor_fields(afields, tree_nodes, dtypes)) # fields from the file header field_data.update(self._get_header_fields(hfields, tree_nodes, dtypes)) # use data from the mtree file to get descendent ids self._get_mtree_fields(tfields, dtypes, field_data) return field_data
class AHFCRMDataFile(AHFDataFile): def _compute_links(self): """ Read the CRMratio2 file. """ def _compute_links(self): self.arbor._compute_links() def _get_mtree_fields(self, tfields, dtypes, field_data): if not tfields: return my_ids = field_data["ID"] field_data["desc_id"] = descids = np.full( len(my_ids), -1, dtype=dtypes["desc_id"] ) if not self.links: return for i in range(descids.size): descids[i] = self.links.get(my_ids[i], -1)