Source code for pyatoa.core.config

#!/usr/bin/env python3
"""
Configuration of User-set parameters within the package.
Contains external functions to set Config objects of Pyflex and Pyadjoint.
"""
import yaml
import numpy as np
from copy import deepcopy
from pyatoa.utils.form import format_iter, format_step

from pyflex import Config as PyflexConfig
from pyadjoint import get_config as get_pyadjoint_config
from pyadjoint import ADJSRC_TYPES


class Config:
    """
    The Config class is the main interaction object between the User and
    workflow. It is used by :class:`Manager <pyatoa.core.manager.Manager>`
    for workflow management, and also for information sharing between Pyatoa
    objects and functions. The Config can be read to and written from external
    files and ASDFDataSets.
    """
    def __init__(self, yaml_fid=None, ds=None, path=None, iteration=None,
                 step_count=None, event_id=None, min_period=10,
                 max_period=100, rotate_to_rtz=False, unit_output="DISP",
                 component_list=None, adj_src_type="cc_traveltime",
                 observed_tag="observed", synthetic_tag=None,
                 st_obs_type="obs", st_syn_type="syn", win_amp_ratio=0.,
                 pyflex_parameters=None, pyadjoint_parameters=None):
        """
        Initiate the Config object either from scratch, or read from external.

        .. note::
            Keyword arguments are passed to the Pyflex and Pyadjoint config
            objects so that there is only one entry point to all config
            objects.

        :type yaml_fid: str
        :param yaml_fid: id for .yaml file if config is to be loaded externally
        :type iteration: int
        :param iteration: if running an inversion, the current iteration. Used
            for internal path naming, as well as interaction with Seisflows
            via Pyaflowa.
        :type step_count: int
        :param step_count: if running an inversion, the current step count in
            the line search. Used for internal path naming, and interaction
            with Seisflows via Pyaflowa.
        :type event_id: str
        :param event_id: unique event identifier for data gathering,
            annotations
        :type min_period: float
        :param min_period: minimum bandpass filter period
        :type max_period: float
        :param max_period: maximum bandpass filter period
        :type rotate_to_rtz: bool
        :param rotate_to_rtz: rotate components from NEZ to RTZ
        :type unit_output: str
        :param unit_output: units of stream, to be fed into preprocessor for
            instrument response removal. Available: 'DISP', 'VEL', 'ACC'
        :type adj_src_type: str
        :param adj_src_type: method of misfit quantification for Pyadjoint
        :type st_obs_type: str
        :param st_obs_type: Tell Pyatoa how to treat `st_obs`, either:

            - 'data': as data, which involves instrument response removal and
              data gathering based on SEED formatted directories
            - 'syn': as synthetics, which skips instrument response removal
              and gathers data based on a simpler synthetic dir. structure

            Defaults to 'data'
        :type st_syn_type: str
        :param st_syn_type: Tell Pyatoa how to treat `st_syn`, either:

            - 'data': as data, which involves instrument response removal and
              data gathering based on SEED formatted directories
            - 'syn': as synthetics, which skips instrument response removal
              and gathers data based on a simpler synthetic dir. structure

            Defaults to 'syn'
        :type observed_tag: str
        :param observed_tag: Tag to use for the ASDF dataset to label and
            search for ObsPy streams of observation data. Defaults to
            'observed'
        :type synthetic_tag: str
        :param synthetic_tag: Tag to use for the ASDF dataset to label and
            search for ObsPy streams of synthetic data. Defaults to
            'synthetic_{model_num}'. Tag must be formatted before use.
        :type pyflex_parameters: dict
        :param pyflex_parameters: overwrite for Pyflex parameters defined in
            the Pyflex.Config object. Incorrectly defined argument names will
            raise a TypeError. See Pyflex docs for detailed parameter
            definitions: http://adjtomo.github.io/pyflex/#config-object
        :type pyadjoint_parameters: dict
        :param pyadjoint_parameters: overwrite for Pyadjoint parameters
            defined in the Pyadjoint.Config object for the given
            `adj_src_type`. Incorrectly defined argument names will raise a
            TypeError.
            See Pyadjoint docs for detailed parameter definitions:
            https://adjtomo.github.io/pyadjoint/
        :raises TypeError: If incorrect arguments are provided to the
            underlying Pyflex or Pyadjoint Config objects.
        """
        self.iteration = iteration
        self.step_count = step_count
        self.event_id = event_id
        self.min_period = min_period
        self.max_period = max_period
        self.rotate_to_rtz = rotate_to_rtz
        self.unit_output = unit_output.upper()
        self.observed_tag = observed_tag
        # Allow manual override of synthetic tag, but keep internal and rely
        # on calling property for actual value
        self._synthetic_tag = synthetic_tag
        self.adj_src_type = adj_src_type
        self.st_obs_type = st_obs_type
        self.st_syn_type = st_syn_type
        self.win_amp_ratio = win_amp_ratio
        self.component_list = component_list

        # To be filled in by reading or with default parameters
        self.pyflex_config = None
        self.pyadjoint_config = None

        # If reading from a YAML file or from a dataset, do not set the
        # external Configs (pyflex and pyadjoint) because these will be read
        # in verbatim
        if ds or yaml_fid:
            if ds:
                assert(path is not None), \
                    "'path' required to load from dataset"
                self._read_asdf(ds, path=path)
            elif yaml_fid:
                self._read_yaml(yaml_fid)
        # If initiating normally, the external Configs need to be set based on
        # map names and keyword arguments
        else:
            # Set Pyflex and Pyadjoint Config objects as attributes
            pyflex_parameters = pyflex_parameters or {}
            self.pyflex_config = PyflexConfig(min_period=min_period,
                                              max_period=max_period,
                                              **pyflex_parameters)
            pyadjoint_parameters = pyadjoint_parameters or {}
            # Double difference flag will be set by the adjoint source type
            self.pyadjoint_config = get_pyadjoint_config(
                adjsrc_type=adj_src_type, min_period=min_period,
                max_period=max_period, **pyadjoint_parameters
            )
        # Run internal sanity checks
        self._check()
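    # A minimal usage sketch (illustrative only, not part of the module).
    # Parameter values below are assumptions for demonstration; an incorrectly
    # named Pyflex parameter would raise a TypeError, as noted above.
    #
    #   >>> from pyatoa import Config
    #   >>> cfg = Config(event_id="EVENT_XYZ", min_period=10, max_period=30,
    #   ...              adj_src_type="cc_traveltime",
    #   ...              pyflex_parameters={"stalta_waterlevel": 0.08})
    #   >>> cfg.pyflex_config.min_period
    #   10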
    def __str__(self):
        """
        String representation of the class for print statements.
        It separates information into similar bins for readability.
        """
        # Model and step need to be formatted before printing
        str_out = ("CONFIG\n"
                   f"    {'iteration:':<25}{self.iter_tag}\n"
                   f"    {'step_count:':<25}{self.step_tag}\n"
                   f"    {'event_id:':<25}{self.event_id}\n"
                   )
        # Format the remainder of the keys identically
        key_dict = {"Process": ["min_period", "max_period", "unit_output",
                                "rotate_to_rtz", "win_amp_ratio",
                                "st_obs_type", "st_syn_type"],
                    "Labels": ["component_list", "observed_tag",
                               "synthetic_tag"],
                    "External": ["adj_src_type", "pyflex_config",
                                 "pyadjoint_config"]
                    }
        for key, items in key_dict.items():
            str_out += f"{key.upper()}\n"
            for item in items:
                str_out += f"    {item+':':<25}{getattr(self, item)}\n"
        return str_out
    def __repr__(self):
        """Simple call string representation"""
        return self.__str__()
    @property
    def pfcfg(self):
        """Simple dictionary representation of the Pyflex Config object"""
        return vars(self.pyflex_config)
    @property
    def pacfg(self):
        """Simple dictionary representation of the Pyadjoint Config object"""
        return vars(self.pyadjoint_config)
    @property
    def iter_tag(self):
        """String formatted version of iteration, e.g. 'i00'"""
        if self.iteration is not None:
            return format_iter(self.iteration)
        else:
            return None
    @property
    def step_tag(self):
        """String formatted version of step, e.g. 's00'"""
        if self.step_count is not None:
            return format_step(self.step_count)
        else:
            return None
    @property
    def eval_tag(self):
        """String formatted version of iter and step, e.g. 'i01s00'"""
        return f"{self.iter_tag}{self.step_tag}"
    @property
    def synthetic_tag(self):
        """Tag to be used for synthetic data, uses iteration and step count"""
        if self._synthetic_tag is not None:
            return self._synthetic_tag

        # If no override value is given, fall back to the default tag
        tag = self._get_aux_path(default=None, separator='')
        if tag is not None:
            return f"synthetic_{tag}"
        else:
            return "synthetic"
    @property
    def aux_path(self):
        """Property to quickly get a bog-standard aux path, e.g. 'i00/s00'"""
        return self._get_aux_path()
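    # A short sketch of how the tag properties behave (illustrative values;
    # the exact strings come from format_iter/format_step, e.g. 'i01', 's00'):
    #
    #   >>> cfg = Config(iteration=1, step_count=0)
    #   >>> cfg.iter_tag, cfg.step_tag, cfg.eval_tag
    #   ('i01', 's00', 'i01s00')
    #   >>> cfg.synthetic_tag     # no override given, falls back to eval tag
    #   'synthetic_i01s00'
    #   >>> cfg.aux_path          # default '/' separator between iter and step
    #   'i01/s00'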
    def _check(self):
        """
        A series of sanity checks to make sure that the configuration
        parameters are set properly to avoid any problems throughout the
        workflow. Should normally be run after any parameters are changed to
        make sure that they are acceptable.
        """
        if self.iteration is not None:
            assert(self.iteration >= 1), "Iterations must start at 1"
        if self.step_count is not None:
            assert(self.step_count >= 0), "Step count must start from 0"

        # Check period range is acceptable
        if self.min_period and self.max_period:
            assert(self.min_period < self.max_period), \
                "min_period must be less than max_period"

        # Check if unit output properly set, dictated by ObsPy units
        acceptable_units = ['DISP', 'VEL', 'ACC']
        assert(self.unit_output in acceptable_units), \
            f"unit_output should be in {acceptable_units}"

        # Set the component list. Rotate component list if necessary
        if self.rotate_to_rtz:
            if not self.component_list:
                self.component_list = ["R", "T", "Z"]
            else:
                for comp in ["N", "E"]:
                    assert(comp not in self.component_list), \
                        f"rotated component list cannot include '{comp}'"
        else:
            if not self.component_list:
                self.component_list = ["E", "N", "Z"]

        # Check that the amplitude ratio is a reasonable number
        if self.win_amp_ratio > 0:
            assert(self.win_amp_ratio < 1), \
                "window amplitude ratio should be < 1"

        assert(self.adj_src_type in ADJSRC_TYPES), \
            f"Pyadjoint `adj_src_type` must be in {ADJSRC_TYPES}"
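    # Behavior sketch for the checks above (illustrative): with no explicit
    # component_list, rotation selects the rotated components, while an
    # unrecognized unit_output (e.g. 'M/S') trips the unit assertion.
    #
    #   >>> Config(rotate_to_rtz=True).component_list
    #   ['R', 'T', 'Z']
    #   >>> Config(rotate_to_rtz=False).component_list
    #   ['E', 'N', 'Z']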
    def _get_aux_path(self, default="default", separator="/"):
        """
        Pre-formatted path to be used for tagging and identification in ASDF
        dataset auxiliary data. Internal function to be called by the
        `aux_path` property.

        :type default: str
        :param default: if no iteration or step information is given, the path
            will default to this string. By default it is 'default'.
        :type separator: str
        :param separator: if an iteration and step_count are available, the
            separator will be placed between them. Defaults to '/', use '' for
            no separator.
        """
        if (self.iter_tag is not None) and (self.step_tag is not None):
            # model/step/window_tag
            path = separator.join([self.iter_tag, self.step_tag])
        elif self.iter_tag is not None:
            path = self.iter_tag
        else:
            path = default
        return path
    @staticmethod
    def _check_io_format(fid, fmt=None):
        """
        A simple check before reading or writing the config to determine what
        file format to use. Currently accepted file formats are yaml, asdf and
        ascii.

        :type fmt: str
        :param fmt: format specified by the User
        :rtype: str
        :return: format string to be understood by the calling function
        """
        acceptable_formats = ["yaml", "asdf", "ascii"]
        if fmt not in acceptable_formats:
            # If no format is given, try to guess the format from the file name
            from pyasdf.asdf_data_set import ASDFDataSet

            if isinstance(fid, str):
                if ("yaml" in fid) or ("yml" in fid):
                    return "yaml"
                elif ("txt" in fid) or ("ascii" in fid):
                    return "ascii"
                else:
                    raise TypeError(
                        f"format must be given in {acceptable_formats}")
            elif isinstance(fid, ASDFDataSet):
                return "asdf"
            else:
                raise TypeError(f"file must be given in {acceptable_formats}")
        else:
            return fmt
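    # Format-guessing sketch (filenames are illustrative): a string containing
    # 'yaml'/'yml' maps to "yaml", 'txt'/'ascii' maps to "ascii", an open
    # ASDFDataSet maps to "asdf", and anything else raises a TypeError.
    #
    #   >>> Config._check_io_format("my_config.yaml")
    #   'yaml'
    #   >>> Config._check_io_format("parameters.txt")
    #   'ascii'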
    def copy(self):
        """
        Simple convenience function to return a deep copy of the Config
        """
        return deepcopy(self)
    def write(self, write_to, fmt=None):
        """
        Wrapper for the underlying low-level write functions.

        :type fmt: str
        :param fmt: format to save parameters to. Available:

            * yaml: Write all parameters to a .yaml file which can be read
              later
            * ascii: Write parameters to a simple ascii file, not very smart
              and yaml is preferable in most cases
            * asdf: Save the Config into an ASDFDataSet under the auxiliary
              data attribute
        :type write_to: str or pyasdf.ASDFDataSet
        :param write_to: filename to save config to, or dataset to save to
        """
        fmt = self._check_io_format(write_to, fmt)

        if fmt.lower() == "ascii":
            self._write_ascii(write_to)
        elif fmt.lower() == "yaml":
            self._write_yaml(write_to)
        elif fmt.lower() == "asdf":
            self._write_asdf(write_to)
    def read(self, read_from, path=None, fmt=None):
        """
        Wrapper for the underlying low-level read functions.

        :type read_from: str or pyasdf.asdf_data_set.ASDFDataSet
        :param read_from: filename to read config from, or ds to read from
        :type path: str
        :param path: if fmt='asdf', path to the config in the aux data
        :type fmt: str
        :param fmt: file format to read parameters from, will be guessed but
            can also be explicitly set (available: 'yaml', 'ascii', 'asdf')
        """
        fmt = self._check_io_format(read_from, fmt)

        if fmt.lower() == "yaml":
            try:
                self._read_yaml(read_from)
            except ValueError as e:
                print(f"Unknown yaml format for file {read_from}, {e}")
        elif fmt.lower() == "asdf":
            assert(path is not None), "path must be defined"
            self._read_asdf(read_from, path=path)
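    # A hedged round-trip sketch using an ASDF dataset (the dataset filename
    # is illustrative); the format is guessed from the type of `write_to`.
    #
    #   >>> from pyasdf import ASDFDataSet
    #   >>> ds = ASDFDataSet("example.h5")
    #   >>> cfg = Config(iteration=1, step_count=0)
    #   >>> cfg.write(ds)                     # stored under Configs/i01/s00
    #   >>> cfg_in = Config(ds=ds, path="i01/s00")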
    def _write_yaml(self, filename):
        """
        Write config parameters to a yaml file, retain order.

        :type filename: str
        :param filename: filename to save yaml file
        """
        from os.path import splitext

        # Ensure file ending
        if splitext(filename)[1] != ".yaml":
            filename += ".yaml"
        with open(filename, "w") as f:
            yaml.dump(vars(self), f, default_flow_style=False,
                      sort_keys=False)
    def _write_asdf(self, ds):
        """
        Save the Config values as a parameter dictionary in the ASDF dataset.
        Converts types to play nice with ASDF Auxiliary Data. Flattens
        dictionaries and external Config objects for easy storage.

        :type ds: pyasdf.asdf_data_set.ASDFDataSet
        :param ds: dataset to save the config file to
        """
        # Deep copy to ensure that we aren't editing the Config parameters
        attrs = vars(deepcopy(self))

        add_attrs = {}
        del_attrs = []
        for key, item in attrs.items():
            if item is None:
                # HDF doesn't support NoneType so convert to string
                attrs[key] = "None"
            elif isinstance(item, dict) or ("config" in key):
                # Flatten dictionaries, add prefix, delete original
                try:
                    # Config objects will need to be converted to dictionaries
                    vars_ = vars(item)
                except TypeError:
                    vars_ = item
                # Prepend a prefix for easier read-back, also convert NoneTypes
                vars_ = {f"{key}_{k}": ('' if i is None else i)
                         for k, i in vars_.items()
                         }
                del_attrs.append(key)
                add_attrs.update(vars_)

        # Update the dictionary after the fact
        for key in del_attrs:
            attrs.pop(key)
        attrs.update(add_attrs)

        ds.add_auxiliary_data(data_type="Configs", data=np.array([True]),
                              path=self.aux_path, parameters=attrs
                              )
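    # Sketch of the flattening performed above (parameter name is an
    # illustrative assumption): a nested attribute such as
    # pyflex_config.stalta_waterlevel is stored as the flat key
    # 'pyflex_config_stalta_waterlevel', and NoneType values become the
    # strings 'None'/'' so they are HDF5-compatible. _read_asdf() below
    # reverses this mapping by stripping the two-word prefix from each key.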
    def _write_ascii(self, filename):
        """
        Write the config parameters to an ascii file.

        :type filename: str
        :param filename: filename to write the ascii file to
        """
        attrs = vars(self)
        with open(filename, "w") as f:
            f.write("PYATOA CONFIGURATION FILE\n")
            for key_a, item_a in attrs.items():
                # Excludes writing the Pyflex and Pyadjoint Config classes, but
                # instead writes the parameters of those Configs separately
                try:
                    attrs_b = vars(item_a)
                    f.write(f"{key_a}\n")
                    for key_b, item_b in attrs_b.items():
                        f.write(f"\t{key_b}: {item_b}\n")
                except TypeError:
                    f.write(f"{key_a}: {item_a}\n")
    def _read_yaml(self, filename):
        """
        Read config parameters from a yaml file and parse them to attributes.

        :type filename: str
        :param filename: filename of the yaml file to read
        :rtype: dict
        :return: keyword arguments that do not belong to Pyatoa are passed
            back as a dictionary object, these are expected to be arguments
            that are to be used in Pyflex and Pyadjoint configs
        :raises ValueError: if unrecognized kwargs are found in the yaml file
        """
        with open(filename, "r") as f:
            attrs = yaml.load(f, Loader=yaml.Loader)

        unused_kwargs = {}
        for key, item in attrs.items():
            if hasattr(self, key.lower()):
                setattr(self, key.lower(), item)
            else:
                unused_kwargs[key.lower()] = item

        if unused_kwargs:
            raise ValueError(f"{list(unused_kwargs)} are not recognized "
                             "keyword arguments for a Config yaml file. Maybe "
                             "you meant to use the parameter 'seisflows_yaml'"
                             )
    def _read_asdf(self, ds, path):
        """
        Read and set config parameters from an ASDF dataset. Assumes that all
        necessary parameters are located in the auxiliary data subgroup of the
        dataset, which will be the case if the write_to_asdf() function was
        used. Assumes some things about the structure of the auxiliary data.

        :type ds: pyasdf.asdf_data_set.ASDFDataSet
        :param ds: dataset with config parameters to read
        :type path: str
        :param path: model number e.g. 'm00' or 'default', or 'm00/s00'
        """
        # Check if nested paths are provided
        splitpath = path.split("/")
        if len(splitpath) > 1:
            cfgin = ds.auxiliary_data.Configs
            for p in splitpath:
                cfgin = cfgin[p]
            cfgin = cfgin.parameters
        else:
            cfgin = ds.auxiliary_data.Configs[path].parameters

        # Parameters from flattened dictionaries will need special treatment
        pyflex_config, pyadjoint_config = {}, {}

        for key, item in cfgin.items():
            # Convert the item into expected native Python objects
            if isinstance(item, str):
                item = None if (item == "None" or item == "") else item
            else:
                try:
                    item = item.item()
                except ValueError:
                    item = item.tolist()

            # Put the item in the correct dictionary
            if "pyflex_config" in key:
                # Ensure that empties are set to NoneType
                pyflex_config["_".join(key.split('_')[2:])] = item
            elif "pyadjoint_config" in key:
                # e.g. pyadjoint_config_dlna_sigma_min -> dlna_sigma_min
                pyadjoint_config["_".join(key.split('_')[2:])] = item
            else:
                # Normal Config attribute
                setattr(self, key, item)

        # Set Pyflex and Pyadjoint Config objects as attributes
        self.pyflex_config = PyflexConfig(**pyflex_config)
        # Double difference is stored but not required
        pyadjoint_config.pop("double_difference")
        self.pyadjoint_config = get_pyadjoint_config(**pyadjoint_config)