# Source code for subsurface.structs.base_structures.common_data_utils

import os
from typing import Union

from subsurface.structs.base_structures.structured_data import StructuredData
from subsurface.structs.base_structures.unstructured_data import UnstructuredData


# Names exported by ``from <this module> import *``.
__all__ = ['replace_outliers', 'to_netcdf', 'default_path_and_name']


def replace_outliers(base_data: Union[StructuredData, UnstructuredData], dim=0, perc=0.99, replace_for=None):
    """Mask the outliers of one entry of ``base_data.data``, in place.

    Entries of ``base_data.data[dim]`` whose absolute value is greater than
    the ``perc`` quantile of that entry are treated as outliers.

    Adapted from an answer by Edoardo Guerreiro:
    https://stackoverflow.com/questions/60816533/is-there-a-built-in-function-in-xarray-to-remove-outliers-from-a-dataset

    Args:
        base_data: Object whose ``data`` attribute holds the values to clean.
        dim: Key used to select the entry of ``base_data.data`` to process.
        perc: Quantile handed to ``quantile`` to compute the threshold
            (for example ``0.99``).
        replace_for: ``'max'`` fills the outliers with the maximum of the
            remaining values and ``'min'`` with their minimum. Any other
            value, including the default ``None``, leaves the outliers
            as NaN.

    Returns:
        ``base_data.data`` itself, after ``data[dim]`` has been overwritten
        with the cleaned values (the input is modified, not copied).
    """
    data = base_data.data
    values = data[dim]

    # NOTE: the threshold is the quantile of the *signed* values, while the
    # comparison below is made against the *absolute* values.
    threshold = values.quantile(perc)

    # ``where`` keeps the entries that satisfy the condition and turns the
    # others (the outliers) into NaN.
    cleaned = values.where(abs(values) <= threshold)

    if replace_for == 'max':
        cleaned = cleaned.fillna(cleaned.max().values)
    elif replace_for == 'min':
        cleaned = cleaned.fillna(cleaned.min().values)

    data[dim] = cleaned
    return data


def to_netcdf(base_data: Union[StructuredData, UnstructuredData], path=None, file: str = None, **kwargs):
    """Write ``base_data.data`` out through its own ``to_netcdf`` method.

    Args:
        base_data: Object whose ``data`` attribute exposes ``to_netcdf``.
        path: Destination handed to ``base_data.data.to_netcdf``. When it is
            ``None`` and ``file`` is given, the destination is built by
            ``default_path_and_name`` from ``file``.
        file: File name used only when ``path`` is ``None``.
            NOTE(review): ``file`` is ignored whenever ``path`` is provided;
            confirm that this is the intended contract.
        **kwargs: Forwarded unchanged to ``base_data.data.to_netcdf``.

    Returns:
        Whatever ``base_data.data.to_netcdf`` returns.
    """
    if path is None and file is not None:
        # The helper returns ``(name, path)``; only the full path is needed.
        _, path = default_path_and_name(path, file)
    return base_data.data.to_netcdf(path, **kwargs)


def default_path_and_name(path, name='subsurface_data.nc', ):
    """Make sure the output directory exists and build the full file path.

    Args:
        path: Directory that will contain the file. A falsy value (``None``
            or ``''``) selects ``'./'``.
        name: File name appended to ``path``.

    Returns:
        Tuple ``(name, path)`` where ``path`` is the directory joined with
        ``name``.
    """
    if not path:
        path = './'

    if os.path.isdir(path):
        print("Directory already exists, files will be overwritten")
    else:
        # ``exist_ok`` covers the case where the directory is created by
        # someone else between the check above and this call.
        os.makedirs(path, exist_ok=True)

    # ``os.path.join`` avoids the doubled separator that plain string
    # formatting produced for inputs such as './' or 'out/'.
    return name, os.path.join(path, name)