Getting started

Authors: Miguel de la Varga and Alexander Juestel

This example shows how to read into subsurface structures a bunch of different data such as:

  • well data -> from csv all in one single file

  • topography -> from tif

  • Cross sections with textures from trace -> shp, png

  • Exported gempy model from netcdf

import shutil
import pandas as pd

import subsurface as ss
import pooch

Read wells:

We can read well data - using welly as the backend. First we need to have the data in the right format for digesting.

# Pulling data example
import subsurface.reader.profiles.profiles_core
import subsurface.reader.read_netcdf
import subsurface.reader.topography.topo_core
import subsurface.reader.wells.wells_api
from subsurface.reader.wells.wells_api import read_wells_to_unstruct, borehole_location_to_unstruct
from subsurface.reader.readers_data import ReaderWellsHelper, ReaderFilesHelper
from subsurface.structs.base_structures.common_data_utils import replace_outliers

model_file = pooch.retrieve(
    url="https://github.com/cgre-aachen/gempy_data/raw/master/"
        "data/subsurface/example1.zip",
    known_hash=None
)

data_path = model_file[:-4]
shutil.unpack_archive(model_file, extract_dir=model_file[:-4])

# Read original file
ori_wells = pd.read_csv(data_path + '/wells.csv')

# Add top and base columns
wells_fixed = ss.reader.wells.add_tops_from_base_and_altitude_in_place(
    ori_wells,
    'Index',
    'Z',
    'Altitude'
)

wells_fixed['md'] = wells_fixed['top']
wells_fixed.to_csv(data_path + '/wells.csv')
wells_fixed

Out:

/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pandas/core/indexing.py:1773: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)
/home/runner/work/subsurface/subsurface/examples/getting_started/getting_started.py:65: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wells_fixed['md'] = wells_fixed['top']
Unnamed: 0 Unnamed: 0.1 Unnamed: 0.1.1 Index X Y Z Altitude Depth formation base top _top_abs md
0 0 0 0 GD0001 32428405.75 5784686.57 131.8 132.0 59.0 Quaternary 0.2 0.0 132.0 0.0
1 1 1 1 GD0001 32428405.75 5784686.57 119.2 132.0 59.0 MittlererBuntsandsteinGP 12.8 0.2 132.2 0.2
2 2 2 2 GD0001 32428405.75 5784686.57 86.8 132.0 59.0 Zechstein 45.2 12.8 144.8 12.8
3 3 3 3 GD0001 32428405.75 5784686.57 84.0 132.0 59.0 UntererKeuperGP 48.0 45.2 177.2 45.2
4 4 4 4 GD0001 32428405.75 5784686.57 73.0 132.0 59.0 UnterJura 59.0 48.0 180.0 48.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2793 2793 2793 2860 GD1145 32416535.22 5771551.96 -1910.0 50.0 2007.0 BochumFM 1960.0 1810.0 1860.0 1810.0
2794 2794 2794 2861 GD1145 32416535.22 5771551.96 -1957.0 50.0 2007.0 Carboniferous 2007.0 1960.0 2010.0 1960.0
2795 2795 2795 2862 GD1146 32407713.16 5742143.75 59.5 60.0 1135.0 Quaternary 0.5 0.0 60.0 0.0
2796 2796 2796 2863 GD1146 32407713.16 5742143.75 -865.0 60.0 1135.0 UnterSantonium 925.0 0.5 60.5 0.5
2797 2797 2797 2864 GD1146 32407713.16 5742143.75 -1075.0 60.0 1135.0 UnterTuronium 1135.0 925.0 985.0 925.0

2787 rows × 14 columns



Now we can read the csv files into subsurface.UnstructuredData

To help loading files we can use the following helper dataclass

reader_collars_helper = ReaderFilesHelper(file_or_buffer=data_path + '/wells.csv',
                                          usecols=['Index', 'X', 'Y', 'Altitude'], header=0)
reader_collars_helper

Out:

ReaderFilesHelper(file_or_buffer='/home/runner/.cache/pooch/2ff36b994aa925b842ddd264256335a9-example1/wells.csv', usecols=['Index', 'X', 'Y', 'Altitude'], col_names=None, drop_cols=None, format='.csv', index_map=None, columns_map=None, additional_reader_kwargs={}, file_or_buffer_type=<class 'str'>, index_col=False, header=0)
reader_wells_helper = ReaderWellsHelper(
    reader_collars_args=reader_collars_helper,
    reader_survey_args=ReaderFilesHelper(
        file_or_buffer=data_path + '/wells.csv',
        index_col="Index",
        usecols=['Index', 'md']
    ),
    reader_lith_args=ReaderFilesHelper(
        file_or_buffer=data_path + '/wells.csv',
        index_col="Index",
        columns_map={'top': 'top', 'base': 'base', 'formation': 'component lith'},
        usecols=['Index', 'top', 'base', 'formation']
    )
)
reader_wells_helper

Out:

ReaderWellsHelper(reader_collars_args=ReaderFilesHelper(file_or_buffer='/home/runner/.cache/pooch/2ff36b994aa925b842ddd264256335a9-example1/wells.csv', usecols=['Index', 'X', 'Y', 'Altitude'], col_names=None, drop_cols=None, format='.csv', index_map=None, columns_map=None, additional_reader_kwargs={}, file_or_buffer_type=<class 'str'>, index_col=False, header=0), reader_survey_args=ReaderFilesHelper(file_or_buffer='/home/runner/.cache/pooch/2ff36b994aa925b842ddd264256335a9-example1/wells.csv', usecols=['Index', 'md'], col_names=None, drop_cols=None, format='.csv', index_map=None, columns_map=None, additional_reader_kwargs={}, file_or_buffer_type=<class 'str'>, index_col='Index', header='infer'), reader_lith_args=ReaderFilesHelper(file_or_buffer='/home/runner/.cache/pooch/2ff36b994aa925b842ddd264256335a9-example1/wells.csv', usecols=['Index', 'top', 'base', 'formation'], col_names=None, drop_cols=None, format='.csv', index_map=None, columns_map={'top': 'top', 'base': 'base', 'formation': 'component lith'}, additional_reader_kwargs={}, file_or_buffer_type=<class 'str'>, index_col='Index', header='infer'), reader_attr_args=None)
wells_unstructured_data = read_wells_to_unstruct(reader_wells_helper)

Out:

/home/runner/work/subsurface/subsurface/subsurface/reader/wells/well_files_reader.py:140: UserWarning: inc and/or azi columns are not present in the file. The boreholes will be straight.
  warnings.warn('inc and/or azi columns are not present in the file.'
The following striplog failed being processed:  ['GD0041', 'GD0080', 'GD0092', 'GD0136', 'GD0211', 'GD0225', 'GD0235', 'GD0236', 'GD0304', 'GD0305', 'GD0308', 'GD0377', 'GD0399', 'GD0419', 'GD0439', 'GD0464', 'GD0487', 'GD0504', 'GD0505', 'GD0526', 'GD0529', 'GD0573', 'GD0597', 'GD0611', 'GD0617', 'GD0622', 'GD0632', 'GD0715', 'GD0733', 'GD0760', 'GD0766', 'GD0813', 'GD0816', 'GD0843', 'GD0845', 'GD0858', 'GD0937', 'GD0942', 'GD0948', 'GD0981', 'GD1007', 'GD1016']
/home/runner/work/subsurface/subsurface/subsurface/reader/wells/welly_reader.py:116: UserWarning: At least one of the wells do not have assigned a survey. Borehole name: {w.name}
  warnings.warn(f'At least one of the wells do not have '
The following boreholes failed being processed:  ['GD0304', 'GD0981', 'GD0733', 'GD0041', 'GD0487', 'GD0080', 'GD0948', 'GD0632', 'GD0419', 'GD0464', 'GD0813', 'GD0236', 'GD0377', 'GD0816', 'GD0937', 'GD0715', 'GD0766', 'GD0225', 'GD0611', 'GD0136', 'GD0526', 'GD0622', 'GD0235', 'GD0211', 'GD0504', 'GD0505', 'GD0399', 'GD1007', 'GD0858', 'GD0305', 'GD0597', 'GD1016', 'GD0845', 'GD0617', 'GD0092', 'GD0942', 'GD0573', 'GD0308', 'GD0529', 'GD0843', 'GD0439', 'GD0760']
wells_unstructured_data

Out:

<xarray.Dataset>
Dimensions:       (points: 88160, XYZ: 3, cell: 87058, nodes: 2, cell_attr: 1, vertex_attr: 0)
Coordinates:
  * points        (points) int64 0 1 2 3 4 5 ... 88155 88156 88157 88158 88159
  * XYZ           (XYZ) <U1 'X' 'Y' 'Z'
  * cell_attr     (cell_attr) object 'lith_log'
  * vertex_attr   (vertex_attr) int64
    UWI           (cell) object 'GD0800' 'GD0800' 'GD0800' ... 'GD0416' 'GD0416'
    Depth         (cell) float64 0.1538 0.4614 0.769 ... 0.9684 0.981 0.9937
Dimensions without coordinates: cell, nodes
Data variables:
    vertex        (points, XYZ) float64 3.241e+07 5.774e+06 ... 5.756e+06 131.5
    cells         (cell, nodes) int64 0 1 1 2 2 ... 88157 88158 88158 88159
    cell_attrs    (cell, cell_attr) int64 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1
    vertex_attrs  (points, vertex_attr) float64

From UnstructuredData we can create geometric objects such a lines, points, meshes, etc. In the case of boreholes subsurface.LineSet is the most suitable geometric representation.

wells_element = ss.LineSet(wells_unstructured_data)

All elements in subsurface have their direct link to a pyvista mesh. This transformation can be done by the functions to_pyvista_….

# Pyvista mesh
wells_mesh = ss.visualization.to_pyvista_line(wells_element, radius=50)

# Plotting
ss.visualization.pv_plot(
    [wells_mesh],
    image_2d=False,
    ve=5
)
getting started

Out:

/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1332: PyvistaDeprecationWarning: Use of `cell_arrays` is deprecated. Use `cell_data` instead.
  warnings.warn(

<pyvista.plotting.plotting.Plotter object at 0x7f62fb00c340>

We can do the same for point data, for example the borehole location.

# UnstructuredData
borehole_location_struct = borehole_location_to_unstruct(
    ReaderFilesHelper(
        file_or_buffer=data_path + '/wells.csv',
        usecols=['Index', 'X', 'Y', 'Altitude'],
        header=0,
        columns_map={'X': 'x', 'Y': 'y', 'Altitude': 'altitude'}
    )
)
borehole_location_struct

Out:

<xarray.Dataset>
Dimensions:       (points: 1144, XYZ: 3, cell: 1144, nodes: 1, cell_attr: 1, vertex_attr: 0)
Coordinates:
  * points        (points) int64 0 1 2 3 4 5 6 ... 1138 1139 1140 1141 1142 1143
  * XYZ           (XYZ) <U1 'X' 'Y' 'Z'
  * cell_attr     (cell_attr) object 'number_segments'
  * vertex_attr   (vertex_attr) int64
    cell_         (cell) int64 0 1 2 3 4 5 6 ... 1138 1139 1140 1141 1142 1143
Dimensions without coordinates: cell, nodes
Data variables:
    vertex        (points, XYZ) float32 3.243e+07 5.785e+06 ... 5.742e+06 60.0
    cells         (cell, nodes) int64 0 1 2 3 4 5 ... 1139 1140 1141 1142 1143
    cell_attrs    (cell, cell_attr) float32 5.0 3.0 4.0 2.0 ... 9.0 11.0 9.0 3.0
    vertex_attrs  (points, vertex_attr) float64
Attributes:
    wells_names:  ['GD0001', 'GD0002', 'GD0003', 'GD0004', 'GD0005', 'GD0006'...

Element

borehole_location_point_set = ss.PointSet(borehole_location_struct)
borehole_location_point_set

Out:

<subsurface.structs.unstructured_elements.PointSet object at 0x7f62fb490820>

Pyvista mesh

borehole_loc_mesh = ss.visualization.to_pyvista_points(borehole_location_point_set)
borehole_loc_mesh

Out:

/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1192: PyvistaDeprecationWarning: Use of `point_arrays` is deprecated. Use `point_data` instead.
  warnings.warn(
HeaderData Arrays
PolyDataInformation
N Cells1144
N Points1144
X Bounds3.238e+07, 3.243e+07
Y Bounds5.735e+06, 5.785e+06
Z Bounds3.500e+01, 2.304e+02
N Arrays1
NameFieldTypeN CompMinMax
number_segmentsPointsfloat6411.000e+001.900e+01


ss.visualization.pv_plot([borehole_loc_mesh], image_2d=False, ve=5)
getting started

Out:

<pyvista.plotting.plotting.Plotter object at 0x7f62fb00ceb0>

Read Topography

# StructuredData
topo_structured_data = subsurface.reader.topography.topo_core.read_structured_topography(data_path + '/DEM50.tif')
topo_structured_data

Out:

StructuredData(data=<xarray.Dataset>
Dimensions:     (x: 2000, y: 2800)
Coordinates:
  * x           (x) float64 3.25e+07 3.25e+07 3.25e+07 ... 3.236e+07 3.236e+07
  * y           (y) float64 5.7e+06 5.7e+06 5.7e+06 ... 5.8e+06 5.8e+06 5.8e+06
Data variables:
    topography  (x, y) float32 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0, data_array_name='topography')

Remove outliers

replace_outliers(topo_structured_data, 'topography', 0.98)

# Element
topo_element = ss.structs.StructuredGrid(topo_structured_data)
topo_element

# Pyvista mesh
topo_mesh = ss.visualization.to_pyvista_grid(topo_element, 'topography',
                                             data_order='C')

Out:

<xarray.DataArray 'topography' (x: 2000, y: 2800)>
array([[  0.  ,   0.  ,   0.  , ...,  40.1 ,  40.09,  44.58],
       [  0.  ,   0.  ,   0.  , ...,  40.08,  40.07,  44.21],
       [  0.  ,   0.  ,   0.  , ...,  40.14,  44.21,  43.98],
       ...,
       [100.56, 102.14, 102.17, ...,   0.  ,   0.  ,   0.  ],
       [ 99.44,  99.85,  99.77, ...,   0.  ,   0.  ,   0.  ],
       [ 88.32,  91.76,  98.68, ...,   0.  ,   0.  ,   0.  ]],
      dtype=float32)
Coordinates:
  * x         (x) float64 3.25e+07 3.25e+07 3.25e+07 ... 3.236e+07 3.236e+07
  * y         (y) float64 5.7e+06 5.7e+06 5.7e+06 ... 5.8e+06 5.8e+06 5.8e+06
    quantile  float64 0.98
/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1192: PyvistaDeprecationWarning: Use of `point_arrays` is deprecated. Use `point_data` instead.
  warnings.warn(
ss.visualization.pv_plot([topo_mesh], image_2d=False, ve=5)
getting started

Out:

<pyvista.plotting.plotting.Plotter object at 0x7f62fad9cc10>

Read Profiles

profiles_traces = subsurface.reader.profiles.profiles_core.lineset_from_trace(
    data_path + '/Profiles_cropped/Profile_PyVista.shp',
    idx=range(13)
)

Out:

/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1332: PyvistaDeprecationWarning: Use of `cell_arrays` is deprecated. Use `cell_data` instead.
  warnings.warn(
profiles_trisurf_list, profiles_mesh_list = subsurface.reader.profiles.profiles_core.create_tri_surf_from_traces_texture(
    data_path + '/Profiles_cropped/Profile_PyVista.shp',
    path_to_texture=[
        data_path + '/Profiles_cropped/profile001.png',
        data_path + '/Profiles_cropped/profile002.png',
        data_path + '/Profiles_cropped/profile003.png',
        data_path + '/Profiles_cropped/profile004.png',
        data_path + '/Profiles_cropped/profile005.png',
        data_path + '/Profiles_cropped/profile006.png',
        data_path + '/Profiles_cropped/profile007.png',
        data_path + '/Profiles_cropped/profile008.png',
        data_path + '/Profiles_cropped/profile009.png',
        data_path + '/Profiles_cropped/profile010.png',
        data_path + '/Profiles_cropped/profile011.png',
        data_path + '/Profiles_cropped/profile012.png',
        data_path + '/Profiles_cropped/profile013.png',
    ],
    idx=range(13),
    return_mesh=True,
    return_uv=True
)

Out:

/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1332: PyvistaDeprecationWarning: Use of `cell_arrays` is deprecated. Use `cell_data` instead.
  warnings.warn(
/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1192: PyvistaDeprecationWarning: Use of `point_arrays` is deprecated. Use `point_data` instead.
  warnings.warn(
/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1332: PyvistaDeprecationWarning: Use of `cell_arrays` is deprecated. Use `cell_data` instead.
  warnings.warn(
/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1192: PyvistaDeprecationWarning: Use of `point_arrays` is deprecated. Use `point_data` instead.
  warnings.warn(
ss.visualization.pv_plot(profiles_mesh_list, image_2d=False, ve=5)
getting started

Out:

<pyvista.plotting.plotting.Plotter object at 0x7f62db6847f0>

GemPy Mesh

UnstructuredData

gempy_unstructured_data = subsurface.reader.read_netcdf.read_unstruct(data_path + '/meshes.nc')

# Element
trisurf_gempy = ss.TriSurf(gempy_unstructured_data)

# Pyvista mesh
gempy_mesh = ss.visualization.to_pyvista_mesh(trisurf_gempy)

Out:

/home/runner/work/subsurface/subsurface/subsurface/reader/read_netcdf.py:18: UserWarning: Trying loading legacy files.
  warnings.warn("Trying loading legacy files.")
/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1332: PyvistaDeprecationWarning: Use of `cell_arrays` is deprecated. Use `cell_data` instead.
  warnings.warn(
/opt/hostedtoolcache/Python/3.8.12/x64/lib/python3.8/site-packages/pyvista/core/dataset.py:1192: PyvistaDeprecationWarning: Use of `point_arrays` is deprecated. Use `point_data` instead.
  warnings.warn(
ss.visualization.pv_plot(
    [gempy_mesh],
    image_2d=False,
    ve=5
)
getting started

Out:

<pyvista.plotting.plotting.Plotter object at 0x7f62db3d4f40>

sphinx_gallery_thumbnail_number = 6

Export to binary

subsurface.writer.to_binary.base_structs_to_binary_file(data_path + '/wells',
                                                        wells_unstructured_data)
subsurface.writer.to_binary.base_structs_to_binary_file(data_path + '/topo',
                                                        topo_structured_data)
subsurface.writer.to_binary.base_structs_to_binary_file(data_path + '/collars',
                                                        borehole_location_struct)

for e, tri_surf in enumerate(profiles_trisurf_list):
    subsurface.writer.to_binary.base_structs_to_binary_file(data_path + f'/profile_{e}_mesh',
                                                            tri_surf.mesh)
    subsurface.writer.to_binary.base_structs_to_binary_file(
        data_path + f'/profile_{e}_texture_C',
        tri_surf.texture,
        order='C')

Total running time of the script: ( 0 minutes 44.099 seconds)

Gallery generated by Sphinx-Gallery