Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Partial deck parsing #487

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions res2df/compdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

try:
# pylint: disable=unused-import
import opm.io.deck
import opm.io
except ImportError:
# Allow parts of res2df to work without OPM:
pass
Expand Down Expand Up @@ -993,7 +993,9 @@ def df(
Returns:
pd.Dataframe with one row pr cell to well connection
"""
compdat_df = deck2dfs(resdatafiles.get_deck())["COMPDAT"]
compdat_df = deck2dfs(
resdatafiles.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])
)["COMPDAT"]
compdat_df = unrolldf(compdat_df)

if initvectors:
Expand Down
18 changes: 16 additions & 2 deletions res2df/equil.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,14 @@ def df(
pd.DataFrame, at least with columns KEYWORD and EQLNUM
"""
if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
try:
deck = deck.get_deck(
sections=[opm.io.eclSectionType.RUNSPEC, opm.io.eclSectionType.SOLUTION]
)
except (
AttributeError
): # opm<=2023.10 RUNSPEC is included by default and not an option.
deck = deck.get_deck(sections=[opm.io.eclSectionType.SOLUTION])

deck = inject_xxxdims_ntxxx("EQLDIMS", "NTEQUL", deck, ntequl)
ntequl = deck["EQLDIMS"][0][DIMS_POS["NTEQUL"]].get_int(0)
Expand Down Expand Up @@ -328,7 +335,14 @@ def equil_main(args) -> None:
)
resdatafiles = ResdataFiles(args.DATAFILE)
if resdatafiles:
deck = resdatafiles.get_deck()
try:
deck = resdatafiles.get_deck(
sections=[opm.io.eclSectionType.RUNSPEC, opm.io.eclSectionType.SOLUTION]
)
except (
AttributeError
): # opm<=2023.10 RUNSPEC is included by default and not an option.
deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.SOLUTION])
if "EQLDIMS" in deck:
# Things are easier when a full deck with (correct) EQLDIMS
# is supplied:
Expand Down
4 changes: 2 additions & 2 deletions res2df/faults.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def df(deck: Union[ResdataFiles, "opm.libopmcommon_python.Deck"]) -> pd.DataFram
deck: A :term:`deck`
"""
if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
deck = deck.get_deck(sections=[opm.io.eclSectionType.GRID])

# In[91]: list(deck['FAULTS'][0])
# Out[91]: [[u'F1'], [36], [36], [41], [42], [1], [14], [u'I']]
Expand Down Expand Up @@ -91,7 +91,7 @@ def faults_main(args) -> None:
)
resdatafiles = ResdataFiles(args.DATAFILE)
if resdatafiles:
deck = resdatafiles.get_deck()
deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.GRID])
faults_df = df(deck)
write_dframe_stdout_file(
faults_df,
Expand Down
7 changes: 5 additions & 2 deletions res2df/gruptree.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def df(
date = None

if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
deck = deck.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])

edgerecords = [] # list of dict of rows containing an edge.
nodedatarecords = []
Expand Down Expand Up @@ -457,7 +457,10 @@ def gruptree_main(args) -> None:
print("Nothing to do. Set --output or --prettyprint")
sys.exit(0)
resdatafiles = ResdataFiles(args.DATAFILE)
dframe = df(resdatafiles.get_deck(), startdate=args.startdate)
dframe = df(
resdatafiles.get_deck(sections=[opm.io.eclSectionType.SCHEDULE]),
startdate=args.startdate,
)
if args.prettyprint:
if "DATE" in dframe:
print(prettyprint(dframe))
Expand Down
18 changes: 16 additions & 2 deletions res2df/pvt.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,14 @@ def df(
pd.DataFrame
"""
if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
try:
deck = deck.get_deck(
sections=[opm.io.eclSectionType.RUNSPEC, opm.io.eclSectionType.PROPS]
)
except (
AttributeError
): # opm<=2023.10 RUNSPEC is included by default and not an option.
deck = deck.get_deck(sections=[opm.io.eclSectionType.PROPS])

deck = inject_xxxdims_ntxxx("TABDIMS", "NTPVT", deck, ntpvt)
ntpvt = deck["TABDIMS"][0][DIMS_POS["NTPVT"]].get_int(0)
Expand Down Expand Up @@ -299,7 +306,14 @@ def pvt_main(args) -> None:
resdatafiles = ResdataFiles(args.DATAFILE)
logger.info("Parsed %s", args.DATAFILE)
if resdatafiles:
deck = resdatafiles.get_deck()
try:
deck = resdatafiles.get_deck(
sections=[opm.io.eclSectionType.RUNSPEC, opm.io.eclSectionType.PROPS]
)
except (
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You may want to move this except to inside get_deck()?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will take a look at it, probably a good idea.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the issue in this case will be that we cannot run it with opm < 2024.04 (a release which doesn't exist yet), as opm.io.eclSectionType.RUNSPEC isn't valid there (at least not without complicating the code outside get_deck).
An option is to instead take a list of strings as the get_deck() input and then map them to opm.io.eclSectionType inside get_deck(). That might also make it easier for most users.
E.g. a syntax like this: get_deck(sections=["RUNSPEC", "PROPS"])

AttributeError
): # opm<=2023.10 RUNSPEC is included by default and not an option.
deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.PROPS])
if "TABDIMS" in deck:
# Things are easier when a full deck with correct TABDIMS
# is supplied:
Expand Down
14 changes: 11 additions & 3 deletions res2df/resdatafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def get_path(self) -> Path:
"""Return the full path to the directory with the .DATA file"""
return Path(self._eclbase).absolute().parent

def get_deck(self) -> "opm.libopmcommon_python.Deck":
def get_deck(self, sections: list = []) -> "opm.libopmcommon_python.Deck":
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://docs.python-guide.org/writing/gotchas/

use sections: list = None and then set it to the empty list later if None.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I considered using None, but used an empty list since that is the default in opm.io.Parser.parse and I tried to keep it more or less consistent. Open to changing it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a bug, you need to change it :)

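There will be situations where you think you have an empty list, but then it isn't: Python evaluates a default argument once, when the function is defined, so the same list object is shared by every call, and anything that mutates it changes the "default" for all later calls.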

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if I understand you correctly.
The case here is:
If you give the opm parser an empty section list (which is the default), it returns the full deck.
If your point is that passing an empty section list would be expected to return an empty deck, and that the current behavior might therefore be confusing, I don't disagree with that. That is also why I considered None in the first place. But I wouldn't call that a bug, considering it was intended to keep the same format and behavior as opm.

So basically my question is: is what is described above the "bug", or is it something else? I have no big issue with switching the default to None, but I would like to know exactly what you mean by "the bug" to fix 😉

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It bit me hard once: equinor/fmu-ensemble@f109b6e

"""Return a opm.io :term:`deck` of the .DATA file"""
if not self._deck:
if Path(self._eclbase + ".DATA").is_file():
Expand All @@ -92,8 +92,16 @@ def get_deck(self) -> "opm.libopmcommon_python.Deck":
deckfile = self._eclbase # Will be any filename
logger.info("Parsing deck file %s...", deckfile)
parsecontext = opm.io.ParseContext(OPMIOPARSER_RECOVERY)
deck = opm.io.Parser().parse(deckfile, parsecontext)
self._deck = deck
if len(sections) > 0:
try:
deck = opm.io.Parser().parse(deckfile, parsecontext, sections)
# Return without caching as only a part of the deck is parsed
return deck
except RuntimeError:
# Occurs if not able to parse individual sections
# pass to parse and cache full deck
pass
self._deck = opm.io.Parser().parse(deckfile, parsecontext)
return self._deck

@staticmethod
Expand Down
20 changes: 17 additions & 3 deletions res2df/satfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,14 @@ def df(
if isinstance(deck, ResdataFiles):
# NB: If this is done on include files and not on .DATA files
# we can loose data for SATNUM > 1
deck = deck.get_deck()
try:
deck = deck.get_deck(
sections=[opm.io.eclSectionType.RUNSPEC, opm.io.eclSectionType.PROPS]
)
except (
AttributeError
): # opm<=2023.10 RUNSPEC is included by default and not an option.
deck = deck.get_deck(sections=[opm.io.eclSectionType.PROPS])
deck = inject_xxxdims_ntxxx("TABDIMS", "NTSFUN", deck, ntsfun)
assert "TABDIMS" in deck

Expand Down Expand Up @@ -206,11 +213,18 @@ def satfunc_main(args) -> None:
)
resdatafiles = ResdataFiles(args.DATAFILE)
if resdatafiles:
deck = resdatafiles.get_deck()
try:
deck = resdatafiles.get_deck(
sections=[opm.io.eclSectionType.RUNSPEC, opm.io.eclSectionType.PROPS]
)
except (
AttributeError
): # opm<=2023.10 RUNSPEC is included by default and not an option.
deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.PROPS])
if "TABDIMS" in deck:
# Things are easier when a full deck with (correct) TABDIMS
# is supplied:
satfunc_df = df(resdatafiles, keywords=args.keywords)
satfunc_df = df(deck, keywords=args.keywords)
else:
# This might be an include file for which we have to infer/guess
# TABDIMS. Then we send it to df() as a string
Expand Down
14 changes: 6 additions & 8 deletions res2df/vfp/_vfp.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def basic_data(
"""

if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
deck = deck.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])
elif isinstance(deck, str):
deck = ResdataFiles.str2deck(deck)

Expand Down Expand Up @@ -259,7 +259,7 @@ def dfs(
Syntax "[0,1,8:11]" corresponds to [0,1,8,9,10,11].
"""
if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
deck = deck.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])
elif isinstance(deck, str):
deck = ResdataFiles.str2deck(deck)

Expand Down Expand Up @@ -302,7 +302,7 @@ def pyarrow_tables(
Syntax "[0,1,8:11]" corresponds to [0,1,8,9,10,11].
"""
if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
deck = deck.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])
elif isinstance(deck, str):
deck = ResdataFiles.str2deck(deck)

Expand Down Expand Up @@ -433,7 +433,7 @@ def df(
return pd.DataFrame()

if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
deck = deck.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])
elif isinstance(deck, str):
deck = ResdataFiles.str2deck(deck)

Expand Down Expand Up @@ -507,7 +507,7 @@ def vfp_main(args) -> None:
outputfile = args.output
outputfile.replace(".arrow", "")
vfp_arrow_tables = pyarrow_tables(
resdatafiles.get_deck(), keyword=args.keyword, vfpnumbers_str=vfpnumbers
resdatafiles, keyword=args.keyword, vfpnumbers_str=vfpnumbers
)
for vfp_table in vfp_arrow_tables:
table_number = int(
Expand All @@ -519,9 +519,7 @@ def vfp_main(args) -> None:
)
logger.info(f"Parsed file {args.DATAFILE} for vfp.dfs_arrow")
else:
dframe = df(
resdatafiles.get_deck(), keyword=args.keyword, vfpnumbers_str=vfpnumbers
)
dframe = df(resdatafiles, keyword=args.keyword, vfpnumbers_str=vfpnumbers)
if args.output:
write_dframe_stdout_file(
dframe, args.output, index=False, caller_logger=logger
Expand Down
4 changes: 2 additions & 2 deletions res2df/wcon.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def df(deck: Union[ResdataFiles, "opm.libopmcommon_python.Deck"]) -> pd.DataFram
"""

if isinstance(deck, ResdataFiles):
deck = deck.get_deck()
deck = deck.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])

wconrecords = [] # List of dicts of every line in input file
date = None # DATE columns will always be there, but can contain NaN
Expand Down Expand Up @@ -98,7 +98,7 @@ def wcon_main(args) -> None:
)
resdatafiles = ResdataFiles(args.DATAFILE)
if resdatafiles:
deck = resdatafiles.get_deck()
deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])
wcon_df = df(deck)
write_dframe_stdout_file(
wcon_df,
Expand Down
15 changes: 14 additions & 1 deletion res2df/wellcompletiondata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
import pyarrow
import pyarrow.feather

try:
import opm.io
except ImportError:
pass

from .common import convert_lyrlist_to_zonemap, parse_lyrfile, write_dframe_stdout_file
from .compdat import df as create_compdat_df
from .res2csvlogger import getLogger_res2csv
Expand Down Expand Up @@ -99,7 +104,15 @@ def _get_unit_system(resdatafiles: ResdataFiles) -> UnitSystem:
default unit system in Eclipse.
"""
unit_systems = [unitsystem.value for unitsystem in UnitSystem]
for keyword in resdatafiles.get_deck():
try:
deck = resdatafiles.get_deck(
sections=[opm.io.eclSectionType.RUNSPEC, opm.io.eclSectionType.SCHEDULE]
)
except (
AttributeError
): # opm<=2023.10 RUNSPEC is included by default and not an option.
deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])
for keyword in deck:
if keyword.name in unit_systems:
return UnitSystem(keyword.name)
return UnitSystem.METRIC
Expand Down
9 changes: 9 additions & 0 deletions tests/test_eclfiles.py → tests/test_resdatafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ def test_filedescriptors():
assert len(list(fd_dir.glob("*"))) == pre_fd_count
assert resdatafiles._rftfile is None

deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.PROPS])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These new lines should probably be in a new test function; they are not related to the testing of file descriptors.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will fix that

assert "WELSPECS" not in deck # verify section parsing
deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.SCHEDULE])
assert "WELSPECS" in deck # verify that last result was not cached
deck = resdatafiles.get_deck() # full deck will be cached
assert "SWOF" in deck
assert "WELSPECS" in deck
deck = resdatafiles.get_deck(sections=[opm.io.eclSectionType.PROPS])
assert "WELSPECS" in deck # verify that the full deck was cached and used
resdatafiles.get_deck()
# This should not leave any file descriptor open
assert len(list(fd_dir.glob("*"))) == pre_fd_count
Loading