Skip to content

Commit

Permalink
Updated using importlib_resources instead of pkg_resources and pre…
Browse files Browse the repository at this point in the history
…pare for later versions of `pandas` (#492)

Fixes #491  

- [x] Updated to use `importlib_resources` instead of `pkg_resources`
   - Reason: `pkg_resources` is deprecated in favor of `importlib.resources` / `importlib_resources`.

- [x] Refactor `pandas`-related code to transition smoothly to future
versions of `pandas` and to handle its deprecation warnings.
- [Pandas
PR](https://github.com/pandas-dev/pandas/pull/54710/files#diff-55001624a0932c1b6cee2e6ddb65dea85c1faf0dee84812c0ca0c32916a71438):
  ```
"Downcasting behavior in `replace` is deprecated and "
"will be removed in a future version. To retain the old "
                             "behavior, explicitly call "
                             "`result.infer_objects(copy=False)`. "
                             "To opt-in to the future "
                             "behavior, set "
"`pd.set_option('future.no_silent_downcasting', True)`",
  ```
  - `A value is trying to be set on a copy of a slice from a DataFrame`
  - `.apply(max)` => `.apply(np.maximum.reduce)`
- `UserWarning: Boolean Series key will be reindexed to match DataFrame
index.`
  • Loading branch information
hrshdhgd authored Feb 8, 2024
1 parent d44267b commit 96b3ab9
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 12 deletions.
27 changes: 24 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ deprecation = "^2.1.0"
pyyaml = "^6.0.1"
rdflib = ">=6.0.0"
scipy = {version = "*", extras = ["scipy"]}
importlib-resources = "^6.1.1"

[tool.poetry.group.dev.dependencies]
pytest = {version = ">=7.1.2"}
Expand Down
4 changes: 2 additions & 2 deletions src/sssom/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
from functools import cached_property, lru_cache
from typing import Any, Dict, List, Literal, Set

import pkg_resources
import importlib_resources
import yaml
from linkml_runtime.utils.schema_as_dict import schema_as_dict
from linkml_runtime.utils.schemaview import SchemaView

HERE = pathlib.Path(__file__).parent.resolve()

SCHEMA_YAML = pkg_resources.resource_filename("sssom_schema", "schema/sssom_schema.yaml")
SCHEMA_YAML = importlib_resources.files("sssom_schema").joinpath("schema/sssom_schema.yaml")
EXTENDED_PREFIX_MAP = HERE / "obo.epm.json"

OWL_EQUIV_CLASS_URI = "http://www.w3.org/2002/07/owl#equivalentClass"
Expand Down
6 changes: 3 additions & 3 deletions src/sssom/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Mapping, Union

import curies
import pkg_resources
import importlib_resources
from curies import Converter
from rdflib.namespace import is_ncname

Expand All @@ -19,8 +19,8 @@
]

SSSOM_BUILT_IN_PREFIXES = ("sssom", "owl", "rdf", "rdfs", "skos", "semapv")
SSSOM_CONTEXT = pkg_resources.resource_filename(
"sssom_schema", "context/sssom_schema.context.jsonld"
SSSOM_CONTEXT = importlib_resources.files("sssom_schema").joinpath(
"context/sssom_schema.context.jsonld"
)


Expand Down
37 changes: 33 additions & 4 deletions src/sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@
KEY_FEATURES = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID, PREDICATE_MODIFIER]
TRIPLES_IDS = [SUBJECT_ID, PREDICATE_ID, OBJECT_ID]

# ! This will be unnecessary when pandas >= 3.0.0 is released
# ! https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.infer_objects.html#
# A value is trying to be set on a copy of a slice from a DataFrame
pd.options.mode.copy_on_write = True
# Get the version of pandas as a tuple of integers
pandas_version = tuple(map(int, pd.__version__.split(".")))


@dataclass
class MappingSetDataFrame:
Expand Down Expand Up @@ -151,6 +158,12 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr
df = pd.DataFrame(get_dict_from_mapping(mapping) for mapping in doc.mapping_set.mappings)
meta = _extract_global_metadata(doc)

if pandas_version >= (2, 0, 0):
# For pandas >= 2.0.0, use the 'copy' parameter
df = df.infer_objects(copy=False)
else:
# For pandas < 2.0.0, call 'infer_objects()' without any parameters
df = df.infer_objects()
# remove columns where all values are blank.
df.replace("", np.nan, inplace=True)
df.dropna(axis=1, how="all", inplace=True) # remove columns with all row = 'None'-s.
Expand All @@ -160,6 +173,14 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr
slot for slot, slot_metadata in slots.items() if slot_metadata["range"] == "double"
}
non_double_cols = df.loc[:, ~df.columns.isin(slots_with_double_as_range)]

if pandas_version >= (2, 0, 0):
# For pandas >= 2.0.0, use the 'copy' parameter
non_double_cols = non_double_cols.infer_objects(copy=False)
else:
# For pandas < 2.0.0, call 'infer_objects()' without any parameters
non_double_cols = non_double_cols.infer_objects()

non_double_cols.replace(np.nan, "", inplace=True)
df.update(non_double_cols)

Expand Down Expand Up @@ -1397,18 +1418,26 @@ def invert_mappings(
non_predicate_modified_df = df

if subject_prefix:
subject_starts_with_prefix_condition = df[SUBJECT_ID].str.startswith(subject_prefix + ":")
object_starts_with_prefix_condition = df[OBJECT_ID].str.startswith(subject_prefix + ":")
# Filter rows where 'SUBJECT_ID' starts with the prefix but 'OBJECT_ID' does not
prefixed_subjects_df = pd.DataFrame(
non_predicate_modified_df[
(subject_starts_with_prefix_condition & ~object_starts_with_prefix_condition)
(
non_predicate_modified_df[SUBJECT_ID].str.startswith(subject_prefix + ":")
& ~non_predicate_modified_df[OBJECT_ID].str.startswith(subject_prefix + ":")
)
]
)

# Filter rows where 'SUBJECT_ID' does not start with the prefix but 'OBJECT_ID' does
non_prefix_subjects_df = pd.DataFrame(
non_predicate_modified_df[
(~subject_starts_with_prefix_condition & object_starts_with_prefix_condition)
(
~non_predicate_modified_df[SUBJECT_ID].str.startswith(subject_prefix + ":")
& non_predicate_modified_df[OBJECT_ID].str.startswith(subject_prefix + ":")
)
]
)

df_to_invert = non_prefix_subjects_df.loc[
non_prefix_subjects_df[PREDICATE_ID].isin(list(predicate_invert_map.keys()))
]
Expand Down

0 comments on commit 96b3ab9

Please sign in to comment.