Remove map_regions() and default mappings csv (#773)
* Remove `map_regions()` and default mappings csv

* Add reference to the nomenclature package

* Add to release notes
danielhuppmann authored Aug 28, 2023
1 parent 53c806c commit b16ae63
Showing 6 changed files with 16 additions and 468 deletions.
5 changes: 4 additions & 1 deletion RELEASE_NOTES.md
@@ -29,9 +29,12 @@ approach to exposing pyam-internals at the package level, requiring a more explicit
import of these methods. For example, use `pyam.utils.to_list()`
instead of `pyam.to_list()`.

PR [#773](https://github.com/IAMconsortium/pyam/pull/773) removed the `map_regions()`
method. This feature is now implemented via the **nomenclature.RegionProcessor** class.

## Individual updates

- [#773](https://github.com/IAMconsortium/pyam/pull/773) Remove `map_regions()` and default mappings csv
- [#772](https://github.com/IAMconsortium/pyam/pull/772) Show all missing rows for `require_data()`
- [#771](https://github.com/IAMconsortium/pyam/pull/771) Refactor to start a separate validation module
- [#766](https://github.com/IAMconsortium/pyam/pull/766) Use **ixmp4** for credentials to access a Scenario Explorer database
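
The release note above points to the **nomenclature** package as the replacement for `map_regions()`. Below is a minimal sketch of that workflow: the `RegionProcessor.from_directory()` and `apply()` calls follow the nomenclature API, while the directory path, mapping contents, and model/scenario/region names are illustrative assumptions (recent nomenclature versions may also require a `DataStructureDefinition` argument).

```python
import pandas as pd
import pyam
from nomenclature import RegionProcessor

# Hypothetical timeseries data with a model-native region
df = pyam.IamDataFrame(
    pd.DataFrame(
        [["model_a", "scen_a", "Region A", "Primary Energy", "EJ/yr", 1.0]],
        columns=["model", "scenario", "region", "variable", "unit", 2010],
    )
)

# "mappings" is an assumed directory of model-specific region-mapping yaml files
processor = RegionProcessor.from_directory("mappings")

# Rename/aggregate model-native regions to common regions per the mappings
df_processed = processor.apply(df)
```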
1 change: 1 addition & 0 deletions docs/conf.py
@@ -328,6 +328,7 @@
"plotly": ("https://plotly.com/python-api-reference/", None),
"pandas_datareader": ("https://pandas-datareader.readthedocs.io/en/stable", None),
"unfccc_di_api": ("https://unfccc-di-api.readthedocs.io/en/stable", None),
"nomenclature": ("https://nomenclature-iamc.readthedocs.io/en/stable", None),
}

# Set up the plotting gallery with plotly scraper
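
For context, the added line hooks the nomenclature docs into Sphinx's intersphinx resolution. A minimal standalone sketch of the mechanism follows; only the URL is taken from the diff, the rest is standard Sphinx configuration:

```python
# docs/conf.py (fragment) -- sphinx.ext.intersphinx fetches objects.inv from
# each URL so that roles like :class:`nomenclature.RegionProcessor` in pyam
# docstrings resolve to the external documentation.
extensions = ["sphinx.ext.intersphinx"]

intersphinx_mapping = {
    "nomenclature": ("https://nomenclature-iamc.readthedocs.io/en/stable", None),
}
```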
102 changes: 11 additions & 91 deletions pyam/core.py
@@ -1543,6 +1543,14 @@ def aggregate_region(
--------
add : Add timeseries data items `a` and `b` along an `axis`
aggregate : Aggregate timeseries data along the `variable` hierarchy.
nomenclature.RegionProcessor : Processing of model-specific region mappings.

Notes
-----
The **nomenclature-iamc** package supports structured processing of
many-to-many region mappings. Read the `user guide`_ for more information.

.. _`user guide`: https://nomenclature-iamc.readthedocs.io/en/stable/user_guide.html
"""
_df = _aggregate_region(
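
The See Also entry above names `aggregate_region` as the in-package tool for region aggregation. A minimal usage sketch with hypothetical data; the `region` and `subregions` keywords are part of the documented pyam API:

```python
import pandas as pd
import pyam

# Hypothetical data: two subregions to be summed into "World"
df = pyam.IamDataFrame(
    pd.DataFrame(
        [
            ["model_a", "scen_a", "region_1", "Primary Energy", "EJ/yr", 3.0],
            ["model_a", "scen_a", "region_2", "Primary Energy", "EJ/yr", 5.0],
        ],
        columns=["model", "scenario", "region", "variable", "unit", 2010],
    )
)

# Sum the subregions into a single "World" region (summation is the default)
world = df.aggregate_region(
    "Primary Energy", region="World", subregions=["region_1", "region_2"]
)
```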
@@ -2564,98 +2572,10 @@ def map_regions(
remove_duplicates=False,
inplace=False,
):
"""Plot regional data for a single model, scenario, variable, and year
see pyam.plotting.region_plot() for all available options
Parameters
----------
map_col : str
The column used to map new regions to. Common examples include
iso and 5_region.
agg : str, optional
Perform a data aggregation. Options include: sum.
copy_col : str, optional
Copy the existing region data into a new column for later use.
fname : str, optional
Use a non-default region mapping file
region_col : string, optional
Use a non-default column name for regions to map from.
remove_duplicates : bool, optional
If there are duplicates in the mapping from one regional level to
another, then remove these duplicates by counting the most common
mapped value.
This option is most useful when mapping from high resolution
(e.g., model regions) to low resolution (e.g., 5_region).
inplace : bool, optional
if True, do operation inplace and return None
"""
fname = fname or run_control()["region_mapping"]["default"]
mapping = read_pandas(Path(fname)).rename(str.lower, axis="columns")
map_col = map_col.lower()

ret = self.copy() if not inplace else self
_df = ret.data
columns_ordered = _df.columns

# merge data
dfs = []
for model in self.model:
df = _df[_df["model"] == model]
_col = region_col or "{}.REGION".format(model)
_map = mapping.rename(columns={_col.lower(): "region"})
_map = _map[["region", map_col]].dropna().drop_duplicates()
_map = _map[_map["region"].isin(_df["region"])]
if remove_duplicates and _map["region"].duplicated().any():
# find duplicates
where_dup = _map["region"].duplicated(keep=False)
dups = _map[where_dup]
logger.warning(
"""
Duplicate entries found for the following regions.
Mapping will occur only for the most common instance.
{}""".format(
dups["region"].unique()
)
)
# get non duplicates
_map = _map[~where_dup]
# order duplicates by the count frequency
dups = (
dups.groupby(["region", map_col])
.size()
.reset_index(name="count")
.sort_values(by="count", ascending=False)
.drop("count", axis=1)
)
# take top occurrence
dups = dups[~dups["region"].duplicated(keep="first")]
# combine them back
_map = pd.concat([_map, dups])
if copy_col is not None:
df[copy_col] = df["region"]

df = (
df.merge(_map, on="region")
.drop("region", axis=1)
.rename(columns={map_col: "region"})
)
dfs.append(df)
df = pd.concat(dfs)

# perform aggregations
if agg == "sum":
df = df.groupby(self.dimensions).sum().reset_index()

df = (
df.reindex(columns=columns_ordered)
.sort_values(SORT_IDX)
.reset_index(drop=True)
)
# TODO: deprecated, remove for release >= 2.1
raise DeprecationWarning(
"This method was removed. Please use `aggregate_region()` instead."
)
ret._data = df.set_index(self.dimensions).value

if not inplace:
return ret
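
The `remove_duplicates` branch removed above resolves conflicting region mappings by keeping the most frequent target per source region. A standalone sketch of that pandas idiom, with illustrative column names and data:

```python
import pandas as pd

# Illustrative mapping with a conflict: "region_1" maps to two targets
_map = pd.DataFrame(
    {
        "region": ["region_1", "region_1", "region_1", "region_2"],
        "5_region": ["ASIA", "ASIA", "OECD90", "OECD90"],
    }
)

# Split off the rows whose source region is unambiguous
where_dup = _map["region"].duplicated(keep=False)
unique = _map[~where_dup]

# Rank conflicting targets by frequency and keep the most common one
dups = (
    _map[where_dup]
    .groupby(["region", "5_region"])
    .size()
    .reset_index(name="count")
    .sort_values(by="count", ascending=False)
    .drop("count", axis=1)
)
dups = dups[~dups["region"].duplicated(keep="first")]

resolved = pd.concat([unique, dups])  # "region_1" -> "ASIA" (2 votes vs 1)
```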


def _meta_idx(data):
