Merge pull request #84 from OpenRarity/vicky/trait_count
Adding Meta Trait: Trait Count
vickygos authored Oct 18, 2022
2 parents fd351f8 + ea2af13 commit 7556e35
Showing 22 changed files with 1,414 additions and 662 deletions.
51 changes: 43 additions & 8 deletions open_rarity/models/collection.py
@@ -1,3 +1,4 @@
import warnings
from collections import defaultdict
from dataclasses import dataclass
from functools import cached_property
@@ -11,6 +12,8 @@
from open_rarity.models.token_standard import TokenStandard
from open_rarity.models.utils.attribute_utils import normalize_attribute_string

TRAIT_COUNT_ATTRIBUTE_NAME = "meta_trait:trait_count"


@dataclass
class CollectionAttribute:
@@ -69,20 +72,27 @@ class Collection:
def __init__(
self,
tokens: list[Token],
# Deprecated - kept to avoid breaking the interface, but no longer used.
# We always compute the attributes_frequency_counts from the tokens to avoid
# divergence.
# TODO [10/16/22]: To remove in 1.0 release
attributes_frequency_counts: dict[AttributeName, dict[AttributeValue, int]]
| None = None,
name: str | None = "",
):
if attributes_frequency_counts is not None:
warnings.warn(
"`attribute_frequency_counts` is deprecated and will be removed. "
"Counts will be derived from the token data.",
DeprecationWarning,
stacklevel=2,
)
self._trait_countify(tokens)
self._tokens = tokens
self.name = name or ""
if attributes_frequency_counts:
self.attributes_frequency_counts = (
self._normalize_attributes_frequency_counts(attributes_frequency_counts)
)
else:
self.attributes_frequency_counts = (
self._derive_normalized_attributes_frequency_counts()
)
self.attributes_frequency_counts = (
self._derive_normalized_attributes_frequency_counts()
)

@property
def tokens(self) -> list[Token]:
@@ -201,6 +211,31 @@ def extract_collection_attributes(

return collection_traits

def _trait_countify(self, tokens: list[Token]) -> None:
"""Updates tokens to have meta attribute "meta trait: trait_count" if it doesn't
already exist.
Parameters
----------
tokens : list[Token]
List of tokens to add trait count attribute to. Modifies in place.
"""
for token in tokens:
trait_count = token.trait_count()
if token.has_attribute(TRAIT_COUNT_ATTRIBUTE_NAME):
trait_count -= 1
# NOTE: There is a chance we override an existing attribute here, but it's
# highly unlikely that a token would have a trait_count attribute to begin
# with (no known collections have it right now).
# To decrease the chance of collision, we prepend "meta_trait:".
# If an existing trait count attribute already exists with a different name,
# we will not remove it. In the future, we can refactor to distinguish
# between meta and non-meta attributes.
token.metadata.add_attribute(
StringAttribute(name=TRAIT_COUNT_ATTRIBUTE_NAME, value=str(trait_count))
)

def _normalize_attributes_frequency_counts(
self,
attributes_frequency_counts: dict[AttributeName, dict[AttributeValue, int]],
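As a usage reference, here is a minimal sketch of the new constructor behavior. The `Token` construction is an assumption: the field names and `EVMContractTokenIdentifier` are inferred from the imports shown in this diff, and the contract address is hypothetical.

```python
import warnings

from open_rarity.models.collection import Collection, TRAIT_COUNT_ATTRIBUTE_NAME
from open_rarity.models.token import Token
from open_rarity.models.token_identifier import EVMContractTokenIdentifier
from open_rarity.models.token_metadata import TokenMetadata
from open_rarity.models.token_standard import TokenStandard

token = Token(
    token_identifier=EVMContractTokenIdentifier(
        contract_address="0x0000000000000000000000000000000000000000",  # hypothetical
        token_id=1,
    ),
    token_standard=TokenStandard.ERC721,
    metadata=TokenMetadata.from_attributes({"hat": "cap", "background": "none"}),
)

# Constructing the collection injects the trait-count meta attribute in place.
collection = Collection(tokens=[token])
assert token.has_attribute(TRAIT_COUNT_ATTRIBUTE_NAME)

# Passing the deprecated argument still works but emits a DeprecationWarning,
# and the counts are derived from the tokens regardless.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    Collection(tokens=[token], attributes_frequency_counts={"hat": {"cap": 1}})
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```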
31 changes: 30 additions & 1 deletion open_rarity/models/token.py
@@ -7,8 +7,14 @@
TokenIdentifier,
get_identifier_class_from_dict,
)
from open_rarity.models.token_metadata import AttributeName, TokenMetadata
from open_rarity.models.token_metadata import (
Attribute,
AttributeName,
StringAttribute,
TokenMetadata,
)
from open_rarity.models.token_standard import TokenStandard
from open_rarity.models.utils.attribute_utils import normalize_attribute_string


@dataclass
@@ -113,6 +119,29 @@ def from_dict(cls, data_dict: dict):
def attributes(self) -> dict[AttributeName, Any]:
return self.metadata.to_attributes()

def has_attribute(self, attribute_name: str) -> bool:
return self.metadata.attribute_exists(attribute_name)

def trait_count(self) -> int:
"""Returns the count of non-null, non-"none" value traits this token has."""

def get_attributes_count(attributes: list[Attribute]) -> int:
return sum(
map(
lambda a: (
not isinstance(a, StringAttribute)
or normalize_attribute_string(a.value) not in ("none", "")
),
attributes,
)
)

return (
get_attributes_count(self.metadata.string_attributes.values())
+ get_attributes_count(self.metadata.numeric_attributes.values())
+ get_attributes_count(self.metadata.date_attributes.values())
)

def to_dict(self) -> dict:
return {
"token_identifier": self.token_identifier.to_dict(),
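A short sketch of the counting rule, again with an assumed `Token` construction: string values that normalize to "none" or "" are skipped, while numeric and date attributes always count. This also assumes `from_attributes` routes `int` values to numeric attributes.

```python
from open_rarity.models.token import Token
from open_rarity.models.token_identifier import EVMContractTokenIdentifier
from open_rarity.models.token_metadata import TokenMetadata
from open_rarity.models.token_standard import TokenStandard

token = Token(
    token_identifier=EVMContractTokenIdentifier(
        contract_address="0x0000000000000000000000000000000000000000",  # hypothetical
        token_id=2,
    ),
    token_standard=TokenStandard.ERC721,
    # "None" normalizes to "none" and is skipped; the integer level becomes a
    # numeric attribute and always counts.
    metadata=TokenMetadata.from_attributes(
        {"hat": "cap", "background": "None", "level": 3}
    ),
)

assert token.trait_count() == 2  # hat + level
```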
50 changes: 37 additions & 13 deletions open_rarity/models/token_metadata.py
@@ -110,19 +110,6 @@ def __post_init__(self):
)
self.date_attributes = self._normalize_attributes_dict(self.date_attributes)

def _normalize_attributes_dict(self, attributes_dict: dict) -> dict:
"""Helper function that takes in an attributes dictionary
and normalizes attribute name in the dictionary to ensure all
letters are lower cases and whitespace is stripped.
"""
normalized_attributes_dict = {}
for attribute_name, attr in attributes_dict.items():
normalized_attr_name = normalize_attribute_string(attribute_name)
normalized_attributes_dict[normalized_attr_name] = attr
if normalized_attr_name != attr.name:
attr.name = normalized_attr_name
return normalized_attributes_dict

@classmethod
def from_attributes(cls, attributes: dict[AttributeName, Any]):
"""Constructs TokenMetadata class based on an attributes dictionary
@@ -184,3 +171,40 @@ def to_attributes(self) -> dict[AttributeName, Any]:
for attr in self.date_attributes.values():
attributes[attr.name] = datetime.fromtimestamp(attr.value)
return attributes

def add_attribute(self, attribute: Attribute):
"""Adds an attribute to this metadata object, overriding existing
attribute if the normalized attribute name already exists."""
if isinstance(attribute, StringAttribute):
self.string_attributes[attribute.name] = attribute
elif isinstance(attribute, NumericAttribute):
self.numeric_attributes[attribute.name] = attribute
elif isinstance(attribute, DateAttribute):
self.date_attributes[attribute.name] = attribute
else:
raise TypeError(
f"Provided attribute has invalid type: {type(attribute)}. "
"Must be either StringAttribute, NumericAttribute or DateAttribute."
)

def attribute_exists(self, attribute_name: str) -> bool:
"""Returns True if this metadata object has an attribute with the given name."""
attr_name = normalize_attribute_string(attribute_name)
return (
attr_name in self.string_attributes
or attr_name in self.numeric_attributes
or attr_name in self.date_attributes
)

def _normalize_attributes_dict(self, attributes_dict: dict) -> dict:
"""Helper function that takes in an attributes dictionary
and normalizes attribute name in the dictionary to ensure all
letters are lower cases and whitespace is stripped.
"""
normalized_attributes_dict = {}
for attribute_name, attr in attributes_dict.items():
normalized_attr_name = normalize_attribute_string(attribute_name)
normalized_attributes_dict[normalized_attr_name] = attr
if normalized_attr_name != attr.name:
attr.name = normalized_attr_name
return normalized_attributes_dict
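A small usage sketch of the two new helpers; it assumes `NumericAttribute` takes `name` and `value` fields like `StringAttribute` does, and that attribute constructors normalize their names.

```python
from open_rarity.models.token_metadata import (
    NumericAttribute,
    StringAttribute,
    TokenMetadata,
)

metadata = TokenMetadata.from_attributes({"Hat": "cap"})

# Lookups normalize the name first, so casing and whitespace don't matter.
assert metadata.attribute_exists("  HAT ")

# add_attribute dispatches on the attribute's type...
metadata.add_attribute(NumericAttribute(name="level", value=3))
assert metadata.attribute_exists("level")

# ...and re-adding an existing (normalized) name overrides the old attribute.
metadata.add_attribute(StringAttribute(name="hat", value="crown"))
assert metadata.string_attributes["hat"].value == "crown"
```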
12 changes: 6 additions & 6 deletions open_rarity/rarity_ranker.py
@@ -109,15 +109,15 @@ def set_rarity_ranks(
reverse=True,
)

# perform ranking of each token in collection
for i, token in enumerate(sorted_token_rarities):
# Perform ranking of each token in collection
for i, token_rarity in enumerate(sorted_token_rarities):
rank = i + 1
if i > 0:
prev_token = sorted_token_rarities[i - 1]
scores_equal = math.isclose(token.score, prev_token.score)
prev_token_rarity = sorted_token_rarities[i - 1]
scores_equal = math.isclose(token_rarity.score, prev_token_rarity.score)
if scores_equal:
rank = prev_token.rank
rank = prev_token_rarity.rank

token.rank = rank
token_rarity.rank = rank

return sorted_token_rarities
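The loop implements standard competition ranking: scores that are equal under `math.isclose` share a rank, and the next distinct score resumes at its ordinal position. A standalone sketch of the same logic with hypothetical scores:

```python
import math

scores = [0.90, 0.75, 0.75, 0.40]  # hypothetical, already sorted descending
ranks: list[int] = []
for i, score in enumerate(scores):
    rank = i + 1
    if i > 0 and math.isclose(score, scores[i - 1]):
        rank = ranks[i - 1]  # tie: share the previous token's rank
    ranks.append(rank)

assert ranks == [1, 2, 2, 4]  # the rank after a tie is skipped
```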
3 changes: 0 additions & 3 deletions open_rarity/resolver/rarity_providers/rarity_sniffer.py
@@ -6,9 +6,6 @@

logger = logging.getLogger("open_rarity_logger")
RARITY_SNIFFER_API_URL = "https://raritysniffer.com/api/index.php"
RARITY_SNIPER_API_URL = (
"https://api.raritysniper.com/public/collection/{slug}/id/{token_id}"
)
USER_AGENT = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
41 changes: 34 additions & 7 deletions open_rarity/resolver/testset_resolver.py
@@ -215,7 +215,8 @@ def resolve_collection_data(
filename: str = "test_collections.json",
max_tokens_to_calculate: int | None = None,
use_cache: bool = True,
) -> None:
output_file_to_disk: bool = True,
) -> dict | None:
"""Resolves collection information through OpenSea API
Parameters
@@ -236,6 +237,16 @@
If set to true, will cache fetched data from external APIs in order to ensure
re-runs for the same collections are faster. Only use if collection and token
metadata is static - do not use for unrevealed/changing collections.
output_file_to_disk: bool
If set to True, will output the resolved collection data to disk.
Set to False to keep the data in memory only; this is primarily
useful for testing.
Returns
-------
dict | None
A dict mapping each collection slug to the rows that would have been
written to its output file. Only returned if output_file_to_disk is set
to False.
Raises
------
@@ -248,6 +259,7 @@

data = json.load(io.BytesIO(golden_collections))
print("------------------------------")
slugs_to_rows = {}
for collection_def in data:
start_time = time()
opensea_slug = collection_def["collection_slug"]
@@ -288,15 +300,20 @@
scores=open_rarity_scores,
)

print(f"4. Wrote to CSV: {opensea_slug}")
if output_file_to_disk:
print(f"4. Wrote to CSV: {opensea_slug}")

serialize_to_csv(
rows = serialize_to_csv(
collection_with_metadata=collection_with_metadata,
tokens_with_rarity=tokens_with_rarity,
dry_run=not output_file_to_disk,
)
slugs_to_rows[opensea_slug] = rows
time_elapsed = round(time() - start_time)
print(f"FINISHED: Resolved collection: {opensea_slug} in {time_elapsed} secs")

return slugs_to_rows if not output_file_to_disk else None


def augment_with_open_rarity_scores(
tokens_with_rarity: list[TokenWithRarityData], scores: OpenRarityScores
@@ -491,13 +508,17 @@ def _rank_diff(rank1: int | None, rank2: int | None) -> int | None:
def serialize_to_csv(
collection_with_metadata: CollectionWithMetadata,
tokens_with_rarity: list[TokenWithRarityData],
) -> None:
dry_run: bool = False,
) -> list | None:
"""Serialize collection and ranking data to CSV
Parameters
----------
collection : Collection
collection
collection_with_metadata : CollectionWithMetadata
dry_run: bool
If set to True, the CSV will not be written to disk; the rows are
returned as a list instead.
"""
slug = collection_with_metadata.opensea_slug
testset = open(f"testset_{slug}.csv", "w")
@@ -534,6 +555,7 @@

writer = csv.writer(testset)
writer.writerow(headers)
rows = []

for token_with_rarity in tokens_with_rarity:
traits_sniper_rank = _get_provider_rank(
@@ -588,7 +610,12 @@
_rank_diff(rarity_sniper_rank, or_sum_rank),
_rank_diff(rarity_sniper_rank, or_ic_rank),
]
writer.writerow(row)
if dry_run:
rows.append(row)
else:
writer.writerow(row)

return rows if rows else None


if __name__ == "__main__":
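A hypothetical invocation of the new in-memory path; any parameters not visible in this hunk (the diff starts at `filename`) are assumed to keep their defaults:

```python
from open_rarity.resolver.testset_resolver import resolve_collection_data

# Nothing is written to disk; instead we get back a dict mapping each
# collection slug to the rows that would have gone into its CSV file.
rows_by_slug = resolve_collection_data(
    filename="test_collections.json",
    use_cache=True,
    output_file_to_disk=False,
)
for slug, rows in rows_by_slug.items():
    print(slug, len(rows) if rows else 0)
```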
14 changes: 0 additions & 14 deletions open_rarity/scoring/handlers/arithmetic_mean_scoring_handler.py
@@ -1,14 +1,10 @@
import logging

import numpy as np

from open_rarity.models.collection import Collection, CollectionAttribute
from open_rarity.models.token import Token
from open_rarity.models.token_metadata import AttributeName
from open_rarity.scoring.utils import get_token_attributes_scores_and_weights

logger = logging.getLogger("open_rarity_logger")


class ArithmeticMeanScoringHandler:
"""arithmetic mean of a token's n trait probabilities"""
@@ -69,22 +65,12 @@ def _score_token(
float
The token score
"""
logger.debug("Computing arithmetic mean for token %s", token)

attr_scores, attr_weights = get_token_attributes_scores_and_weights(
collection=collection,
token=token,
normalized=normalized,
collection_null_attributes=collection_null_attributes,
)

logger.debug(
"[amean] Calculated for %s %s:%s %s",
collection,
token,
attr_scores,
attr_weights,
)

avg = float(np.average(attr_scores, weights=attr_weights))
return avg
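With the logging removed, the scoring core is just a weighted average. A sketch with hypothetical trait scores and unit weights, using the same `np.average` call as the handler:

```python
import numpy as np

# Hypothetical per-trait scores (e.g. inverse trait probabilities) and weights.
attr_scores = [2.0, 4.0, 10.0]
attr_weights = [1.0, 1.0, 1.0]

# Same call the handler makes: sum(w * s) / sum(w).
score = float(np.average(attr_scores, weights=attr_weights))
assert abs(score - 16.0 / 3.0) < 1e-9
```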
open_rarity/scoring/handlers/geometric_mean_scoring_handler.py
@@ -1,14 +1,10 @@
import logging

import scipy.stats

from open_rarity.models.collection import Collection, CollectionAttribute
from open_rarity.models.token import Token
from open_rarity.models.token_metadata import AttributeName
from open_rarity.scoring.utils import get_token_attributes_scores_and_weights

logger = logging.getLogger("open_rarity_logger")


class GeometricMeanScoringHandler:
"""geometric mean of a token's n trait probabilities
@@ -72,8 +68,6 @@ def _score_token(
float
The token score
"""
logger.debug(f"Computing geometric mean for {collection} token {token}")

attr_scores, attr_weights = get_token_attributes_scores_and_weights(
collection=collection,
token=token,
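For contrast, a sketch of the geometric counterpart. The handler's exact call is not shown in this hunk, so the weighted `scipy.stats.gmean` form (available in recent SciPy) is an assumption:

```python
import scipy.stats

attr_scores = [2.0, 4.0, 10.0]
attr_weights = [1.0, 1.0, 1.0]

# Weighted geometric mean; with unit weights this is (2 * 4 * 10) ** (1 / 3).
score = float(scipy.stats.gmean(attr_scores, weights=attr_weights))
assert abs(score - 80 ** (1 / 3)) < 1e-9
```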