Skip to content

Commit

Permalink
Unroll adding values in constructor to remove performance overhead
Browse files Browse the repository at this point in the history
Implement PR feedback by @pudo
  • Loading branch information
tillprochaska committed Jul 10, 2023
1 parent f61f739 commit 00bb791
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
8 changes: 7 additions & 1 deletion followthemoney/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,13 @@ def __init__(
if key not in self.schema.properties:
continue
if cleaned:
self.add(key, values, cleaned=cleaned)
# We deliberately do not call `self.add` here: this constructor may run millions
# of times in some contexts, and we want to avoid the overhead of that call.
seen = set()
seen_add = seen.add
unique_values = [v for v in values if not (v in seen or seen_add(v))]
self._properties[key] = unique_values
self._size += sum([len(v) for v in unique_values])
else:
self.add(key, values, quiet=True)

Expand Down
12 changes: 12 additions & 0 deletions tests/test_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,15 @@ def test_value_deduplication(self):
assert proxy.get("name") == ["ACME, Inc."]
proxy.add("name", "ACME, Inc.")
assert proxy.get("name") == ["ACME, Inc."]

def test_value_deduplication_cleaned(self):
    """Duplicate property values collapse when loading with ``cleaned=True``."""
    data = {
        "id": "acme-inc",
        "schema": "Company",
        "properties": {
            "name": ["ACME, Inc.", "ACME, Inc."],
        },
    }
    entity = EntityProxy.from_dict(model, data, cleaned=True)

    # Both input names are the same string, so only one may be kept.
    names = entity.get("name")
    assert names == ["ACME, Inc."]

0 comments on commit 00bb791

Please sign in to comment.