Skip to content

Commit

Permalink
Maintain insert order of property values
Browse files Browse the repository at this point in the history
Fix #1139
  • Loading branch information
tillprochaska committed Jun 28, 2023
1 parent 8ff8e26 commit f61f739
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 13 deletions.
24 changes: 11 additions & 13 deletions followthemoney/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,16 @@ def __init__(
#: than ``id``, ``schema`` or ``properties``, they will be kept in here
#: and re-added upon serialization.
self.context = data
self._properties: Dict[str, Set[str]] = {}
self._properties: Dict[str, List[str]] = {}
self._size = 0

for key, value in properties.items():
for key, values in properties.items():
if key not in self.schema.properties:
continue
if not cleaned:
self.add(key, value, cleaned=cleaned, quiet=True)
if cleaned:
self.add(key, values, cleaned=cleaned)
else:
values = set(value)
self._properties[key] = values
self._size += sum([len(v) for v in values])
self.add(key, values, quiet=True)

def make_id(self, *parts: Any) -> Optional[str]:
"""Generate a (hopefully unique) ID for the given entity, composed
Expand Down Expand Up @@ -127,11 +125,10 @@ def get(self, prop: P, quiet: bool = False) -> List[str]:
prop_name = self._prop_name(prop, quiet=quiet)
if prop_name is None:
return []
return list(self._properties.get(prop_name, []))
return self._properties.get(prop_name, [])

def first(self, prop: P, quiet: bool = False) -> Optional[str]:
"""Get only the first value set for the property, in no particular
order.
"""Get only the first value set for the property.
:param prop: can be given as a name or an instance of
:class:`~followthemoney.property.Property`.
Expand Down Expand Up @@ -217,8 +214,9 @@ def unsafe_add(
# log.warning(msg, prop.name)
return None
self._size += value_size
self._properties.setdefault(prop.name, set())
self._properties[prop.name].add(value)
self._properties.setdefault(prop.name, list())
if value not in self._properties[prop.name]:
self._properties[prop.name].append(value)
return None

def set(
Expand Down Expand Up @@ -275,7 +273,7 @@ def remove(self, prop: P, value: str, quiet: bool = True) -> None:
if prop_name is not None and prop_name in self._properties:
try:
self._properties[prop_name].remove(value)
except KeyError:
except (KeyError, ValueError):
pass

def iterprops(self) -> List[Property]:
Expand Down
42 changes: 42 additions & 0 deletions tests/test_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,45 @@ def test_pickle(self):
assert proxy.id == proxy2.id
assert hash(proxy) == hash(proxy2)
assert proxy2.schema.name == ENTITY["schema"]

def test_value_order(self):
    """Property values are returned in the order they were supplied."""
    # Both payloads share the id "one"; only the order of the
    # ``bodyHtml`` values differs between them.
    forward_data = {
        "id": "one",
        "schema": "Email",
        "properties": {"bodyHtml": ["Hello", "World"]},
    }
    reversed_data = {
        "id": "one",
        "schema": "Email",
        "properties": {"bodyHtml": ["World", "Hello"]},
    }

    one = EntityProxy.from_dict(model, forward_data)
    two = EntityProxy.from_dict(model, reversed_data)

    # Compare against fresh literals rather than the input lists.
    assert one.get("bodyHtml") == ["Hello", "World"]
    assert two.get("bodyHtml") == ["World", "Hello"]

def test_value_deduplication(self):
    """Repeated property values are kept only once."""
    base = {"id": "acme-inc", "schema": "Company"}
    name = "ACME, Inc."

    # Duplicates passed in the initial payload collapse to one value.
    preloaded = EntityProxy.from_dict(
        model,
        {**base, "properties": {"name": [name, name]}},
    )
    assert preloaded.get("name") == [name]

    # An entity created without properties starts out empty ...
    proxy = EntityProxy.from_dict(model, dict(base))
    assert proxy.get("name") == []

    # ... and adding the same value twice still leaves a single entry.
    for _ in range(2):
        proxy.add("name", name)
        assert proxy.get("name") == [name]

0 comments on commit f61f739

Please sign in to comment.