Skip to content
Snippets Groups Projects
Commit d6d17dad authored by Antoine Lambert's avatar Antoine Lambert
Browse files

collections: Improve ImmutableDict look up by key performance

Previously, when looking up data by key in an ImmutableDict, the inner
tuple storing keys and values was iterated until the requested key was
found.

This is inefficient when the ImmutableDict contains a lot of entries,
typically for an origin snapshot containing a lot of branches.

So use an inner dictionary to speed up lookup-by-key operations and
improve loader performance.
parent 38c58b0c
No related branches found
No related tags found
No related merge requests found
Pipeline #607 passed
# Copyright (C) 2020 The Software Heritage developers # Copyright (C) 2020-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution # See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version # License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information # See top-level LICENSE file for more information
from __future__ import annotations
"""Utility data structures.""" """Utility data structures."""
from collections.abc import Mapping from collections.abc import Mapping
import copy
from typing import Dict, Generic, Iterable, Optional, Tuple, TypeVar, Union from typing import Dict, Generic, Iterable, Optional, Tuple, TypeVar, Union
KT = TypeVar("KT") KT = TypeVar("KT")
...@@ -18,36 +21,35 @@ class ImmutableDict(Mapping, Generic[KT, VT]): ...@@ -18,36 +21,35 @@ class ImmutableDict(Mapping, Generic[KT, VT]):
This class behaves like a dictionary, but internally stores objects in a tuple, This class behaves like a dictionary, but internally stores objects in a tuple,
so it is both immutable and hashable.""" so it is both immutable and hashable."""
data: Tuple[Tuple[KT, VT], ...] _data: Dict[KT, VT]
def __init__( def __init__(
self, self,
data: Union[ data: Union[Iterable[Tuple[KT, VT]], ImmutableDict[KT, VT], Dict[KT, VT]] = {},
Iterable[Tuple[KT, VT]], "ImmutableDict[KT, VT]", Dict[KT, VT]
] = {},
): ):
if isinstance(data, dict): if isinstance(data, dict):
self.data = tuple(item for item in data.items()) self._data = data
elif isinstance(data, ImmutableDict): elif isinstance(data, ImmutableDict):
self.data = data.data self._data = data._data
else: else:
self.data = tuple(data) self._data = {k: v for k, v in data}
@property
def data(self):
return tuple(self._data.items())
def __repr__(self): def __repr__(self):
return f"ImmutableDict({dict(self.data)!r})" return f"ImmutableDict({dict(self.data)!r})"
def __getitem__(self, key): def __getitem__(self, key):
for (k, v) in self.data: return self._data[key]
if k == key:
return v
raise KeyError(key)
def __iter__(self): def __iter__(self):
for (k, v) in self.data: for (k, v) in self.data:
yield k yield k
def __len__(self): def __len__(self):
return len(self.data) return len(self._data)
def items(self): def items(self):
yield from self.data yield from self.data
...@@ -55,15 +57,9 @@ class ImmutableDict(Mapping, Generic[KT, VT]): ...@@ -55,15 +57,9 @@ class ImmutableDict(Mapping, Generic[KT, VT]):
def __hash__(self): def __hash__(self):
return hash(tuple(sorted(self.data))) return hash(tuple(sorted(self.data)))
def copy_pop(self, popped_key) -> Tuple[Optional[VT], "ImmutableDict[KT, VT]"]: def copy_pop(self, popped_key) -> Tuple[Optional[VT], ImmutableDict[KT, VT]]:
"""Returns a copy of this ImmutableDict without the given key, """Returns a copy of this ImmutableDict without the given key,
as well as the value associated to the key.""" as well as the value associated to the key."""
popped_value = None new_items = copy.deepcopy(self._data)
new_items = [] popped_value = new_items.pop(popped_key, None) # type: ignore
for (key, value) in self.data:
if key == popped_key:
popped_value = value
else:
new_items.append((key, value))
return (popped_value, ImmutableDict(new_items)) return (popped_value, ImmutableDict(new_items))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment