Skip to content
Snippets Groups Projects
Commit d6d17dad authored by Antoine Lambert's avatar Antoine Lambert
Browse files

collections: Improve ImmutableDict key lookup performance

Previously when looking up data by key in an ImmutableDict, the inner
tuple storing keys and values was iterated until finding the requested
key.

This is not really efficient when the ImmutableDict contains a lot of
entries, typically for an origin snapshot containing a lot of branches.

So use an inner dictionary to speed up key lookups and improve loader
performance.
parent 38c58b0c
No related branches found
No related tags found
No related merge requests found
Pipeline #607 passed
# Copyright (C) 2020 The Software Heritage developers
# Copyright (C) 2020-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import annotations
"""Utility data structures."""
from collections.abc import Mapping
import copy
from typing import Dict, Generic, Iterable, Optional, Tuple, TypeVar, Union
KT = TypeVar("KT")
......@@ -18,36 +21,35 @@ class ImmutableDict(Mapping, Generic[KT, VT]):
This class behaves like a dictionary, but internally stores objects in a
private dictionary that it never modifies, so it is both immutable and hashable."""
data: Tuple[Tuple[KT, VT], ...]
_data: Dict[KT, VT]
def __init__(
self,
data: Union[
Iterable[Tuple[KT, VT]], "ImmutableDict[KT, VT]", Dict[KT, VT]
] = {},
data: Union[Iterable[Tuple[KT, VT]], ImmutableDict[KT, VT], Dict[KT, VT]] = {},
):
if isinstance(data, dict):
self.data = tuple(item for item in data.items())
self._data = data
elif isinstance(data, ImmutableDict):
self.data = data.data
self._data = data._data
else:
self.data = tuple(data)
self._data = {k: v for k, v in data}
@property
def data(self):
return tuple(self._data.items())
def __repr__(self):
return f"ImmutableDict({dict(self.data)!r})"
def __getitem__(self, key):
for (k, v) in self.data:
if k == key:
return v
raise KeyError(key)
return self._data[key]
def __iter__(self):
for (k, v) in self.data:
yield k
def __len__(self):
return len(self.data)
return len(self._data)
def items(self):
yield from self.data
......@@ -55,15 +57,9 @@ class ImmutableDict(Mapping, Generic[KT, VT]):
def __hash__(self):
return hash(tuple(sorted(self.data)))
def copy_pop(self, popped_key) -> Tuple[Optional[VT], "ImmutableDict[KT, VT]"]:
def copy_pop(self, popped_key) -> Tuple[Optional[VT], ImmutableDict[KT, VT]]:
"""Returns a copy of this ImmutableDict without the given key,
as well as the value associated to the key."""
popped_value = None
new_items = []
for (key, value) in self.data:
if key == popped_key:
popped_value = value
else:
new_items.append((key, value))
new_items = copy.deepcopy(self._data)
popped_value = new_items.pop(popped_key, None) # type: ignore
return (popped_value, ImmutableDict(new_items))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment