From 814a6c8416d56f5f8b3e590d419d5aea7a888ab2 Mon Sep 17 00:00:00 2001 From: Pierre-Yves David <pierre-yves.david@ens-lyon.org> Date: Tue, 20 Sep 2022 14:26:17 +0200 Subject: [PATCH] from_disk: only build a model object once Before this change, a Directory object was built to compute the `id` that we fed to the Directory object we built for `to_model`. We tested this change on a simple run of the Mercurial loader, with a noop-loader storage: swh loader run mercurial https://foss.heptapod.net/mercurial/mercurial-devel directory=/data/repos/mercurial-devel = Median time of 3 runs = before: 17 minutes 48 seconds after: 12 minutes 59 seconds On a profile of the same run, the `to_model` call of the from_disk's `Directory` class took the following percentage of the time: before: 43% after: 24% --- CONTRIBUTORS | 1 + swh/model/from_disk.py | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 7565e3d6..e69f838f 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -1,3 +1,4 @@ Daniele Serafini Ishan Bhanuka Antoine Cezar +Pierre-Yves David diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py index 9ef7afa8..4c4fe0ce 100644 --- a/swh/model/from_disk.py +++ b/swh/model/from_disk.py @@ -401,7 +401,7 @@ class Directory(MerkleNode): for instance when the client is applying diffs. 
""" - __slots__ = ["__entries"] + __slots__ = ["__entries", "__model_object"] object_type: Final = "directory" @classmethod @@ -447,9 +447,11 @@ class Directory(MerkleNode): def __init__(self, data=None): super().__init__(data=data) self.__entries = None + self.__model_object = None def invalidate_hash(self): self.__entries = None + self.__model_object = None super().invalidate_hash() @staticmethod @@ -497,12 +499,14 @@ class Directory(MerkleNode): return CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=self.hash) def compute_hash(self): - return model.Directory.from_dict({"entries": self.entries}).id + return self.to_model().id def to_model(self) -> model.Directory: """Builds a `model.Directory` object based on this node; ignoring its children.""" - return model.Directory.from_dict(self.get_data()) + if self.__model_object is None: + self.__model_object = model.Directory.from_dict({"entries": self.entries}) + return self.__model_object def __getitem__(self, key): if not isinstance(key, bytes): -- GitLab