From 4c070f98ca09255a54fb3fc9efbcf9d6e099ac96 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Thu, 20 Feb 2020 16:46:56 +0100
Subject: [PATCH] Sort from_disk.Directory entries.

It should be cheap enough to do it, and it makes tests easier.
---
 swh/model/from_disk.py            |  8 +++++---
 swh/model/identifiers.py          |  4 ++--
 swh/model/tests/test_from_disk.py | 15 ++++++++++++++-
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/swh/model/from_disk.py b/swh/model/from_disk.py
index 45c60ca9..e1afc6b3 100644
--- a/swh/model/from_disk.py
+++ b/swh/model/from_disk.py
@@ -12,7 +12,7 @@ from typing import List
 from .hashutil import MultiHash, HASH_BLOCK_SIZE
 from .merkle import MerkleLeaf, MerkleNode
 from .identifiers import (
-    directory_identifier,
+    directory_entry_sort_key, directory_identifier,
     identifier_to_bytes as id_to_bytes,
     identifier_to_str as id_to_str,
 )
@@ -325,11 +325,13 @@ class Directory(MerkleNode):
 
     @property
     def entries(self):
+        """Child nodes, sorted by name in the same way `directory_identifier`
+        does."""
         if self.__entries is None:
-            self.__entries = [
+            self.__entries = sorted((
                 self.child_to_directory_entry(name, child)
                 for name, child in self.items()
-            ]
+            ), key=directory_entry_sort_key)
 
         return self.__entries
 
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
index 9257de14..85fc76c4 100644
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -114,7 +114,7 @@ def content_identifier(content):
     return MultiHash.from_data(content['data']).digest()
 
 
-def _sort_key(entry):
+def directory_entry_sort_key(entry):
     """The sorting key for tree entries"""
     if entry['type'] == 'dir':
         return entry['name'] + b'/'
@@ -182,7 +182,7 @@ def directory_identifier(directory):
 
     components = []
 
-    for entry in sorted(directory['entries'], key=_sort_key):
+    for entry in sorted(directory['entries'], key=directory_entry_sort_key):
         components.extend([
             _perms_to_bytes(entry['perms']),
             b'\x20',
diff --git a/swh/model/tests/test_from_disk.py b/swh/model/tests/test_from_disk.py
index 1c747370..12b00f80 100644
--- a/swh/model/tests/test_from_disk.py
+++ b/swh/model/tests/test_from_disk.py
@@ -449,7 +449,7 @@ class DataMixin:
         if isinstance(right, Directory):
             right = right.get_data()
 
-        return self.assertCountEqual(left.entries, right['entries'])
+        assert left.entries == right['entries']
 
     def make_contents(self, directory):
         for filename, content in self.contents.items():
@@ -579,6 +579,7 @@ class FileToContent(DataMixin, unittest.TestCase):
                                     check_path=True)
 
 
+@pytest.mark.fs
 class DirectoryToObjects(DataMixin, unittest.TestCase):
     def setUp(self):
         super().setUp()
@@ -729,6 +730,18 @@ class DirectoryToObjects(DataMixin, unittest.TestCase):
                          len(self.contents)
                          + 1)
 
+    def test_directory_entry_order(self):
+        with tempfile.TemporaryDirectory() as dirname:
+            dirname = os.fsencode(dirname)
+            open(os.path.join(dirname, b'foo.'), 'a')
+            open(os.path.join(dirname, b'foo0'), 'a')
+            os.mkdir(os.path.join(dirname, b'foo'))
+
+            directory = Directory.from_disk(path=dirname)
+
+        assert [entry['name'] for entry in directory.entries] \
+            == [b'foo.', b'foo', b'foo0']
+
 
 @pytest.mark.fs
 class TarballTest(DataMixin, unittest.TestCase):
-- 
GitLab