Compare directory identifiers as bytes in the diff algorithms and their tests.

* _empty_dir_hash is now wrapped in hash_to_bytes() in both diff.py and
  dir_iterators.py, so it can be compared with the bytes identifiers that
  the _push_dir_frame() docstring documents ("dir_id (bytes)").
* DirectoryIterator._push_dir_frame() no longer special-cases a falsy or
  empty-directory id; step() now drops an entry whose target equals
  _empty_dir_hash instead of descending into it.
* The tests convert identifiers with hash_to_bytes() before use. The mocked
  _get_dir() falls back to the "to" model only when the "from" lookup
  returned None; the test is written `is not None` (identity comparison
  with the None singleton, PEP 8) rather than `!= None`.
  NOTE(review): presumably get_hash_data() can return a falsy non-None value
  for a known directory, which is why `or` was replaced -- confirm.

diff --git a/swh/storage/algos/diff.py b/swh/storage/algos/diff.py
index 1d75ffe27d19670a33d9683d670aa0fae63d9246..6eafbb85698d9a720df4519fecfae7d0117beca0 100644
--- a/swh/storage/algos/diff.py
+++ b/swh/storage/algos/diff.py
@@ -14,6 +14,7 @@
 
 import collections
 
+from swh.model.hashutil import hash_to_bytes
 from swh.model.identifiers import directory_identifier
 
 from .dir_iterators import (
@@ -21,7 +22,7 @@ from .dir_iterators import (
 )
 
 # get the hash identifier for an empty directory
-_empty_dir_hash = directory_identifier({'entries': []})
+_empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []}))
 
 
 def _get_rev(storage, rev_id):
diff --git a/swh/storage/algos/dir_iterators.py b/swh/storage/algos/dir_iterators.py
index 37d73326cd02f1cce86eabd010a6e9312b267ad9..897d0a82c0e1321670168dd0595806d8a7508470 100644
--- a/swh/storage/algos/dir_iterators.py
+++ b/swh/storage/algos/dir_iterators.py
@@ -13,10 +13,11 @@
 
 from enum import Enum
 
+from swh.model.hashutil import hash_to_bytes
 from swh.model.identifiers import directory_identifier
 
 # get the hash identifier for an empty directory
-_empty_dir_hash = directory_identifier({'entries': []})
+_empty_dir_hash = hash_to_bytes(directory_identifier({'entries': []}))
 
 
 def _get_dir(storage, dir_id):
@@ -69,19 +70,15 @@ class DirectoryIterator(object):
         Args:
             dir_id (bytes): identifier of a root directory
         """
-        if dir_id:
-            if dir_id == _empty_dir_hash:
-                self.frames.append([])
-            else:
-                # get directory entries
-                dir_data = _get_dir(self.storage, dir_id)
-                # sort them in lexicographical order
-                dir_data = sorted(dir_data, key=lambda e: e['name'])
-                # reverse the ordering in order to unstack the "smallest"
-                # entry each time the iterator advances
-                dir_data.reverse()
-                # push the directory frame to the main stack
-                self.frames.append(dir_data)
+        # get directory entries
+        dir_data = _get_dir(self.storage, dir_id)
+        # sort them in lexicographical order
+        dir_data = sorted(dir_data, key=lambda e: e['name'])
+        # reverse the ordering in order to unstack the "smallest"
+        # entry each time the iterator advances
+        dir_data.reverse()
+        # push the directory frame to the main stack
+        self.frames.append(dir_data)
 
     def top(self):
         """
@@ -157,7 +154,8 @@ class DirectoryIterator(object):
             self.has_started = True
             return current
 
-        if descend and self.current_is_dir():
+        if descend and self.current_is_dir() \
+                and current['target'] != _empty_dir_hash:
             self._push_dir_frame(current['target'])
         else:
             self.drop()
diff --git a/swh/storage/tests/algos/test_diff.py b/swh/storage/tests/algos/test_diff.py
index 0a2f6d6601eaa37e2b61bea6668c946edd748d2d..6551e9b3976ef8eec847a6b1c6ae5f9637b77850 100644
--- a/swh/storage/tests/algos/test_diff.py
+++ b/swh/storage/tests/algos/test_diff.py
@@ -10,6 +10,7 @@ import unittest
 from nose.tools import istest, nottest
 from unittest.mock import patch
 
+from swh.model.hashutil import hash_to_bytes
 from swh.model.identifiers import directory_identifier
 from swh.storage.algos import diff
 
@@ -29,7 +30,7 @@ class DirectoryModel(object):
 
     def __getitem__(self, item):
         if item == 'target':
-            return directory_identifier(self)
+            return hash_to_bytes(directory_identifier(self))
         else:
             return self.data[item]
 
@@ -38,7 +39,7 @@ class DirectoryModel(object):
         if len(path_parts) == 1:
             self['entry_idx'][path] = len(self['entries'])
             self['entries'].append({
-                'target': sha1,
+                'target': hash_to_bytes(sha1),
                 'name': path,
                 'perms': 33188,
                 'type': 'file'
@@ -95,20 +96,25 @@ class TestDiffRevisions(unittest.TestCase):
     def diff_revisions(self, rev_from, rev_to, from_dir_model,
                        to_dir_model, expected_changes, mock_get_dir,
                        mock_get_rev):
+        rev_from_bytes = hash_to_bytes(rev_from)
+        rev_to_bytes = hash_to_bytes(rev_to)
+
         def _get_rev(*args, **kwargs):
-            if args[1] == rev_from:
+            if args[1] == rev_from_bytes:
                 return {'directory': from_dir_model['target']}
             else:
                 return {'directory': to_dir_model['target']}
 
         def _get_dir(*args, **kwargs):
-            return from_dir_model.get_hash_data(args[1]) or \
-                   to_dir_model.get_hash_data(args[1])
+            from_dir = from_dir_model.get_hash_data(args[1])
+            to_dir = to_dir_model.get_hash_data(args[1])
+            return from_dir if from_dir is not None else to_dir
 
         mock_get_rev.side_effect = _get_rev
         mock_get_dir.side_effect = _get_dir
 
-        changes = diff.diff_revisions(None, rev_from, rev_to, track_renaming=True)
+        changes = diff.diff_revisions(None, rev_from_bytes, rev_to_bytes,
+                                      track_renaming=True)
 
         self.assertEqual(changes, expected_changes)
 