From c40ab0368de4e07639e7bdc08776d420bef29aaf Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" <antoine.romain.dumont@gmail.com> Date: Thu, 23 Feb 2017 14:20:43 +0100 Subject: [PATCH] Consider special files as empty ones when computing content hashes Closes T255 Ref. D179 --- swh/model/git.py | 17 ++++++++++++++--- swh/model/tests/test_git.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/swh/model/git.py b/swh/model/git.py index a3503cbb..c8b4bbe8 100644 --- a/swh/model/git.py +++ b/swh/model/git.py @@ -1,10 +1,11 @@ -# Copyright (C) 2015 The Software Heritage developers +# Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os +import stat from enum import Enum @@ -160,17 +161,27 @@ def compute_blob_metadata(filepath): Args: filepath: absolute pathname of the file. + This could be special files (fifo, character or + block device), they will be considered empty files. Returns: Dictionary of values: - name: basename of the file + - length: data length - perms: git permission for file - type: git type for file - path: absolute filepath on filesystem """ - blob_metadata = hashutil.hash_path(filepath) - perms = GitPerm.EXEC if os.access(filepath, os.X_OK) else GitPerm.BLOB + mode = os.lstat(filepath).st_mode + if not stat.S_ISREG(mode): # special (block or character device, fifo) + perms = GitPerm.BLOB + blob_metadata = hashutil.hash_data(b'') + blob_metadata['length'] = 0 + else: + perms = GitPerm.EXEC if os.access(filepath, os.X_OK) else GitPerm.BLOB + blob_metadata = hashutil.hash_path(filepath) + blob_metadata.update({ 'name': os.path.basename(filepath), 'perms': perms, diff --git a/swh/model/tests/test_git.py b/swh/model/tests/test_git.py index b1eac8cb..3c233c3b 100644 --- a/swh/model/tests/test_git.py +++ b/swh/model/tests/test_git.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015 The Software Heritage developers +# Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -138,6 +138,41 @@ blah self.assertEqual(checksum, self.checksums['tag_sha1_git']) +@attr('fs') +class ComputeBlobMetadata(unittest.TestCase): + @istest + def compute_blob_metadata__special_file_returns_nothing(self): + # prepare + tmp_root_path = tempfile.mkdtemp().encode('utf-8') + name = b'fifo-file' + path = os.path.join(tmp_root_path, name) + + # given + os.mkfifo(path) + + # when + actual_metadata = git.compute_blob_metadata(path) + + # then + expected_metadata = { + 'sha1': b'\xda9\xa3\xee^kK\r2U\xbf\xef\x95`\x18\x90\xaf\xd8\x07\t', + 'sha1_git': b'\xe6\x9d\xe2\x9b\xb2\xd1\xd6CK\x8b)\xaewZ\xd8\xc2' + b'\xe4\x8cS\x91', + 'sha256': b"\xe3\xb0\xc4B\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99o" + b"\xb9$'\xaeA\xe4d\x9b\x93L\xa4\x95\x99\x1bxR\xb8U", + 'perms': git.GitPerm.BLOB, + 'path': path, + 'name': name, + 'type': git.GitType.BLOB, + 'length': 0 + } + + self.assertEquals(actual_metadata, expected_metadata) + + # cleanup + shutil.rmtree(tmp_root_path) + + @attr('fs') class GitHashWalkArborescenceTree: """Root class to ease walk and git hash testing without side-effecty -- GitLab