From 57a0e08925d4421e96486625268d6576a311a69b Mon Sep 17 00:00:00 2001
From: David Douard <david.douard@sdfa3.org>
Date: Wed, 29 Jan 2020 14:55:15 +0100
Subject: [PATCH] cli: add support for reading a file content from stdin in
 'swh identify' command

This makes it possible, for example, to run:

  curl -s https://archive.softwareheritage.org/browse/content/sha1_git:64582b78792cd6c2d67d35da5a11bb80886a6409/raw/ | swh identify
  swh:1:cnt:64582b78792cd6c2d67d35da5a11bb80886a6409	-
---
 swh/model/cli.py            | 17 ++++++++++++++---
 swh/model/tests/test_cli.py |  9 +++++++++
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/swh/model/cli.py b/swh/model/cli.py
index ec33310b..581bb45b 100644
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -45,6 +45,11 @@ def pid_of_file(path):
     return pids.persistent_identifier(pids.CONTENT, object)
 
 
+def pid_of_file_content(data):
+    object = Content.from_bytes(mode=0o644, data=data).get_data()
+    return pids.persistent_identifier(pids.CONTENT, object)
+
+
 def pid_of_dir(path):
     object = Directory.from_disk(path=path).get_data()
     return pids.persistent_identifier(pids.DIRECTORY, object)
@@ -85,7 +90,7 @@ def pid_of_git_repo(path):
 
 def identify_object(obj_type, follow_symlinks, obj):
     if obj_type == 'auto':
-        if os.path.isfile(obj):
+        if obj == '-' or os.path.isfile(obj):
             obj_type = 'content'
         elif os.path.isdir(obj):
             obj_type = 'directory'
@@ -101,7 +106,10 @@ def identify_object(obj_type, follow_symlinks, obj):
 
     pid = None
 
-    if obj_type in ['content', 'directory']:
+    if obj == '-':
+        content = sys.stdin.buffer.read()
+        pid = pid_of_file_content(content)
+    elif obj_type in ['content', 'directory']:
         path = obj.encode(sys.getfilesystemencoding())
         if follow_symlinks and os.path.islink(obj):
             path = os.path.realpath(obj)
@@ -134,7 +142,7 @@ def identify_object(obj_type, follow_symlinks, obj):
               help='type of object to identify (default: auto)')
 @click.option('--verify', '-v', metavar='PID', type=PidParamType(),
               help='reference identifier to be compared with computed one')
-@click.argument('objects', nargs=-1, required=True)
+@click.argument('objects', nargs=-1)
 def identify(obj_type, verify, show_filename, follow_symlinks, objects):
     """Compute the Software Heritage persistent identifier (PID) for the given
     source code object(s).
@@ -163,6 +171,9 @@ def identify(obj_type, verify, show_filename, follow_symlinks, objects):
       swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93	helloworld.git
 
     """  # NoQA  # overlong lines in shell examples are fine
+    if not objects:
+        objects = ['-']
+
     if verify and len(objects) != 1:
         raise click.BadParameter('verification requires a single object')
 
diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py
index f20da7a8..4d4ff011 100644
--- a/swh/model/tests/test_cli.py
+++ b/swh/model/tests/test_cli.py
@@ -37,6 +37,15 @@ class TestIdentify(DataMixin, unittest.TestCase):
             self.assertPidOK(result,
                              'swh:1:cnt:' + hash_to_hex(content['sha1_git']))
 
+    def test_content_id_from_stdin(self):
+        """identify file content read from standard input"""
+        self.make_contents(self.tmpdir_name)
+        for _, content in self.contents.items():
+            result = self.runner.invoke(cli.identify,
+                                        input=content['data'])
+            self.assertPidOK(result,
+                             'swh:1:cnt:' + hash_to_hex(content['sha1_git']))
+
     def test_directory_id(self):
         """identify an entire directory"""
         self.make_from_tarball(self.tmpdir_name)
-- 
GitLab