diff --git a/PKG-INFO b/PKG-INFO index 1d0b3f063793879a0213fde8fc01b5eb6aa27a13..c5e6a7f0c79a058b0222de1f126fbe93293c6f80 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.52 +Version: 0.0.53 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 1d0b3f063793879a0213fde8fc01b5eb6aa27a13..c5e6a7f0c79a058b0222de1f126fbe93293c6f80 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: swh.model -Version: 0.0.52 +Version: 0.0.53 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh/model/cli.py b/swh/model/cli.py index ec33310b05722dc1fe1325e3bcb489737a42547a..581bb45b892cae89ab2bb6415fb8d608d905784b 100644 --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -45,6 +45,11 @@ def pid_of_file(path): return pids.persistent_identifier(pids.CONTENT, object) +def pid_of_file_content(data): + object = Content.from_bytes(mode=644, data=data).get_data() + return pids.persistent_identifier(pids.CONTENT, object) + + def pid_of_dir(path): object = Directory.from_disk(path=path).get_data() return pids.persistent_identifier(pids.DIRECTORY, object) @@ -85,7 +90,7 @@ def pid_of_git_repo(path): def identify_object(obj_type, follow_symlinks, obj): if obj_type == 'auto': - if os.path.isfile(obj): + if obj == '-' or os.path.isfile(obj): obj_type = 'content' elif os.path.isdir(obj): obj_type = 'directory' @@ -101,7 +106,10 @@ def identify_object(obj_type, follow_symlinks, obj): pid = None - if obj_type in ['content', 'directory']: + if obj == '-': + content = sys.stdin.buffer.read() + pid = pid_of_file_content(content) + elif obj_type in ['content', 'directory']: path = obj.encode(sys.getfilesystemencoding()) if follow_symlinks and os.path.islink(obj): path = os.path.realpath(obj) @@ -134,7 +142,7 @@ def identify_object(obj_type, follow_symlinks, obj): help='type of object to identify (default: auto)') @click.option('--verify', '-v', metavar='PID', type=PidParamType(), help='reference identifier to be compared with computed one') -@click.argument('objects', nargs=-1, required=True) +@click.argument('objects', nargs=-1) def identify(obj_type, verify, show_filename, follow_symlinks, objects): """Compute the Software Heritage persistent identifier (PID) for the given source code object(s). @@ -163,6 +171,9 @@ def identify(obj_type, verify, show_filename, follow_symlinks, objects): swh:1:snp:510aa88bdc517345d258c1fc2babcd0e1f905e93 helloworld.git """ # NoQA # overlong lines in shell examples are fine + if not objects: + objects = ['-'] + if verify and len(objects) != 1: raise click.BadParameter('verification requires a single object') diff --git a/swh/model/hypothesis_strategies.py b/swh/model/hypothesis_strategies.py index 15e3bdd94f44c882cd65d581926dc9ebac64bfc7..fcf67ca43136823cdc56502659cc5be285b498f7 100644 --- a/swh/model/hypothesis_strategies.py +++ b/swh/model/hypothesis_strategies.py @@ -200,6 +200,7 @@ def snapshots(draw, *, min_size=0, max_size=100, only_objects=False): for alias in unresolved_aliases: branches[alias] = draw(branch_targets(only_objects=True)) + # Ensure no cycles between aliases while True: try: id_ = snapshot_identifier({ diff --git a/swh/model/model.py b/swh/model/model.py index f551706d31cd491e034ab2c72fead75fab3065be..4a308a4045eaf1b66e010fb0a9e9b477a46af0de 100644 --- a/swh/model/model.py +++ b/swh/model/model.py @@ -119,7 +119,7 @@ class TimestampWithTimezone(BaseModel): @classmethod def from_dict(cls, d): """Builds a TimestampWithTimezone from any of the formats - accepted by :py:`swh.model.normalize_timestamp`.""" + accepted by :func:`swh.model.normalize_timestamp`.""" d = normalize_timestamp(d) return cls( timestamp=Timestamp.from_dict(d['timestamp']), @@ -296,8 +296,8 @@ class Revision(BaseModel, HashableObject): message = attr.ib(type=bytes) author = attr.ib(type=Person) committer = attr.ib(type=Person) - date = attr.ib(type=TimestampWithTimezone) - committer_date = attr.ib(type=TimestampWithTimezone) + date = attr.ib(type=Optional[TimestampWithTimezone]) + committer_date = attr.ib(type=Optional[TimestampWithTimezone]) type = attr.ib(type=RevisionType) directory = attr.ib(type=Sha1Git) synthetic = attr.ib(type=bool) @@ -314,12 +314,20 @@ class Revision(BaseModel, HashableObject): @classmethod def from_dict(cls, d): d = d.copy() + date = d.pop('date') + if date: + date = TimestampWithTimezone.from_dict(date) + + committer_date = d.pop('committer_date') + if committer_date: + committer_date = TimestampWithTimezone.from_dict( + committer_date) + return cls( author=Person.from_dict(d.pop('author')), committer=Person.from_dict(d.pop('committer')), - date=TimestampWithTimezone.from_dict(d.pop('date')), - committer_date=TimestampWithTimezone.from_dict( - d.pop('committer_date')), + date=date, + committer_date=committer_date, type=RevisionType(d.pop('type')), **d) diff --git a/swh/model/tests/test_cli.py b/swh/model/tests/test_cli.py index f20da7a83a6d9a77d185033fe77c52b6b5d8ed82..4d4ff0116efe58fe26fe6cc5733462c4b58af533 100644 --- a/swh/model/tests/test_cli.py +++ b/swh/model/tests/test_cli.py @@ -37,6 +37,15 @@ class TestIdentify(DataMixin, unittest.TestCase): self.assertPidOK(result, 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])) + def test_content_id_from_stdin(self): + """identify file content""" + self.make_contents(self.tmpdir_name) + for _, content in self.contents.items(): + result = self.runner.invoke(cli.identify, + input=content['data']) + self.assertPidOK(result, + 'swh:1:cnt:' + hash_to_hex(content['sha1_git'])) + def test_directory_id(self): """identify an entire directory""" self.make_from_tarball(self.tmpdir_name) diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py index a5719ca0a86f32b3a38cd3f9653fcc706785d766..5560127ed87e1f7e961c25e00e9d8c39db492d07 100644 --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -66,49 +66,49 @@ def test_directory_model_id_computation(): dir_dict = dict(directory_example) del dir_dict['id'] - dir_model = Directory(**dir_dict) - assert dir_model.id - assert dir_model.id == hash_to_bytes(directory_identifier(dir_dict)) - - dir_model = Directory.from_dict(dir_dict) - assert dir_model.id - assert dir_model.id == hash_to_bytes(directory_identifier(dir_dict)) + dir_id = hash_to_bytes(directory_identifier(dir_dict)) + for dir_model in [Directory(**dir_dict), Directory.from_dict(dir_dict)]: + assert dir_model.id == dir_id def test_revision_model_id_computation(): rev_dict = dict(revision_example) del rev_dict['id'] - rev_model = Revision(**rev_dict) - assert rev_model.id - assert rev_model.id == hash_to_bytes(revision_identifier(rev_dict)) + rev_id = hash_to_bytes(revision_identifier(rev_dict)) + for rev_model in [Revision(**rev_dict), Revision.from_dict(rev_dict)]: + assert rev_model.id == rev_id + + +def test_revision_model_id_computation_with_no_date(): + """We can have revision with date to None + + """ + rev_dict = dict(revision_example) + rev_dict['date'] = None + rev_dict['committer_date'] = None + del rev_dict['id'] - rev_model = Revision.from_dict(rev_dict) - assert rev_model.id - assert rev_model.id == hash_to_bytes(revision_identifier(rev_dict)) + rev_id = hash_to_bytes(revision_identifier(rev_dict)) + for rev_model in [Revision(**rev_dict), Revision.from_dict(rev_dict)]: + assert rev_model.date is None + assert rev_model.committer_date is None + assert rev_model.id == rev_id def test_release_model_id_computation(): rel_dict = dict(release_example) del rel_dict['id'] - rel_model = Release(**rel_dict) - assert rel_model.id - assert rel_model.id == hash_to_bytes(release_identifier(rel_dict)) - - rel_model = Release.from_dict(rel_dict) - assert rel_model.id - assert rel_model.id == hash_to_bytes(release_identifier(rel_dict)) + rel_id = hash_to_bytes(release_identifier(rel_dict)) + for rel_model in [Release(**rel_dict), Release.from_dict(rel_dict)]: + assert rel_model.id == hash_to_bytes(rel_id) def test_snapshot_model_id_computation(): snp_dict = dict(snapshot_example) del snp_dict['id'] - snp_model = Snapshot(**snp_dict) - assert snp_model.id - assert snp_model.id == hash_to_bytes(snapshot_identifier(snp_dict)) - - snp_model = Snapshot.from_dict(snp_dict) - assert snp_model.id - assert snp_model.id == hash_to_bytes(snapshot_identifier(snp_dict)) + snp_id = hash_to_bytes(snapshot_identifier(snp_dict)) + for snp_model in [Snapshot(**snp_dict), Snapshot.from_dict(snp_dict)]: + assert snp_model.id == snp_id diff --git a/version.txt b/version.txt index c9e30f8952411f8a87cf2ee9ca7cb14a2cae870d..235911d5ec143fc28371a75af99adde637188aaa 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.52-0-g4e4c4ff \ No newline at end of file +v0.0.53-0-g4b779e1 \ No newline at end of file