diff --git a/PKG-INFO b/PKG-INFO index 9c87cc02c1f691d197d9c360ffea667dc9a35977..8afa9731644afd7cb3d175b5c5735c4eee8aae86 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: swh.model -Version: 0.0.22 +Version: 0.0.23 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/docs/data-model.rst b/docs/data-model.rst index 1693ae496545272e794cf19cea3f6bf9ef854479..f6e4f066a0ffb589d49593163367a41788e755af 100644 --- a/docs/data-model.rst +++ b/docs/data-model.rst @@ -3,11 +3,11 @@ Data model ========== -TODO - -Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor -incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis -nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo -consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse -cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non -proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +.. _swh-merkle-dag: +.. figure:: images/swh-merkle-dag.svg + :width: 1024px + :align: center + + Software Heritage archive as a Merkle DAG, augmented with crawling + information (click to zoom). + diff --git a/docs/images/.gitignore b/docs/images/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e9c694ce24364021872ec307b95ca435d82b0aff --- /dev/null +++ b/docs/images/.gitignore @@ -0,0 +1,2 @@ +swh-merkle-dag.pdf +swh-merkle-dag.svg diff --git a/docs/images/Makefile b/docs/images/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2bc794e965ba7be048c00263d49e6a8c0769a5b5 --- /dev/null +++ b/docs/images/Makefile @@ -0,0 +1,25 @@ + +MERKLE_DAG = swh-merkle-dag.pdf swh-merkle-dag.svg + +BUILD_TARGETS = +BUILD_TARGETS += $(MERKLE_DAG) + +all: $(BUILD_TARGETS) + + +# dia exporters + +%.eps: %.dia + dia -t eps --export $@ $< + +%.svg: %.dia + dia -t svg --export $@ $< + +# generic converters + +%.pdf: %.eps + epstopdf $< + + +clean: + -rm -f $(BUILD_TARGETS) diff --git a/docs/images/swh-merkle-dag.dia b/docs/images/swh-merkle-dag.dia new file mode 100644 index 0000000000000000000000000000000000000000..00edd643bf62df4f65766dcfaaf96be5611ea25b Binary files /dev/null and b/docs/images/swh-merkle-dag.dia differ diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index 9c87cc02c1f691d197d9c360ffea667dc9a35977..8afa9731644afd7cb3d175b5c5735c4eee8aae86 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: swh.model -Version: 0.0.22 +Version: 0.0.23 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt index b45f730dce555fb482c6a30a86be695e9ed496b6..4b2269ff4fe5c7dfd1b6889241146897135bb07b 100644 --- a/swh.model.egg-info/SOURCES.txt +++ b/swh.model.egg-info/SOURCES.txt @@ -26,6 +26,9 @@ docs/index.rst docs/persistent-identifiers.rst docs/_static/.placeholder docs/_templates/.placeholder +docs/images/.gitignore +docs/images/Makefile +docs/images/swh-merkle-dag.dia swh/__init__.py swh.model.egg-info/PKG-INFO swh.model.egg-info/SOURCES.txt diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 51d2d2e9b6f94467b526ef6291bc1f76d79b8202..eef7710698e0ec499a62e85e1a507a9371f8fcf5 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -538,7 +538,7 @@ def snapshot_identifier(snapshot, *, ignore_unresolved=False): - the branch name (as raw bytes) - a null byte (``\\x00``) - the length of the target identifier, as an ascii-encoded decimal number - (``20`` for current intrinisic identifiers, ``0`` for dangling + (``20`` for current intrinsic identifiers, ``0`` for dangling branches, the length of the target branch name for branch aliases) - a colon (``:``) - the identifier of the target object pointed at by the branch, @@ -638,18 +638,35 @@ def persistent_identifier(type, object, version=1): PERSISTENT_IDENTIFIER_KEYS = [ - 'namespace', 'scheme_version', 'object_type', 'object_id'] + 'namespace', 'scheme_version', 'object_type', 'object_id', 'metadata'] + +PERSISTENT_IDENTIFIER_PARTS_SEP = ';' def parse_persistent_identifier(persistent_id): - """Parse swh's persistent identifier scheme. + """Parse swh's :ref:`persistent-identifiers` scheme. Args: persistent_id (str): A persistent identifier Returns: - dict with keys namespace, scheme_version, object_type, object_id + dict: dict with keys : + + * namespace, holding str value + * scheme_version, holding str value + * object_type, holding str value + * object_id, holding str value + * metadata, holding dict value """ - data = persistent_id.split(':') + persistent_id_parts = persistent_id.split(PERSISTENT_IDENTIFIER_PARTS_SEP) + data = persistent_id_parts.pop(0).split(':') + persistent_id_metadata = {} + for part in persistent_id_parts: + try: + key, val = part.split('=') + persistent_id_metadata[key] = val + except Exception: + pass + data.append(persistent_id_metadata) return dict(zip(PERSISTENT_IDENTIFIER_KEYS, data)) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index 26dc01e98543557197926ac41da6733d8de786ff..afe943b6de8699ffb1775c8df135253c3a1d1a42 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -818,6 +818,32 @@ class SnapshotIdentifier(unittest.TestCase): 'scheme_version': _version, 'object_type': _type, 'object_id': _hash, + 'metadata': {} + } + actual_result = identifiers.parse_persistent_identifier(pid) + self.assertEquals(actual_result, expected_result) + + for pid, _type, _version, _hash, _metadata in [ + ('swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython', # noqa + 'cnt', '1', '9c95815d9e9d91b8dae8e05d8bbc696fe19f796b', + { + 'lines': '1-18', + 'origin': 'https://github.com/python/cpython' + }), + ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools', # noqa + 'dir', '1', '0b6959356d30f1a4e9b7f6bca59b9a336464c03d', + { + 'origin': 'deb://Debian/packages/linuxdoc-tools' + }), + ('swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed', # noqa + 'dir', '1', '0b6959356d30f1a4e9b7f6bca59b9a336464c03d', {}) + ]: + expected_result = { + 'namespace': 'swh', + 'scheme_version': _version, + 'object_type': _type, + 'object_id': _hash, + 'metadata': _metadata } actual_result = identifiers.parse_persistent_identifier(pid) self.assertEquals(actual_result, expected_result) diff --git a/version.txt b/version.txt index 3a338cb2e577d69dc9a1844e049bfc834a93a7e7..c0bdf11c06ccb6f33f80e32ef7f296cd2080242e 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.22-0-ga06122e \ No newline at end of file +v0.0.23-0-g448eafa \ No newline at end of file