diff --git a/PKG-INFO b/PKG-INFO index d5b37a98f21d98df7b25d174c5a949f2e628de42..9c87cc02c1f691d197d9c360ffea667dc9a35977 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: swh.model -Version: 0.0.21 +Version: 0.0.22 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/docs/persistent-identifiers.rst b/docs/persistent-identifiers.rst index c796a808ff8670f9a13cc13f32b5018ccdfbc35d..7f41d610561b80a288aa3719eeeca543bc92848d 100644 --- a/docs/persistent-identifiers.rst +++ b/docs/persistent-identifiers.rst @@ -68,7 +68,7 @@ captured by ``<object_type>``: * ``rel`` to **releases**, * ``rev`` to **revisions**, * ``dir`` to **directories**, -* ``cnt`` to **releases**. +* ``cnt`` to **contents**. The actual object pointed to is identified by the intrinsic identifier ``<object_id>``, which is a hex-encoded (using lowercase ASCII characters) SHA1 diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index d5b37a98f21d98df7b25d174c5a949f2e628de42..9c87cc02c1f691d197d9c360ffea667dc9a35977 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: swh.model -Version: 0.0.21 +Version: 0.0.22 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt index d8706419ea007d2a2294198e0fc110ad06cfad9b..b45f730dce555fb482c6a30a86be695e9ed496b6 100644 --- a/swh.model.egg-info/SOURCES.txt +++ b/swh.model.egg-info/SOURCES.txt @@ -38,6 +38,7 @@ swh/model/from_disk.py swh/model/hashutil.py swh/model/identifiers.py swh/model/merkle.py +swh/model/toposort.py swh/model/validators.py swh/model/fields/__init__.py swh/model/fields/compound.py @@ -49,6 +50,7 @@ swh/model/tests/test_from_disk.py swh/model/tests/test_hashutil.py 
swh/model/tests/test_identifiers.py swh/model/tests/test_merkle.py +swh/model/tests/test_toposort.py swh/model/tests/test_validators.py swh/model/tests/fields/__init__.py swh/model/tests/fields/test_compound.py diff --git a/swh/model/tests/test_toposort.py b/swh/model/tests/test_toposort.py new file mode 100644 index 0000000000000000000000000000000000000000..66a8ee1c8ae9543b6dffdccf3a7fd389f8150630 --- /dev/null +++ b/swh/model/tests/test_toposort.py @@ -0,0 +1,99 @@ +# Copyright (C) 2017-2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest +from swh.model.toposort import toposort + + +def is_toposorted_slow(revision_log): + """Check (inefficiently) that the given revision log is in any topological + order. + + Complexity: O(n^2) on linear histories, worse with merges. + (Note: It's totally possible to write an O(n) is_toposorted function, + but it requires computing the transitive closure of the input DAG, + which requires computing a topological ordering of that DAG, which + kind of defeats the purpose of writing unit tests for toposort().) + + Args: + revision_log: Revision log as returned by + swh.storage.Storage.revision_log(). + + Returns: + True if the revision log is topologically sorted. 
+ """ + rev_by_id = {r['id']: r for r in revision_log} + + def all_parents(revision): + for parent in revision['parents']: + yield parent + yield from all_parents(rev_by_id[parent]) + + visited = set() + for rev in revision_log: + visited.add(rev['id']) + if not all(parent in visited for parent in all_parents(rev)): + return False + return True + + +class TestToposort(unittest.TestCase): + def generate_log(self, graph): + for node_id, parents in graph.items(): + yield {'id': node_id, 'parents': tuple(parents)} + + def unordered_log(self, log): + return {(d['id'], tuple(d['parents'])) for d in log} + + def check(self, graph): + log = list(self.generate_log(graph)) + topolog = list(toposort(log)) + self.assertEqual(len(topolog), len(graph)) + self.assertEqual(self.unordered_log(topolog), self.unordered_log(log)) + self.assertTrue(is_toposorted_slow(toposort(log))) + + def test_linked_list(self): + self.check({3: [2], + 2: [1], + 1: []}) + + def test_fork(self): + self.check({7: [6], + 6: [4], + 5: [3], + 4: [2], + 3: [2], + 2: [1], + 1: []}) + + def test_fork_merge(self): + self.check({8: [7, 5], + 7: [6], + 6: [4], + 5: [3], + 4: [2], + 3: [2], + 2: [1], + 1: []}) + + def test_two_origins(self): + self.check({9: [8], + 8: [7, 5], + 7: [6], + 6: [4], + 5: [3], + 4: [], + 3: []}) + + def test_three_way(self): + self.check({9: [8, 4, 2], + 8: [7, 5], + 7: [6], + 6: [4], + 5: [3], + 4: [2], + 3: [2], + 2: [1], + 1: []}) diff --git a/swh/model/toposort.py b/swh/model/toposort.py new file mode 100644 index 0000000000000000000000000000000000000000..b0a7231a5e58a70d3d6c36477fd763717fde06cf --- /dev/null +++ b/swh/model/toposort.py @@ -0,0 +1,43 @@ +# Copyright (C) 2017-2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import collections + + +def toposort(revision_log): + """Perform a 
topological sort on a revision log graph. + + Complexity: O(N) (linear in the length of the revision log) + + Args: + revision_log: Revision log as returned by + swh.storage.Storage.revision_log(). + + Yields: + The revisions of the log in topological order (parents first) + """ + in_degree = {} # rev_id -> number of parents not yet yielded + children = collections.defaultdict(list) # rev_id -> children + + # Compute the in-degrees and the children of all the revisions. + # Add the roots to the processing queue. + queue = collections.deque() + for rev in revision_log: + parents = rev['parents'] + in_degree[rev['id']] = len(parents) + if not parents: + queue.append(rev) + for parent in parents: + children[parent].append(rev) + + # Topological sort: yield the 'ready' nodes, decrease the in-degree of + # their children and add the 'ready' ones to the queue. + while queue: + rev = queue.popleft() + yield rev + for child in children[rev['id']]: + in_degree[child['id']] -= 1 + if in_degree[child['id']] == 0: + queue.append(child) diff --git a/version.txt b/version.txt index 943a41a2f9b8a76b2fa6d967ea9da976022b3ae6..3a338cb2e577d69dc9a1844e049bfc834a93a7e7 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.21-0-gbdf26f5 \ No newline at end of file +v0.0.22-0-ga06122e \ No newline at end of file