Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-model
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Renaud Boyer
swh-model
Commits
eff2692a
Commit
eff2692a
authored
7 years ago
by
Nicolas Dandrimont
Browse files
Options
Downloads
Plain Diff
Merge branch 'wip/snapshots'
parents
94bd8dd5
1b1cc8d5
No related branches found
Branches containing commit
Tags
v0.0.19
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
swh/model/hashutil.py
+1
-1
1 addition, 1 deletion
swh/model/hashutil.py
swh/model/identifiers.py
+85
-0
85 additions, 0 deletions
swh/model/identifiers.py
swh/model/tests/test_identifiers.py
+89
-0
89 additions, 0 deletions
swh/model/tests/test_identifiers.py
with
175 additions
and
1 deletion
swh/model/hashutil.py
+
1
−
1
View file @
eff2692a
...
...
@@ -198,7 +198,7 @@ def hash_git_data(data, git_type, base_algo='sha1'):
ValueError if the git_type is unexpected.
"""
git_object_types
=
{
'
blob
'
,
'
tree
'
,
'
commit
'
,
'
tag
'
}
git_object_types
=
{
'
blob
'
,
'
tree
'
,
'
commit
'
,
'
tag
'
,
'
snapshot
'
}
if
git_type
not
in
git_object_types
:
raise
ValueError
(
'
Unexpected git object type %s, expected one of %s
'
%
...
...
This diff is collapsed.
Click to expand it.
swh/model/identifiers.py
+
85
−
0
View file @
eff2692a
...
...
@@ -499,3 +499,88 @@ def release_identifier(release):
components
.
extend
([
b
'
\n
'
,
release
[
'
message
'
]])
return
identifier_to_str
(
hash_git_data
(
b
''
.
join
(
components
),
'
tag
'
))
def snapshot_identifier(snapshot, *, ignore_unresolved=False):
    """Compute the intrinsic identifier of a snapshot.

    A snapshot is a set of named branches. Each branch points at an object
    at any level of the Software Heritage DAG, or is an *alias* (its target
    is the name of another branch of the same snapshot), or is *dangling*
    (its target is unknown and is represented by ``None``).

    The identifier is a salted sha1 (git hashing algorithm, with the
    ``snapshot`` object type) of a manifest built as follows:

    1. Branches are sorted by name, in bytes order.
    2. For every branch, these bytes are emitted in order:

       - the type of the branch target:

         - ``content``, ``directory``, ``revision``, ``release`` or
           ``snapshot`` for the corresponding entries in the DAG;
         - ``alias`` for a branch referencing another branch;
         - ``dangling`` for a dangling branch

       - an ascii space (``\\x20``)
       - the branch name (as raw bytes)
       - a null byte (``\\x00``)
       - the length of the target identifier, as an ascii-encoded decimal
         number (``20`` for current intrinsic identifiers, ``0`` for
         dangling branches, the length of the target branch name for
         branch aliases)
       - a colon (``:``)
       - the identifier of the target, taken from the 'target' member:

         - for contents: their *sha1_git*
         - for directories, revisions, releases or snapshots: their
           intrinsic identifier
         - for branch aliases: the name of the target branch (raw bytes)
         - for dangling branches: the empty string

    As with directory manifests, entries are not separated from each other.
    Since aliases make identifiers of arbitrary length, every identifier is
    length-prefixed to keep the manifest unambiguous.

    Args:
        snapshot (dict): the snapshot to identify. Only the ``'branches'``
            entry is read; it is a :class:`dict` mapping each branch name
            to its target.
        ignore_unresolved (bool): if `True`, do not fail on unresolved
            branch aliases.

    Returns:
        str: the intrinsic identifier for `snapshot`

    Raises:
        ValueError: if some alias points at a branch missing from the
            snapshot, or at itself, and `ignore_unresolved` is false.

    """
    branches = snapshot['branches']
    broken_aliases = []
    manifest = []

    for branch_name in sorted(branches):
        branch = branches[branch_name]

        if not branch:
            kind, pointee = b'dangling', b''
        elif branch['target_type'] == 'alias':
            kind = b'alias'
            pointee = branch['target']
            # An alias is unresolved when it names a branch that is absent
            # from this snapshot, or when it names itself.
            if pointee not in branches or pointee == branch_name:
                broken_aliases.append((branch_name, pointee))
        else:
            kind = branch['target_type'].encode()
            pointee = identifier_to_bytes(branch['target'])

        length_prefix = ('%d:' % len(pointee)).encode()
        manifest += [
            kind, b'\x20', branch_name, b'\x00',
            length_prefix, pointee,
        ]

    if broken_aliases and not ignore_unresolved:
        listing = ', '.join('%s -> %s' % pair for pair in broken_aliases)
        raise ValueError('Branch aliases unresolved: %s' % listing)

    return identifier_to_str(hash_git_data(b''.join(manifest), 'snapshot'))
This diff is collapsed.
Click to expand it.
swh/model/tests/test_identifiers.py
+
89
−
0
View file @
eff2692a
...
...
@@ -679,3 +679,92 @@ o6X/3T+vm8K3bf3driRr34c=
identifiers
.
release_identifier
(
self
.
release_newline_in_author
),
identifiers
.
identifier_to_str
(
self
.
release_newline_in_author
[
'
id
'
])
)
class SnapshotIdentifier(unittest.TestCase):
    """Check ``identifiers.snapshot_identifier`` against recorded ids.

    Every fixture is a snapshot dict whose ``'id'`` entry holds the
    identifier expected for its ``'branches'`` mapping.
    """

    @staticmethod
    def _branch(target_type, target):
        """Build one branch entry pointing at `target`."""
        return {
            'target': target,
            'target_type': target_type,
        }

    def _assert_identifier(self, snapshot, **options):
        """Assert that `snapshot` hashes to the id recorded in it."""
        computed = identifiers.snapshot_identifier(snapshot, **options)
        expected = identifiers.identifier_to_str(snapshot['id'])
        self.assertEqual(computed, expected)

    def setUp(self):
        super().setUp()
        branch = self._branch

        # Snapshot without any branch.
        self.empty = {
            'id': '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e',
            'branches': {},
        }

        # Single branch whose target is unknown.
        self.dangling_branch = {
            'id': 'c84502e821eb21ed84e9fd3ec40973abc8b32353',
            'branches': {
                b'HEAD': None,
            },
        }

        # Alias naming a branch that is absent from the snapshot.
        self.unresolved = {
            'id': '84b4548ea486e4b0a7933fa541ff1503a0afe1e0',
            'branches': {
                b'foo': branch('alias', b'bar'),
            },
        }

        # One branch per target type, plus an alias and a dangling branch.
        self.all_types = {
            'id': '6e65b86363953b780d92b0a928f3e8fcdd10db36',
            'branches': {
                b'directory': branch(
                    'directory',
                    '1bd0e65f7d2ff14ae994de17a1e7fe65111dcad8',
                ),
                b'content': branch(
                    'content',
                    'fe95a46679d128ff167b7c55df5d02356c5a1ae1',
                ),
                b'alias': branch('alias', b'revision'),
                b'revision': branch(
                    'revision',
                    'aafb16d69fd30ff58afdd69036a26047f3aebdc6',
                ),
                b'release': branch(
                    'release',
                    '7045404f3d1c54e6473c71bbb716529fbad4be24',
                ),
                b'snapshot': branch(
                    'snapshot',
                    '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e',
                ),
                b'dangling': None,
            },
        }

    def test_empty_snapshot(self):
        self._assert_identifier(self.empty)

    def test_dangling_branch(self):
        self._assert_identifier(self.dangling_branch)

    def test_unresolved(self):
        # The error message must name the offending alias and its target.
        with self.assertRaisesRegex(ValueError, "b'foo' -> b'bar'"):
            identifiers.snapshot_identifier(self.unresolved)

    def test_unresolved_force(self):
        self._assert_identifier(self.unresolved, ignore_unresolved=True)

    def test_all_types(self):
        self._assert_identifier(self.all_types)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment