diff --git a/PKG-INFO b/PKG-INFO index c67ba2c6d6d6aa10241e2ffa390aee279a7f528a..289753947f8553ed0789b24a0bb962430b619c09 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: swh.model -Version: 0.0.4 +Version: 0.0.5 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/README-dev.md b/README-dev.md new file mode 100644 index 0000000000000000000000000000000000000000..97f9fb42da360b997cb7cca64cb7866948846930 --- /dev/null +++ b/README-dev.md @@ -0,0 +1,118 @@ +Git sha1 computation +-------------------- + +Document to describe how the git sha1 computation takes place. + +### commit/revision + +sha1 git commit/revision computation: + + commit `size`\0 + tree `sha1-git-tree-and-subtree-in-plain-hex-string` + ([parent `commit-parent-n`]) + author `name` <`email`> `date-ts` `date-offset` + committer `name` <`email`> `date-ts` `date-offset` + ([extra-header-key-n extra-header-value-n]) + + `commit-message` + (inline-gpg-signature) + + +Notes: +- [] denotes list of entries (one per line) +- () denotes optional entry. For example, the parent entry is optional. 
+- empty line at the end of the commit message +- timestamp example: 1444054085 +- date offset for example: +0200, -0100 + +sources: +- commit_tree_extended: https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/commit.c#L1522 +- commit_tree: https://github.com/git/git/blob/8d530c4d64ffcc853889f7b385f554d53db375ed/commit.c#L1392 + +Examples: + +```sh +$ cat commit.txt +tree 85a74718d377195e1efd0843ba4f3260bad4fe07 +parent 01e2d0627a9a6edb24c37db45db5ecb31e9de808 +author Linus Torvalds <torvalds@linux-foundation.org> 1436739030 -0700 +committer Linus Torvalds <torvalds@linux-foundation.org> 1436739030 -0700 +svn-repo-uuid 046f1af7-66c2-d61b-5410-ce57b7db7bff +svn-revision 10 + +Linux 4.2-rc2 +``` + +``` +$ cat commit.txt | git hash-object -t commit --stdin +010d34f384fa99d047cdd5e2f41e56e5c2feee45 +``` + +commit: 44cc742a8ca17b9c279be4cc195a93a6ef7a320e +``` +$ git cat-file -p 44cc742a8ca17b9c279be4cc195a93a6ef7a320e +... +tree b134f9b7dc434f593c0bab696345548b37de0558 +parent 689664ae944b4692724f13b709a4e4de28b54e57 +parent c888305e1efbaa252d01b4e5e6b778f865a97514 +author Jiang Xin <worldhello.net@gmail.com> 1428538899 +0800 +committer Jiang Xin <worldhello.net@gmail.com> 1428538899 +0800 +gpgsig -----BEGIN PGP SIGNATURE----- + Version: GnuPG v1.4.13 (Darwin) + + iQIcBAABAgAGBQJVJcYsAAoJEBiY3kIkQRNJVAUQAJ8/XQIfMqqC5oYeEFfHOPYZ + L7qy46bXHVBa9Qd8zAJ2Dou3IbI2ZoF6/Et89K/UggOycMlt5FKV/9toWyuZv4Po + L682wonoxX99qvVTHo6+wtnmYO7+G0f82h+qHMErxjP+I6gzRNBvRr+SfY7VlGdK + wikMKOMWC5smrScSHITnOq1Ews5pe3N7qDYMzK0XVZmgDoaem4RSWMJs4My/qVLN + e0CqYWq2A22GX7sXl6pjneJYQvcAXUX+CAzp24QnPSb+Q22Guj91TcxLFcHCTDdn + qgqMsEyMiisoglwrCbO+D+1xq9mjN9tNFWP66SQ48mrrHYTBV5sz9eJyDfroJaLP + CWgbDTgq6GzRMehHT3hXfYS5NNatjnhkNISXR7pnVP/obIi/vpWh5ll6Gd8q26z+ + a/O41UzOaLTeNI365MWT4/cnXohVLRG7iVJbAbCxoQmEgsYMRc/pBAzWJtLfcB2G + jdTswYL6+MUdL8sB9pZ82D+BP/YAdHe69CyTu1lk9RT2pYtI/kkfjHubXBCYEJSG + +VGllBbYG6idQJpyrOYNRJyrDi9yvDJ2W+S0iQrlZrxzGBVGTB/y65S8C+2WTBcE + 
lf1Qb5GDsQrZWgD+jtWTywOYHtCBwyCKSAXxSARMbNPeak9WPlcW/Jmu+fUcMe2x + dg1KdHOa34shrKDaOVzW + =od6m + -----END PGP SIGNATURE----- + +Merge branch 'master' of git://github.com/alexhenrie/git-po + +* 'master' of git://github.com/alexhenrie/git-po: + l10n: ca.po: update translation +``` + +### directory/tree + +sha1 git directory/tree computation: + + tree `tree-size`\0 + <file-perm> <file-name>\0<file-sha1-in-20-bytes-string>...<dir-perm> <dir-name>\0<dir-sha1-in-20-bytes-string>... + + +Notes: +- no newline separator between tree entries +- no empty newline at the end of the tree entries +- tree content header size is the length of the content +- The tree entries are ordered according to bytes in their <name> properties. + +Note: Tree entries referencing trees are sorted as if their names had a trailing / +at their end. + +Possible permissions are: +- 100644 - file +- 40000 - directory +- 100755 - executable file +- 120000 - symbolic link +- 160000 - git link (relative to submodule) + +### content/file + +sha1 git content computation: + + blob `blob-size`\0 + `blob-content` + +Notes: +- no newline at the end of the blob content + +Compress with DEFLATE and compute sha1 diff --git a/swh.model.egg-info/PKG-INFO b/swh.model.egg-info/PKG-INFO index c67ba2c6d6d6aa10241e2ffa390aee279a7f528a..289753947f8553ed0789b24a0bb962430b619c09 100644 --- a/swh.model.egg-info/PKG-INFO +++ b/swh.model.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.0 Name: swh.model -Version: 0.0.4 +Version: 0.0.5 Summary: Software Heritage data model Home-page: https://forge.softwareheritage.org/diffusion/DMOD/ Author: Software Heritage developers diff --git a/swh.model.egg-info/SOURCES.txt b/swh.model.egg-info/SOURCES.txt index c8d507f5264a24d895e5d227137096135a7adb1e..6d07bebeb7ebeb70ab32a0187fa2411d1beeb42f 100644 --- a/swh.model.egg-info/SOURCES.txt +++ b/swh.model.egg-info/SOURCES.txt @@ -4,6 +4,7 @@ LICENSE MANIFEST.in Makefile Makefile.local +README-dev.md requirements.txt setup.py 
version.txt diff --git a/swh/model/git.py b/swh/model/git.py index 79852f87cad7cb65085bc6341a43a5977c8269c6..dc7ab0d8fa7e2481736fb6bc7fb4aebc1d9383a3 100644 --- a/swh/model/git.py +++ b/swh/model/git.py @@ -114,9 +114,13 @@ def compute_link_metadata(linkpath): Returns: Dictionary of values: + - data: link's content + - length: link's content length - name: basename of the link - perms: git permission for link - type: git type for link + - path: absolute path to the link on filesystem + """ data = os.readlink(linkpath) link_metadata = hashutil.hash_data(data) @@ -143,6 +147,7 @@ def compute_blob_metadata(filepath): - name: basename of the file - perms: git permission for file - type: git type for file + - path: absolute filepath on filesystem """ blob_metadata = hashutil.hash_path(filepath) @@ -165,9 +170,11 @@ def compute_tree_metadata(dirname, ls_hashes): Returns: Dictionary of values: + - sha1_git: tree's sha1 git - name: basename of the directory - perms: git permission for directory - type: git type for directory + - path: absolute path to directory on filesystem """ return { @@ -180,7 +187,8 @@ def compute_tree_metadata(dirname, ls_hashes): def walk_and_compute_sha1_from_directory(rootdir, - dir_ok_fn=lambda dirpath: True): + dir_ok_fn=lambda dirpath: True, + with_root_tree=True): """Compute git sha1 from directory rootdir. Args: @@ -190,6 +198,10 @@ def walk_and_compute_sha1_from_directory(rootdir, defined in the function. By default, all folders are ok. Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath + - with_root_tree: Determine if we compute the upper root tree's + checksums. As a default, we want it. One possible use case where this + is not useful is the update (cf. `update_checksums_from`) + Returns: Dictionary of entries with keys <path-name> and as values a list of directory entries. 
@@ -212,6 +224,9 @@ def walk_and_compute_sha1_from_directory(rootdir, ls_hashes = {} all_links = set() + if rootdir.endswith(b'/'): + rootdir = rootdir.rstrip(b'/') + def filtfn(dirpath, dirnames): return list(filter(lambda dirname: dir_ok_fn(os.path.join(dirpath, dirname)), @@ -253,14 +268,180 @@ def walk_and_compute_sha1_from_directory(rootdir, ls_hashes[dirpath].extend(dir_hashes) - # compute the current directory hashes - root_hash = { - 'sha1_git': compute_directory_git_sha1(rootdir, ls_hashes), - 'path': rootdir, - 'name': os.path.basename(rootdir), - 'perms': GitPerm.TREE, - 'type': GitType.TREE - } - ls_hashes[ROOT_TREE_KEY] = [root_hash] + if with_root_tree: + # compute the current directory hashes + root_hash = { + 'sha1_git': compute_directory_git_sha1(rootdir, ls_hashes), + 'path': rootdir, + 'name': os.path.basename(rootdir), + 'perms': GitPerm.TREE, + 'type': GitType.TREE + } + ls_hashes[ROOT_TREE_KEY] = [root_hash] return ls_hashes + + +def recompute_sha1_in_memory(root, deeper_rootdir, objects): + """Recompute git sha1 from directory deeper_rootdir to root. + + This function relies exclusively on `objects` for hashes. It + expects the deeper_rootdir and every key below that path to be + already updated. + + Args: + - root: Upper root directory (so same as + objects[ROOT_TREE_KEY][0]['path']) + + - deeper_rootdir: Root directory from which the git hash + computation begins + + - objects: objects dictionary as per returned by + `walk_and_compute_sha1_from_directory` + + Returns: + Dictionary of entries with keys <path-name> and as values a list of + directory entries. + Those are list of dictionary with keys: + - 'perms' + - 'type' + - 'name' + - 'sha1_git' + - and specifically content: 'sha1', 'sha256', ... + + Note: + One special key is ROOT_TREE_KEY to indicate the upper root of the + directory (this is the revision's target directory). + + Raises: + Nothing + If something is raised, this is a programmatic error. 
+ + """ + # list of paths to update from bottom to top + upper_root = os.path.dirname(root) + rootdir = os.path.dirname(deeper_rootdir) + while rootdir != upper_root: + files = objects.get(rootdir, None) + if files: + ls_hashes = [] + for hashfile in files: + fulldirname = hashfile['path'] + if hashfile['type'] == GitType.TREE: + tree_hash = compute_tree_metadata(fulldirname, objects) + ls_hashes.append(tree_hash) + else: + ls_hashes.append(hashfile) + + objects[rootdir] = ls_hashes + + rootdir = os.path.dirname(rootdir) + + # update root + objects[ROOT_TREE_KEY][0]['sha1_git'] = compute_directory_git_sha1(root, + objects) + return objects + + +def commonpath(paths): + """Given a sequence of path names, returns the longest common sub-path. + + Copied from Python3.5 + + """ + + if not paths: + raise ValueError('commonpath() arg is an empty sequence') + + if isinstance(paths[0], bytes): + sep = b'/' + curdir = b'.' + else: + sep = '/' + curdir = '.' + + try: + split_paths = [path.split(sep) for path in paths] + + try: + isabs, = set(p[:1] == sep for p in paths) + except ValueError: + raise ValueError("Can't mix absolute and relative paths") + + split_paths = [ + [c for c in s if c and c != curdir] for s in split_paths] + s1 = min(split_paths) + s2 = max(split_paths) + common = s1 + for i, c in enumerate(s1): + if c != s2[i]: + common = s1[:i] + break + + prefix = sep if isabs else sep[:0] + return prefix + sep.join(common) + except (TypeError, AttributeError): + raise + + +def update_checksums_from(changed_paths, objects, + dir_ok_fn=lambda dirpath: True): + """Given a list of changed paths, recompute the checksums only where + needed. + + Args: + changed_paths: Dictionary list representing path changes. + A dictionary has the form: + - path: the full path to the file Added, Modified or Deleted + - action: A, M or D + objects: dictionary returned by `walk_and_compute_sha1_from_directory`. 
+ + Returns: + Dictionary returned by `walk_and_compute_sha1_from_directory` + updated (mutated) according to latest filesystem modifications. + + """ + root = objects[ROOT_TREE_KEY][0]['path'] + if root.endswith(b'/'): + root = root.rstrip(b'/') + + paths = [] + # a first round-trip to ensure we don't need to... + for changed_path in changed_paths: + path = changed_path['path'] + + parent = os.path.dirname(path) + if parent == root: # ... recompute everything anyway + return walk_and_compute_sha1_from_directory(root, + dir_ok_fn) + + if changed_path['action'] == 'D': # (D)elete + k = objects.pop(path, None) + if k: # it's a dir, we need to remove the descendant paths + prefix_path = path + b'/' + new_objects = {k: objects[k] for k in objects.keys() + if not k.startswith(prefix_path)} + objects = new_objects + + paths.append(parent) + + if not paths: # no modification on paths + return objects + + rootdir = commonpath(paths) + + # common ancestor is the root anyway, no optimization possible, + # recompute all + if root == rootdir: + return walk_and_compute_sha1_from_directory(root, + dir_ok_fn) + + # Recompute from disk the checksums from impacted common ancestor + # rootdir changes. 
Then update the original objects with new + # checksums for the arborescence tree below rootdir + hashes = walk_and_compute_sha1_from_directory(rootdir, dir_ok_fn, + with_root_tree=False) + objects.update(hashes) + + # Recompute the hashes in memory from rootdir to root + return recompute_sha1_in_memory(root, rootdir, objects) diff --git a/swh/model/hashutil.py b/swh/model/hashutil.py index 2d5ff126b00b67f5b46534aa60f6f5118e1d6a78..b2558a3bea2f5682493cbc83381328a8dde8fefa 100644 --- a/swh/model/hashutil.py +++ b/swh/model/hashutil.py @@ -125,7 +125,9 @@ def hash_path(path, algorithms=ALGORITHMS, chunk_cb=None): """ length = os.path.getsize(path) with open(path, 'rb') as fobj: - return hash_file(fobj, length, algorithms, chunk_cb) + hash = hash_file(fobj, length, algorithms, chunk_cb) + hash['length'] = length + return hash def hash_data(data, algorithms=ALGORITHMS): diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index 36bfa20cd3b1cb4bdfff3cdf9af67d33bc6de74e..cf3b3265f1e0392b76430b880ffaee2c8cd2171b 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -7,7 +7,7 @@ import binascii import datetime from functools import lru_cache -from . 
import hashutil +from .hashutil import hash_data, hash_git_data @lru_cache() @@ -38,7 +38,7 @@ def identifier_to_bytes(identifier): len(identifier)) return bytes.fromhex(identifier) - raise ValueError('Wrong type for identitfier %s, expected bytes or str' % + raise ValueError('Wrong type for identifier %s, expected bytes or str' % identifier.__class__.__name__) @@ -70,7 +70,7 @@ def identifier_to_str(identifier): len(identifier)) return binascii.hexlify(identifier).decode() - raise ValueError('Wrong type for identitfier %s, expected bytes or str' % + raise ValueError('Wrong type for identifier %s, expected bytes or str' % identifier.__class__.__name__) @@ -91,7 +91,7 @@ def content_identifier(content): """ - hashes = hashutil.hash_data( + hashes = hash_data( content['data'], {'sha1', 'sha1_git', 'sha256'}, ) @@ -159,8 +159,7 @@ def directory_identifier(directory): identifier_to_bytes(entry['target']), ]) - return identifier_to_str(hashutil.hash_git_data(b''.join(components), - 'tree')) + return identifier_to_str(hash_git_data(b''.join(components), 'tree')) def format_date(date): @@ -188,7 +187,7 @@ def format_date(date): @lru_cache() -def format_offset(offset): +def format_offset(offset, negative_utc=None): """Convert an integer number of minutes into an offset representation. The offset representation is [+-]hhmm where: @@ -197,10 +196,10 @@ def format_offset(offset): A null offset is represented as +0000. """ - if offset >= 0: - sign = '+' - else: + if offset < 0 or offset == 0 and negative_utc: sign = '-' + else: + sign = '+' hours = abs(offset) // 60 minutes = abs(offset) % 60 @@ -209,44 +208,188 @@ def format_offset(offset): return t.encode() -def format_date_offset(date_offset): - """Format a date-compatible object with its timezone offset. 
+def normalize_timestamp(time_representation): + """Normalize a time representation for processing by Software Heritage + + This function supports a numeric timestamp (representing a number of + seconds since the UNIX epoch, 1970-01-01 at 00:00 UTC), a datetime.datetime + object (with timezone information), or a normalized Software + Heritage time representation (idempotency). + + Args: + time_representation: the representation of a timestamp + + Returns: a normalized dictionary with three keys + + - timestamp: a number of seconds since the UNIX epoch (1970-01-01 at 00:00 + UTC) + - offset: the timezone offset as a number of minutes relative to UTC + - negative_utc: a boolean representing whether the offset is -0000 when + offset = 0. - A date-compatible object is either: - - a dict with two members - timestamp: floating point number of seconds since the unix epoch - offset: (int) number of minutes representing the offset from UTC - - a datetime.datetime object with a timezone - - a numeric value (in which case the offset is hardcoded to 0) """ - # FIXME: move normalization to another module + if time_representation is None: + return None + + negative_utc = False - if isinstance(date_offset, dict): - date = date_offset['timestamp'] - offset = date_offset['offset'] - elif isinstance(date_offset, datetime.datetime): - date = date_offset - utcoffset = date_offset.utcoffset() + if isinstance(time_representation, dict): + timestamp = time_representation['timestamp'] + offset = time_representation['offset'] + if 'negative_utc' in time_representation: + negative_utc = time_representation['negative_utc'] + elif isinstance(time_representation, datetime.datetime): + timestamp = time_representation.timestamp() + utcoffset = time_representation.utcoffset() if utcoffset is None: - raise ValueError('Received a datetime without a timezone') + raise ValueError( + 'normalize_timestamp received datetime without timezone: %s' % + time_representation) + + # utcoffset is an integer 
number of minutes seconds_offset = utcoffset.total_seconds() - if seconds_offset - int(seconds_offset) != 0 or seconds_offset % 60: - raise ValueError('Offset is not an integer number of minutes') offset = int(seconds_offset) // 60 else: - date = date_offset + timestamp = time_representation offset = 0 - return b''.join([format_date(date), b' ', format_offset(offset)]) + return { + 'timestamp': timestamp, + 'offset': offset, + 'negative_utc': negative_utc, + } def format_author(author): - return b''.join([author['name'], b' <', author['email'], b'>']) + """Format the specification of an author. + + An author is either a byte string (passed unchanged), or a dict with three + keys, fullname, name and email. + + If the fullname exists, return it; if it doesn't, we construct a fullname + using the following heuristics: if the name value is None, we return the + email in angle brackets, else, we return the name, a space, and the email + in angle brackets. + + """ + if isinstance(author, bytes) or author is None: + return author + + if 'fullname' in author: + return author['fullname'] + + ret = [] + if author['name'] is not None: + ret.append(author['name']) + if author['email'] is not None: + ret.append(b''.join([b'<', author['email'], b'>'])) + + return b' '.join(ret) + + +def format_author_line(header, author, date_offset): + """Format a an author line according to git standards. + + An author line has three components: + - a header, describing the type of author (author, committer, tagger) + - a name and email, which is an arbitrary bytestring + - optionally, a timestamp with UTC offset specification + + The author line is formatted thus: + + `header` `name and email`[ `timestamp` `utc_offset`] + + The timestamp is encoded as a (decimal) number of seconds since the UNIX + epoch (1970-01-01 at 00:00 UTC). As an extension to the git format, we + support fractional timestamps, using a dot as the separator for the decimal + part. 
+ + The utc offset is a number of minutes encoded as '[+-]HHMM'. Note some + tools can pass a negative offset corresponding to the UTC timezone + ('-0000'), which is valid and is encoded as such. + + For convenience, this function returns the whole line with its trailing + newline. + + Args: + header: the header of the author line (one of 'author', 'committer', + 'tagger') + author: an author specification (dict with two bytes values: name and + email, or byte value) + date_offset: a normalized date/time representation as returned by + `normalize_timestamp`. + + Returns: + the newline-terminated byte string containing the author line + + """ + + ret = [header.encode(), b' ', format_author(author)] + + date_offset = normalize_timestamp(date_offset) + + if date_offset is not None: + date_f = format_date(date_offset['timestamp']) + offset_f = format_offset(date_offset['offset'], + date_offset['negative_utc']) + + ret.extend([b' ', date_f, b' ', offset_f]) + + ret.append(b'\n') + return b''.join(ret) def revision_identifier(revision): """Return the intrinsic identifier for a revision. + + The fields used for the revision identifier computation are: + - directory + - parents + - author + - author_date + - committer + - committer_date + - metadata -> extra_headers + - message + + A revision's identifier is the 'git'-checksum of a commit manifest + constructed as follows (newlines are a single ASCII newline character): + + ``` + tree <directory identifier> + [for each parent in parents] + parent <parent identifier> + [end for each parents] + author <author> <author_date> + committer <committer> <committer_date> + [for each key, value in extra_headers] + <key> <encoded value> + [end for each extra_headers] + + <message> + ``` + + The directory identifier is the ascii representation of its hexadecimal + encoding. + + Author and committer are formatted with the `format_author` function. + Dates are formatted with the `format_date_offset` function. 
+ + Extra headers are an ordered list of [key, value] pairs. Keys are strings + and get encoded to utf-8 for identifier computation. Values are either byte + strings, unicode strings (that get encoded to utf-8), or integers (that get + encoded to their utf-8 decimal representation). + + Multiline extra header values are escaped by indenting the continuation + lines with one ascii space. + + If the message is None, the manifest ends with the last header. Else, the + message is appended to the headers after an empty line. + + The checksum of the full manifest is computed using the 'commit' git object + type. + """ components = [ b'tree ', identifier_to_str(revision['directory']).encode(), b'\n', @@ -258,16 +401,39 @@ def revision_identifier(revision): ]) components.extend([ - b'author ', format_author(revision['author']), - b' ', format_date_offset(revision['date']), b'\n', - b'committer ', format_author(revision['committer']), - b' ', format_date_offset(revision['committer_date']), b'\n', - b'\n', - revision['message'], + format_author_line('author', revision['author'], revision['date']), + format_author_line('committer', revision['committer'], + revision['committer_date']), ]) - return identifier_to_str(hashutil.hash_git_data(b''.join(components), - 'commit')) + # Handle extra headers + metadata = revision.get('metadata') + if not metadata: + metadata = {} + + for key, value in metadata.get('extra_headers', []): + + # Integer values: decimal representation + if isinstance(value, int): + value = str(value).encode('utf-8') + + # Unicode string values: utf-8 encoding + if isinstance(value, str): + value = value.encode('utf-8') + + # multi-line values: indent continuation lines + if b'\n' in value: + value_chunks = value.split(b'\n') + value = b'\n '.join(value_chunks) + + # encode the key to utf-8 + components.extend([key.encode('utf-8'), b' ', value, b'\n']) + + if revision['message'] is not None: + components.extend([b'\n', revision['message']]) + + commit_raw = 
b''.join(components) + return identifier_to_str(hash_git_data(commit_raw, 'commit')) def target_type_to_git(target_type): @@ -289,12 +455,11 @@ def release_identifier(release): ] if 'author' in release and release['author']: - components.extend([ - b'tagger ', format_author(release['author']), b' ', - format_date_offset(release['date']), b'\n', - ]) + components.append( + format_author_line('tagger', release['author'], release['date']) + ) - components.extend([b'\n', release['message']]) + if release['message'] is not None: + components.extend([b'\n', release['message']]) - return identifier_to_str(hashutil.hash_git_data(b''.join(components), - 'tag')) + return identifier_to_str(hash_git_data(b''.join(components), 'tag')) diff --git a/swh/model/tests/test_git.py b/swh/model/tests/test_git.py index 435101977dc7798f0494d8f6afe7d18cd0ba8684..3ed3ff34c26c669fa044c9c5330dc17f7c06c0a1 100644 --- a/swh/model/tests/test_git.py +++ b/swh/model/tests/test_git.py @@ -4,9 +4,10 @@ # See top-level LICENSE file for more information import os +import shutil +import subprocess import tempfile import unittest -import subprocess from nose.tools import istest @@ -136,26 +137,36 @@ blah self.assertEqual(checksum, self.checksums['tag_sha1_git']) -class GitHashArborescenceTree(unittest.TestCase): - @classmethod - def setUpClass(cls): - super().setUpClass() +class GitHashWalkArborescenceTree(unittest.TestCase): + """Root class to ease walk and git hash testing without side-effecty problems. 
- cls.tmp_root_path = tempfile.mkdtemp().encode('utf-8') + """ + def setUp(self): + self.tmp_root_path = tempfile.mkdtemp().encode('utf-8') start_path = os.path.dirname(__file__).encode('utf-8') - sample_folder_archive = os.path.join(start_path, - b'../../../..', - b'swh-storage-testdata', - b'dir-folders', - b'sample-folder.tgz') + pkg_doc_linux_r11 = os.path.join(start_path, + b'../../../..', + b'swh-storage-testdata', + b'dir-folders', + b'sample-folder.tgz') - cls.root_path = os.path.join(cls.tmp_root_path, b'sample-folder') + self.root_path = os.path.join(self.tmp_root_path, b'sample-folder') # uncompress the sample folder subprocess.check_output( - ['tar', 'xvf', sample_folder_archive, '-C', cls.tmp_root_path]) + ['tar', 'xvf', pkg_doc_linux_r11, '-C', self.tmp_root_path]) + + def tearDown(self): + if os.path.exists(self.tmp_root_path): + shutil.rmtree(self.tmp_root_path) + +class GitHashFromScratch(GitHashWalkArborescenceTree): + """Test the main `walk_and_compute_sha1_from_directory` algorithm that + scans and compute the disk for checksums. 
+ + """ @istest def walk_and_compute_sha1_from_directory(self): # make a temporary arborescence tree to hash without ignoring anything @@ -196,6 +207,7 @@ class GitHashArborescenceTree(unittest.TestCase): os.path.join(self.tmp_root_path, b'sample-folder/empty-folder'): [], # noqa os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo'): [{ # noqa 'type': git.GitType.BLOB, # noqa + 'length': 72, 'sha256': b'=\xb5\xae\x16\x80U\xbc\xd9:M\x08(]\xc9\x9f\xfe\xe2\x883\x03\xb2?\xac^\xab\x85\x02s\xa8\xeaUF', # noqa 'name': b'another-quote.org', # noqa 'path': os.path.join(self.tmp_root_path, b'sample-folder/bar/barfoo/another-quote.org'), # noqa @@ -210,3 +222,411 @@ class GitHashArborescenceTree(unittest.TestCase): 'sha1_git': b'\xc3\x02\x0fk\xf15\xa3\x8cm\xf3\xaf\xeb_\xb3\x822\xc5\xe0p\x87'}]} # noqa self.assertEquals(actual_walk1, expected_checksums) + + @istest + def walk_and_compute_sha1_from_directory_without_root_tree(self): + # compute the full checksums + expected_hashes = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # except for the key on that round + actual_hashes = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path, + with_root_tree=False) + + # then, removing the root tree hash from the first round + del expected_hashes[git.ROOT_TREE_KEY] + + # should give us the same checksums as the second round + self.assertEquals(actual_hashes, expected_hashes) + + +class GitHashUpdate(GitHashWalkArborescenceTree): + """Test `walk and git hash only on modified fs` functions. 
+ + """ + @istest + def update_checksums_from_add_new_file(self): + # make a temporary arborescence tree to hash without ignoring anything + # update the disk in some way (add a new file) + # update the actual git checksums from the deeper tree modified + + # when + objects = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # update the existing file + changed_path = os.path.join(self.tmp_root_path, + b'sample-folder/bar/barfoo/new') + with open(changed_path, 'wb') as f: + f.write(b'new line') + + # walk1 (this will be our expectation) + expected_dict = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # then + actual_dict = git.update_checksums_from( + [{'path': changed_path, 'action': 'A'}], + objects) + + self.assertEquals(expected_dict, actual_dict) + + @istest + def update_checksums_from_modify_existing_file(self): + # make a temporary arborescence tree to hash without ignoring anything + # update the disk in some way () + # update the actual git checksums where only the modification is needed + + # when + objects = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # update existing file + changed_path = os.path.join( + self.tmp_root_path, + b'sample-folder/bar/barfoo/another-quote.org') + with open(changed_path, 'wb+') as f: + f.write(b'I have a dream') + + # walk1 (this will be our expectation) + expected_dict = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # then + actual_dict = git.update_checksums_from( + [{'path': changed_path, 'action': 'M'}], + objects) + + self.assertEquals(expected_dict, actual_dict) + + @istest + def update_checksums_no_change(self): + # when + expected_dict = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # nothing changes on disk + + # then + actual_dict = git.update_checksums_from([], expected_dict) + + self.assertEquals(actual_dict, expected_dict) + + @istest + def update_checksums_delete_existing_file(self): + # make a temporary 
arborescence tree to hash without ignoring anything + # update the disk in some way (delete a file) + # update the actual git checksums from the deeper tree modified + + # when + objects = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # Remove folder + changed_path = os.path.join(self.tmp_root_path, + b'sample-folder/bar/barfoo') + shutil.rmtree(changed_path) + + # Actually walking the fs will be the resulting expectation + expected_dict = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # then + actual_dict = git.update_checksums_from( + [{'path': changed_path, 'action': 'D'}], + objects) + + self.assertEquals(actual_dict, expected_dict) + + @istest + def update_checksums_from_multiple_fs_modifications(self): + # make a temporary arborescence tree to hash without ignoring anything + # update the disk in some way (modify a file, add a new, delete one) + # update the actual git checksums from the deeper tree modified + + # when + objects = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # Actions on disk (imagine a checkout of some form) + + # 1. Create a new file + changed_path = os.path.join(self.tmp_root_path, + b'sample-folder/bar/barfoo/new') + with open(changed_path, 'wb') as f: + f.write(b'new line') + + # 2. update the existing file + changed_path1 = os.path.join( + self.tmp_root_path, + b'sample-folder/bar/barfoo/another-quote.org') + with open(changed_path1, 'wb') as f: + f.write(b'new line') + + # 3. 
Remove some folder + changed_path2 = os.path.join(self.tmp_root_path, + b'sample-folder/foo') + shutil.rmtree(changed_path2) + + # Actually walking the fs will be the resulting expectation + expected_dict = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # then + actual_dict = git.update_checksums_from( + [{'path': changed_path, 'action': 'A'}, + {'path': changed_path1, 'action': 'M'}, + {'path': changed_path2, 'action': 'D'}], + objects) + + self.assertEquals(expected_dict, actual_dict) + + @istest + def update_checksums_from_common_ancestor(self): + # when + # Add some new arborescence below a folder destined to be removed + # want to check that old keys does not remain + future_folder_to_remove = os.path.join(self.tmp_root_path, + b'sample-folder/bar/barfoo') + + # add .../barfoo/hello/world under (.../barfoo which will be destroyed) + new_folder = os.path.join(future_folder_to_remove, b'hello') + os.makedirs(new_folder, exist_ok=True) + with open(os.path.join(future_folder_to_remove, b'world'), 'wb') as f: + f.write(b"i'm sad 'cause i'm destined to be removed...") + + # now we scan the disk + objects = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + assert objects[future_folder_to_remove] + + # Actions on disk (to simulate a checkout of some sort) + + # 1. Create a new file + changed_path = os.path.join(self.tmp_root_path, + b'sample-folder/bar/barfoo/new') + with open(changed_path, 'wb') as f: + f.write(b'new line') + + # 2. update the existing file + changed_path1 = os.path.join( + self.tmp_root_path, + b'sample-folder/bar/barfoo/another-quote.org') + with open(changed_path1, 'wb') as f: + f.write(b'new line') + + # 3. 
Remove folder + shutil.rmtree(future_folder_to_remove) + + # Actually walking the fs will be the resulting expectation + expected_dict = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # then + actual_dict = git.update_checksums_from( + [{'path': changed_path, 'action': 'A'}, + {'path': changed_path1, 'action': 'M'}, + {'path': future_folder_to_remove, 'action': 'D'}], + objects) + + self.assertEquals(expected_dict, actual_dict) + + @istest + def update_checksums_detects_recomputation_from_all_is_needed(self): + # when + objects = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # Actions on disk (imagine a checkout of some form) + + # 1. Create a new file + changed_path = os.path.join(self.tmp_root_path, + b'new-file-at-root') + with open(changed_path, 'wb') as f: + f.write(b'new line') + + # 2. update the existing file + changed_path1 = os.path.join( + self.tmp_root_path, + b'sample-folder/bar/barfoo/another-quote.org') + with open(changed_path1, 'wb') as f: + f.write(b'new line') + + # 3. Remove some folder + changed_path2 = os.path.join(self.tmp_root_path, + b'sample-folder/foo') + + # 3. 
Remove some folder + changed_path2 = os.path.join(self.tmp_root_path, + b'sample-folder/bar/barfoo') + shutil.rmtree(changed_path2) + + # Actually walking the fs will be the resulting expectation + expected_dict = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path) + + # then + actual_dict = git.update_checksums_from( + [{'path': changed_path, 'action': 'A'}, + {'path': changed_path1, 'action': 'M'}, + {'path': changed_path2, 'action': 'D'}], + objects) + + self.assertEquals(expected_dict, actual_dict) + + @istest + def commonpath(self): + paths = ['r/0/h', + 'r/1/d', 'r/1/i/a', 'r/1/i/b', 'r/1/i/c', + 'r/2/e', 'r/2/f', 'r/2/g'] + self.assertEquals(git.commonpath(paths), 'r') + + paths = ['r/1/d', 'r/1/i/a', 'r/1/i/b', 'r/1/i/c'] + self.assertEquals(git.commonpath(paths), 'r/1') + + paths = ['/a/r/2/g', '/a/r/1/i/c', '/a/r/0/h'] + self.assertEquals(git.commonpath(paths), '/a/r') + + paths = [b'/a/r/2/g', b'/b/r/1/i/c', b'/c/r/0/h'] + self.assertEquals(git.commonpath(paths), b'/') + + paths = ['a/z', 'a/z', 'a/z'] + self.assertEquals(git.commonpath(paths), 'a/z') + + paths = ['0'] + self.assertEquals(git.commonpath(paths), '0') + + +def untar(archive, dest): + # cleanup + shutil.rmtree(dest) + os.mkdir(dest) + # untar + cmd = [b'tar', b'xf', archive, b'-C', dest] + subprocess.check_output(cmd) + + +def ignore_svn_folder(dirpath): + return b'.svn' not in dirpath + + +class GitHashUpdateRealUseCase(GitHashWalkArborescenceTree): + """Test `walk and git hash only on modified fs` functions. 
+ + """ + def setUp(self): + self.tmp_root_path = tempfile.mkdtemp().encode('utf-8') + + archives_folder = os.path.join( + os.path.dirname(__file__).encode('utf-8'), + b'../../../..', + b'swh-storage-testdata', + b'svn-folders') + + self.pkg_doc_linux_r10 = os.path.join(archives_folder, + b'pkg-doc-linux-r10.tgz') + self.pkg_doc_linux_r11 = os.path.join(archives_folder, + b'pkg-doc-linux-r11.tgz') + self.pkg_doc_linux_r12 = os.path.join(archives_folder, + b'pkg-doc-linux-r12.tgz') + + def tearDown(self): + if os.path.exists(self.tmp_root_path): + shutil.rmtree(self.tmp_root_path) + + @istest + def use_case_1_r10_r11(self): + # given + # untar the svn revision 10 + untar(self.pkg_doc_linux_r10, self.tmp_root_path) + + objects_r10 = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path, + ignore_svn_folder) + + # untar the svn revision 11 + untar(self.pkg_doc_linux_r11, self.tmp_root_path) + + objects_r11 = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path, + ignore_svn_folder) + + assert objects_r10 != objects_r11 + + changes = [ + {'action': 'D', 'path': os.path.join(self.tmp_root_path, b'copyrights/non-free/Kiosk')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'copyrights/undistributable/Kiosk')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'copyrights/undistributable')}, # noqa + {'action': 'D', 'path': os.path.join(self.tmp_root_path, b'copyrights/non-free/UPS')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'copyrights/undistributable/UPS')} # noqa + ] + + # when + # update from objects from previous revision (r10) with + # actual changes from r10 to r11 + actual_objects = git.update_checksums_from(changes, + objects_r10, + ignore_svn_folder) + + # then + self.assertEquals(actual_objects, objects_r11) + + @istest + def use_case_2_r11_r12(self): + # given + # untar the svn revision 11 + untar(self.pkg_doc_linux_r11, self.tmp_root_path) + + objects_r11 = 
git.walk_and_compute_sha1_from_directory( + self.tmp_root_path, + ignore_svn_folder) + + # untar the svn revision 12 + untar(self.pkg_doc_linux_r12, self.tmp_root_path) + + objects_r12 = git.walk_and_compute_sha1_from_directory( + self.tmp_root_path, + ignore_svn_folder) + + assert objects_r11 != objects_r12 + changes = [ + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk')}, # noqa + {'action': 'D', 'path': os.path.join(self.tmp_root_path, b'copyrights')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/copyright.head')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/split-package')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-base.faq')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/make-copyright')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-html.menu')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/redirect.patch')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-html.overrides')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-html.prerm')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/README.updating')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-html.preinst')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-html.dirs')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/changelog')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, 
b'trunk/doc-linux/debian/doc-linux-text.README.Debian')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/html2docs')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/rules')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-html.postrm')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/make-omf')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-text.preinst')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-html.postinst')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/copyrights')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/control')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-text.dirs')}, # noqa + {'action': 'A', 'path': os.path.join(self.tmp_root_path, b'trunk/doc-linux/debian/doc-linux-html.README.Debian')} # noqa + ] + + # when + # update from objects from previous revision (r11) with + # actual changes from r11 to r12 + actual_objects = git.update_checksums_from(changes, + objects_r11, + ignore_svn_folder) + + # then + self.assertEquals(actual_objects, objects_r12) diff --git a/swh/model/tests/test_hashutil.py b/swh/model/tests/test_hashutil.py index 79cdc9ece21e9065a73f9296fbac729d6badd7ac..f795e87ea64f3e64a864dcef1f2da653e5f4cb70 100644 --- a/swh/model/tests/test_hashutil.py +++ b/swh/model/tests/test_hashutil.py @@ -92,6 +92,7 @@ class Hashutil(unittest.TestCase): f.close() hashes = hashutil.hash_path(f.name) + self.checksums['length'] = len(self.data) self.assertEquals(self.checksums, hashes) @istest diff --git 
a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py index da221df0119ac512fcaeed6be635075e7afbff1d..e1adfea34f08ac419af4fc424fb4460e3eaabc6e 100644 --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -219,6 +219,25 @@ class RevisionIdentifier(unittest.TestCase): linus_tz = datetime.timezone(datetime.timedelta(minutes=-420)) + gpgsig = b'''\ +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.13 (Darwin) + +iQIcBAABAgAGBQJVJcYsAAoJEBiY3kIkQRNJVAUQAJ8/XQIfMqqC5oYeEFfHOPYZ +L7qy46bXHVBa9Qd8zAJ2Dou3IbI2ZoF6/Et89K/UggOycMlt5FKV/9toWyuZv4Po +L682wonoxX99qvVTHo6+wtnmYO7+G0f82h+qHMErxjP+I6gzRNBvRr+SfY7VlGdK +wikMKOMWC5smrScSHITnOq1Ews5pe3N7qDYMzK0XVZmgDoaem4RSWMJs4My/qVLN +e0CqYWq2A22GX7sXl6pjneJYQvcAXUX+CAzp24QnPSb+Q22Guj91TcxLFcHCTDdn +qgqMsEyMiisoglwrCbO+D+1xq9mjN9tNFWP66SQ48mrrHYTBV5sz9eJyDfroJaLP +CWgbDTgq6GzRMehHT3hXfYS5NNatjnhkNISXR7pnVP/obIi/vpWh5ll6Gd8q26z+ +a/O41UzOaLTeNI365MWT4/cnXohVLRG7iVJbAbCxoQmEgsYMRc/pBAzWJtLfcB2G +jdTswYL6+MUdL8sB9pZ82D+BP/YAdHe69CyTu1lk9RT2pYtI/kkfjHubXBCYEJSG ++VGllBbYG6idQJpyrOYNRJyrDi9yvDJ2W+S0iQrlZrxzGBVGTB/y65S8C+2WTBcE +lf1Qb5GDsQrZWgD+jtWTywOYHtCBwyCKSAXxSARMbNPeak9WPlcW/Jmu+fUcMe2x +dg1KdHOa34shrKDaOVzW +=od6m +-----END PGP SIGNATURE-----''' + self.revision = { 'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590', 'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07', @@ -238,6 +257,26 @@ class RevisionIdentifier(unittest.TestCase): 'message': b'Linux 4.2-rc2\n', } + self.revision_none_metadata = { + 'id': 'bc0195aad0daa2ad5b0d76cce22b167bc3435590', + 'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07', + 'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'], + 'author': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@linux-foundation.org', + }, + 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, + tzinfo=linus_tz), + 'committer': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@linux-foundation.org', + }, + 'committer_date': datetime.datetime(2015, 7, 12, 
15, 10, 30, + tzinfo=linus_tz), + 'message': b'Linux 4.2-rc2\n', + 'metadata': None, + } + self.synthetic_revision = { 'id': b'\xb2\xa7\xe1&\x04\x92\xe3D\xfa\xb3\xcb\xf9\x1b\xc1<\x91' b'\xe0T&\xfd', @@ -248,6 +287,7 @@ class RevisionIdentifier(unittest.TestCase): 'date': { 'timestamp': 1437047495.0, 'offset': 0, + 'negative_utc': False, }, 'type': 'tar', 'committer': { @@ -270,6 +310,141 @@ class RevisionIdentifier(unittest.TestCase): } + # cat commit.txt | git hash-object -t commit --stdin + self.revision_with_extra_headers = { + 'id': '010d34f384fa99d047cdd5e2f41e56e5c2feee45', + 'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07', + 'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'], + 'author': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@linux-foundation.org', + 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>', + }, + 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, + tzinfo=linus_tz), + 'committer': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@linux-foundation.org', + 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>', + }, + 'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30, + tzinfo=linus_tz), + 'message': b'Linux 4.2-rc2\n', + 'metadata': { + 'extra_headers': [ + ['svn-repo-uuid', '046f1af7-66c2-d61b-5410-ce57b7db7bff'], + ['svn-revision', 10], + ] + } + } + + self.revision_with_gpgsig = { + 'id': '44cc742a8ca17b9c279be4cc195a93a6ef7a320e', + 'directory': 'b134f9b7dc434f593c0bab696345548b37de0558', + 'parents': ['689664ae944b4692724f13b709a4e4de28b54e57', + 'c888305e1efbaa252d01b4e5e6b778f865a97514'], + 'author': { + 'name': b'Jiang Xin', + 'email': b'worldhello.net@gmail.com', + 'fullname': b'Jiang Xin <worldhello.net@gmail.com>', + }, + 'date': { + 'timestamp': '1428538899', + 'offset': 480, + }, + 'committer': { + 'name': b'Jiang Xin', + 'email': b'worldhello.net@gmail.com', + }, + 'committer_date': { + 'timestamp': '1428538899', + 'offset': 480, + }, + 'metadata': { + 'extra_headers': [ + 
['gpgsig', gpgsig], + ], + }, + 'message': b'''Merge branch 'master' of git://github.com/alexhenrie/git-po + +* 'master' of git://github.com/alexhenrie/git-po: + l10n: ca.po: update translation +''' + } + + self.revision_no_message = { + 'id': '4cfc623c9238fa92c832beed000ce2d003fd8333', + 'directory': 'b134f9b7dc434f593c0bab696345548b37de0558', + 'parents': ['689664ae944b4692724f13b709a4e4de28b54e57', + 'c888305e1efbaa252d01b4e5e6b778f865a97514'], + 'author': { + 'name': b'Jiang Xin', + 'email': b'worldhello.net@gmail.com', + 'fullname': b'Jiang Xin <worldhello.net@gmail.com>', + }, + 'date': { + 'timestamp': '1428538899', + 'offset': 480, + }, + 'committer': { + 'name': b'Jiang Xin', + 'email': b'worldhello.net@gmail.com', + }, + 'committer_date': { + 'timestamp': '1428538899', + 'offset': 480, + }, + 'message': None, + } + + self.revision_empty_message = { + 'id': '7442cd78bd3b4966921d6a7f7447417b7acb15eb', + 'directory': 'b134f9b7dc434f593c0bab696345548b37de0558', + 'parents': ['689664ae944b4692724f13b709a4e4de28b54e57', + 'c888305e1efbaa252d01b4e5e6b778f865a97514'], + 'author': { + 'name': b'Jiang Xin', + 'email': b'worldhello.net@gmail.com', + 'fullname': b'Jiang Xin <worldhello.net@gmail.com>', + }, + 'date': { + 'timestamp': '1428538899', + 'offset': 480, + }, + 'committer': { + 'name': b'Jiang Xin', + 'email': b'worldhello.net@gmail.com', + }, + 'committer_date': { + 'timestamp': '1428538899', + 'offset': 480, + }, + 'message': b'', + } + + self.revision_only_fullname = { + 'id': '010d34f384fa99d047cdd5e2f41e56e5c2feee45', + 'directory': '85a74718d377195e1efd0843ba4f3260bad4fe07', + 'parents': ['01e2d0627a9a6edb24c37db45db5ecb31e9de808'], + 'author': { + 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>', + }, + 'date': datetime.datetime(2015, 7, 12, 15, 10, 30, + tzinfo=linus_tz), + 'committer': { + 'fullname': b'Linus Torvalds <torvalds@linux-foundation.org>', + }, + 'committer_date': datetime.datetime(2015, 7, 12, 15, 10, 30, + 
tzinfo=linus_tz), + 'message': b'Linux 4.2-rc2\n', + 'metadata': { + 'extra_headers': [ + ['svn-repo-uuid', '046f1af7-66c2-d61b-5410-ce57b7db7bff'], + ['svn-revision', 10], + ] + } + } + @istest def revision_identifier(self): self.assertEqual( @@ -277,6 +452,13 @@ class RevisionIdentifier(unittest.TestCase): identifiers.identifier_to_str(self.revision['id']), ) + @istest + def revision_identifier_none_metadata(self): + self.assertEqual( + identifiers.revision_identifier(self.revision_none_metadata), + identifiers.identifier_to_str(self.revision_none_metadata['id']), + ) + @istest def revision_identifier_synthetic(self): self.assertEqual( @@ -284,6 +466,51 @@ class RevisionIdentifier(unittest.TestCase): identifiers.identifier_to_str(self.synthetic_revision['id']), ) + @istest + def revision_identifier_with_extra_headers(self): + self.assertEqual( + identifiers.revision_identifier( + self.revision_with_extra_headers), + identifiers.identifier_to_str( + self.revision_with_extra_headers['id']), + ) + + @istest + def revision_identifier_with_gpgsig(self): + self.assertEqual( + identifiers.revision_identifier( + self.revision_with_gpgsig), + identifiers.identifier_to_str( + self.revision_with_gpgsig['id']), + ) + + @istest + def revision_identifier_no_message(self): + self.assertEqual( + identifiers.revision_identifier( + self.revision_no_message), + identifiers.identifier_to_str( + self.revision_no_message['id']), + ) + + @istest + def revision_identifier_empty_message(self): + self.assertEqual( + identifiers.revision_identifier( + self.revision_empty_message), + identifiers.identifier_to_str( + self.revision_empty_message['id']), + ) + + @istest + def revision_identifier_only_fullname(self): + self.assertEqual( + identifiers.revision_identifier( + self.revision_only_fullname), + identifiers.identifier_to_str( + self.revision_only_fullname['id']), + ) + class ReleaseIdentifier(unittest.TestCase): def setUp(self): @@ -331,6 +558,53 @@ o6X/3T+vm8K3bf3driRr34c= 
'synthetic': False, } + self.release_no_message = { + 'id': 'b6f4f446715f7d9543ef54e41b62982f0db40045', + 'target': '9ee1c939d1cb936b1f98e8d81aeffab57bae46ab', + 'target_type': 'revision', + 'name': b'v2.6.12', + 'author': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@g5.osdl.org', + }, + 'date': datetime.datetime(2005, 10, 27, 17, 2, 33, + tzinfo=linus_tz), + 'message': None, + } + + self.release_empty_message = { + 'id': '71a0aea72444d396575dc25ac37fec87ee3c6492', + 'target': '9ee1c939d1cb936b1f98e8d81aeffab57bae46ab', + 'target_type': 'revision', + 'name': b'v2.6.12', + 'author': { + 'name': b'Linus Torvalds', + 'email': b'torvalds@g5.osdl.org', + }, + 'date': datetime.datetime(2005, 10, 27, 17, 2, 33, + tzinfo=linus_tz), + 'message': b'', + } + + self.release_negative_utc = { + 'id': '97c8d2573a001f88e72d75f596cf86b12b82fd01', + 'name': b'20081029', + 'target': '54e9abca4c77421e2921f5f156c9fe4a9f7441c7', + 'target_type': 'revision', + 'date': { + 'timestamp': 1225281976.0, + 'offset': 0, + 'negative_utc': True, + }, + 'author': { + 'name': b'Otavio Salvador', + 'email': b'otavio@debian.org', + 'id': 17640, + }, + 'synthetic': False, + 'message': b'tagging version 20081029\n\nr56558\n', + } + @istest def release_identifier(self): self.assertEqual( @@ -344,3 +618,24 @@ o6X/3T+vm8K3bf3driRr34c= identifiers.release_identifier(self.release_no_author), identifiers.identifier_to_str(self.release_no_author['id']) ) + + @istest + def release_identifier_no_message(self): + self.assertEqual( + identifiers.release_identifier(self.release_no_message), + identifiers.identifier_to_str(self.release_no_message['id']) + ) + + @istest + def release_identifier_empty_message(self): + self.assertEqual( + identifiers.release_identifier(self.release_empty_message), + identifiers.identifier_to_str(self.release_empty_message['id']) + ) + + @istest + def release_identifier_negative_utc(self): + self.assertEqual( + identifiers.release_identifier(self.release_negative_utc), + 
identifiers.identifier_to_str(self.release_negative_utc['id']) + ) diff --git a/version.txt b/version.txt index d1537af5eba914df7cf369ccd8aff3411294f1fb..faf6cf6db4b4cd022a7f3f6e1ec4c2c79ad11a81 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.4-0-g696d23e \ No newline at end of file +v0.0.5-0-g0fbf74e \ No newline at end of file