From 1d898f7f3790ffbec77849e8f971512cea663391 Mon Sep 17 00:00:00 2001 From: Stefano Zacchiroli <zack@upsilon.cc> Date: Wed, 6 Sep 2017 20:22:30 +0200 Subject: [PATCH] sanitize docstrings for sphinx --- swh/model/git.py | 115 +++++++++++++++--------------- swh/model/identifiers.py | 147 +++++++++++++++++++++------------------ 2 files changed, 136 insertions(+), 126 deletions(-) diff --git a/swh/model/git.py b/swh/model/git.py index ad5962f5..d46a60bf 100644 --- a/swh/model/git.py +++ b/swh/model/git.py @@ -36,7 +36,8 @@ def _compute_directory_git_sha1(hashes): """Compute a directory git sha1 from hashes. Args: - hashes: list of tree entries with keys: + hashes (list): list of tree entries with the following keys: + - sha1_git: the tree entry's sha1 - name: file or subdir's name - perms: the tree entry's sha1 permissions @@ -68,7 +69,8 @@ def compute_directory_git_sha1(dirpath, hashes): Args: dirpath: the directory's absolute path - hashes: list of tree entries with keys: + hashes (list): list of tree entries with keys: + - sha1_git: the tree entry's sha1 - name: file or subdir's name - perms: the tree entry's sha1 permissions @@ -88,8 +90,8 @@ def compute_revision_sha1_git(revision): Args: revision: Additional dictionary information needed to compute a - synthetic - revision. Following keys are expected: + synthetic revision. The following keys are expected: + - author - date - committer @@ -112,7 +114,8 @@ def compute_release_sha1_git(release): Args: release: Additional dictionary information needed to compute a - synthetic release. Following keys are expected: + synthetic release. 
The following keys are expected: + - name - message - date @@ -133,7 +136,8 @@ def compute_link_metadata(linkpath): linkpath: absolute pathname of the link Returns: - Dictionary of values: + dict: Dictionary of values with the following keys: + - data: link's content - length: link's content length - name: basename of the link @@ -166,7 +170,8 @@ def compute_blob_metadata(filepath): filepath: absolute pathname of the regular file. Returns: - Dictionary of values: + dict: Dictionary of values with the following keys: + - name: basename of the file - length: data length - perms: git permission for file @@ -198,13 +203,15 @@ def _compute_tree_metadata(dirname, hashes): Args: dirname: absolute pathname of the directory. - hashes: list of tree dirname's entries with keys: + hashes (list): list of tree dirname's entries with keys: + - sha1_git: the tree entry's sha1 - name: file or subdir's name - perms: the tree entry's sha1 permissions Returns: - Dictionary of values: + dict: Dictionary of values with the following keys: + - sha1_git: tree's sha1 git - name: basename of the directory - perms: git permission for directory @@ -229,7 +236,8 @@ def compute_tree_metadata(dirname, ls_hashes): ls_hashes: dictionary of path, hashes Returns: - Dictionary of values: + dict: Dictionary of values with the following keys: + - sha1_git: tree's sha1 git - name: basename of the directory - perms: git permission for directory @@ -266,11 +274,11 @@ def _walk(rootdir, Args: - rootdir: starting walk root directory path - - dir_ok_fn: validation function. if folder encountered are - not ok, they are ignored. Default to default_validation_dir - which does nothing. - - remove_empty_folder: Flag to remove and ignore any - encountered empty folders. + - dir_ok_fn: validation function. If encountered folders are not ok, + they are ignored. Defaults to default_validation_dir which does + nothing. + - remove_empty_folder: Flag to remove and ignore any encountered empty + folders. 
Yields: 3 tuples dirpath, set of absolute children dirname paths, set @@ -356,34 +364,28 @@ def walk_and_compute_sha1_from_directory(rootdir, Compute git sha1 from directory rootdir. Args: - - rootdir: Root directory from which beginning the git hash computation - - - dir_ok_fn: Filter function to filter directory according to rules - defined in the function. By default, all folders are ok. - Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath - - - with_root_tree: Determine if we compute the upper root tree's - checksums. As a default, we want it. One possible use case where this - is not useful is the update (cf. `update_checksums_from`) + rootdir: Root directory from which to begin the git hash computation + dir_ok_fn: Filter function to filter directory according to rules + defined in the function. By default, all folders are ok. Example + override: ``dir_ok_fn = lambda dirpath: b'svn' not in dirpath`` + with_root_tree: Determine if we compute the upper root tree's + checksums. As a default, we want it. One possible use case where + this is not useful is the update (cf. `update_checksums_from`) Returns: - Dictionary of entries with keys <path-name> and as values a list of - directory entries. - Those are list of dictionary with keys: - - 'perms' - - 'type' - - 'name' - - 'sha1_git' - - and specifically content: 'sha1', 'sha256', ... + dict: Dictionary of entries with keys <path-name> and as values a list + of directory entries. Those are lists of dictionaries with keys: + + - perms + - type + - name + - sha1_git + - and specifically for content: sha1, sha256, etc. Note: One special key is ROOT_TREE_KEY to indicate the upper root of the directory (this is the revision's directory). - Raises: - Nothing - If something is raised, this is a programmatic error. - """ ls_hashes = {} all_links = set() @@ -438,34 +440,31 @@ def compute_hashes_from_directory(rootdir, """Compute git sha1 from directory rootdir. 
Args: - - rootdir: Root directory from which beginning the git hash - computation - - - dir_ok_fn: Filter function to filter directory according to rules - defined in the function. By default, all folders are ok. - Example override: dir_ok_fn = lambda dirpath: b'svn' not in dirpath + rootdir: Root directory from which to begin the git hash + computation + dir_ok_fn: Filter function to filter directory according to rules + defined in the function. By default, all folders are ok. Example + override: ``dir_ok_fn = lambda dirpath: b'svn' not in dirpath`` Returns: - Dictionary of entries with keys absolute path name. + dict: Dictionary of entries with keys absolute path name. Path-name can be a file/link or directory. - The associated value is a dictionary with: + The associated value is a dictionary with keys: + - checksums: the dictionary with the hashes for the link/file/dir + Those are list of dictionary with keys: - - 'perms' - - 'type' - - 'name' - - 'sha1_git' - - and specifically content: 'sha1', 'sha256', ... + - 'perms' + - 'type' + - 'name' + - 'sha1_git' + - and specifically for content: sha1, sha256, etc. - children: Only for a directory, the set of children paths Note: - One special key is the / which indicates the upper root of - the directory (this is the revision's directory). - - Raises: - Nothing - If something is raised, this is a programmatic error. + One special key is the / which indicates the upper root of the + directory (this is the revision's directory). """ def _get_dict_from_dirpath(_dict, path): @@ -547,7 +546,7 @@ def children_hashes(children, objects): hashes. Args: - objects: objects hash as returned by git.compute_hashes_from_directory. 
+ objects: objects hash as returned by git.compute_hashes_from_directory children: collection of bytes path Yields: @@ -564,8 +563,8 @@ def children_hashes(children, objects): def objects_per_type(filter_type, objects_per_path): """Given an object dictionary returned by - `swh.model.git.compute_hashes_from_directory`, yields - corresponding element type's hashes + :py:func:`compute_hashes_from_directory`, yields corresponding element + type's hashes Args: filter_type: one of GitType enum diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py index d51304e7..c7a6ce96 100644 --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -16,12 +16,12 @@ def identifier_to_bytes(identifier): Args: identifier: an identifier, either a 40-char hexadecimal string or a - bytes object of length 20 + bytes object of length 20 Returns: The length 20 bytestring corresponding to the given identifier Raises: - ValueError if the identifier is of an unexpected type or length. + ValueError: if the identifier is of an unexpected type or length. """ if isinstance(identifier, bytes): @@ -48,7 +48,8 @@ def identifier_to_str(identifier): Args: identifier: an identifier, either a 40-char hexadecimal string or a - bytes object of length 20 + bytes object of length 20 + Returns: The length 40 string corresponding to the given identifier, hex encoded @@ -87,7 +88,7 @@ def content_identifier(content): A dictionary with all the hashes for the data Raises: - KeyError if the content doesn't have a data member. + KeyError: if the content doesn't have a data member. """ @@ -113,7 +114,9 @@ def escape_newlines(snippet): """Escape the newlines present in snippet according to git rules. New lines in git manifests are escaped by indenting the next line by one - space.""" + space. + + """ if b'\n' in snippet: return b'\n '.join(snippet.split(b'\n')) @@ -129,27 +132,30 @@ def directory_identifier(directory): trees: 1. 
Entries of the directory are sorted using the name (or the name with '/' - appended for directory entries) as key, in bytes order. + appended for directory entries) as key, in bytes order. 2. For each entry of the directory, the following bytes are output: - - the octal representation of the permissions for the entry - (stored in the 'perms' member), which is a representation of the - entry type: - b'100644' (int 33188) for files - b'100755' (int 33261) for executable files - b'120000' (int 40960) for symbolic links - b'40000' (int 16384) for directories - b'160000' (int 57344) for references to revisions - - an ascii space (b'\x20') - - the entry's name (as raw bytes), stored in the 'name' member - - a null byte (b'\x00') - - the 20 byte long identifier of the object pointed at by the entry, - stored in the 'target' member: - for files or executable files: their blob sha1_git - for symbolic links: the blob sha1_git of a file containing the - link destination - for directories: their intrinsic identifier - for revisions: their intrinsic identifier + + - the octal representation of the permissions for the entry (stored in + the 'perms' member), which is a representation of the entry type: + + - b'100644' (int 33188) for files + - b'100755' (int 33261) for executable files + - b'120000' (int 40960) for symbolic links + - b'40000' (int 16384) for directories + - b'160000' (int 57344) for references to revisions + + - an ascii space (b'\x20') + - the entry's name (as raw bytes), stored in the 'name' member + - a null byte (b'\x00') + - the 20 byte long identifier of the object pointed at by the entry, + stored in the 'target' member: + + - for files or executable files: their blob sha1_git + - for symbolic links: the blob sha1_git of a file containing the link + destination + - for directories: their intrinsic identifier + - for revisions: their intrinsic identifier (Note that there is no separator between entries) @@ -200,8 +206,9 @@ def format_offset(offset, 
negative_utc=None): """Convert an integer number of minutes into an offset representation. The offset representation is [+-]hhmm where: - hh is the number of hours; - mm is the number of minutes. + + - hh is the number of hours; + - mm is the number of minutes. A null offset is represented as +0000. """ @@ -221,21 +228,25 @@ def normalize_timestamp(time_representation): """Normalize a time representation for processing by Software Heritage This function supports a numeric timestamp (representing a number of - seconds since the UNIX epoch, 1970-01-01 at 00:00 UTC), a datetime.datetime - object (with timezone information), or a normalized Software - Heritage time representation (idempotency). + seconds since the UNIX epoch, 1970-01-01 at 00:00 UTC), a + :obj:`datetime.datetime` object (with timezone information), or a + normalized Software Heritage time representation (idempotency). Args: time_representation: the representation of a timestamp - Returns: a normalized dictionary with three keys + Returns: + dict: a normalized dictionary with three keys: + + - timestamp: a dict with two optional keys: + + - seconds: the integral number of seconds since the UNIX epoch + - microseconds: the integral number of microseconds - - timestamp: a dict with two optional keys: - - seconds: the integral number of seconds since the UNIX epoch - - microseconds: the integral number of microseconds - - offset: the timezone offset as a number of minutes relative to UTC - - negative_utc: a boolean representing whether the offset is -0000 when - offset = 0. + - offset: the timezone offset as a number of minutes relative to + UTC + - negative_utc: a boolean representing whether the offset is -0000 + when offset = 0. """ @@ -321,11 +332,12 @@ def format_author_line(header, author, date_offset): """Format a an author line according to git standards. 
An author line has three components: - - a header, describing the type of author (author, committer, tagger) - - a name and email, which is an arbitrary bytestring - - optionally, a timestamp with UTC offset specification - The author line is formatted thus: + - a header, describing the type of author (author, committer, tagger) + - a name and email, which is an arbitrary bytestring + - optionally, a timestamp with UTC offset specification + + The author line is formatted thus:: `header` `name and email`[ `timestamp` `utc_offset`] @@ -343,11 +355,11 @@ def format_author_line(header, author, date_offset): Args: header: the header of the author line (one of 'author', 'committer', - 'tagger') + 'tagger') author: an author specification (dict with two bytes values: name and - email, or byte value) + email, or byte value) date_offset: a normalized date/time representation as returned by - `normalize_timestamp`. + :func:`normalize_timestamp`. Returns: the newline-terminated byte string containing the author line @@ -373,37 +385,36 @@ def revision_identifier(revision): """Return the intrinsic identifier for a revision. 
The fields used for the revision identifier computation are: - - directory - - parents - - author - - author_date - - committer - - committer_date - - metadata -> extra_headers - - message + + - directory + - parents + - author + - author_date + - committer + - committer_date + - metadata -> extra_headers + - message A revision's identifier is the 'git'-checksum of a commit manifest - constructed as follows (newlines are a single ASCII newline character): - - ``` - tree <directory identifier> - [for each parent in parents] - parent <parent identifier> - [end for each parents] - author <author> <author_date> - committer <committer> <committer_date> - [for each key, value in extra_headers] - <key> <encoded value> - [end for each extra_headers] - - <message> - ``` + constructed as follows (newlines are a single ASCII newline character):: + + tree <directory identifier> + [for each parent in parents] + parent <parent identifier> + [end for each parents] + author <author> <author_date> + committer <committer> <committer_date> + [for each key, value in extra_headers] + <key> <encoded value> + [end for each extra_headers] + + <message> The directory identifier is the ascii representation of its hexadecimal encoding. - Author and committer are formatted with the `format_author` function. - Dates are formatted with the `format_date_offset` function. + Author and committer are formatted with the :func:`format_author` function. + Dates are formatted with the :func:`format_offset` function. Extra headers are an ordered list of [key, value] pairs. Keys are strings and get encoded to utf-8 for identifier computation. Values are either byte -- GitLab