Skip to content
Snippets Groups Projects
Commit a09e9b44 authored by Nicolas Dandrimont
Browse files

identifiers: proper support for negative utc offsets

Move timestamp normalization to another function to make it more easily
movable.
parent 39c61bb6
No related branches found
No related tags found
No related merge requests found
...@@ -187,7 +187,7 @@ def format_date(date): ...@@ -187,7 +187,7 @@ def format_date(date):
@lru_cache() @lru_cache()
def format_offset(offset): def format_offset(offset, negative_utc=None):
"""Convert an integer number of minutes into an offset representation. """Convert an integer number of minutes into an offset representation.
The offset representation is [+-]hhmm where: The offset representation is [+-]hhmm where:
...@@ -196,10 +196,10 @@ def format_offset(offset): ...@@ -196,10 +196,10 @@ def format_offset(offset):
A null offset is represented as +0000. A null offset is represented as +0000.
""" """
if offset >= 0: if offset < 0 or offset == 0 and negative_utc:
sign = '+'
else:
sign = '-' sign = '-'
else:
sign = '+'
hours = abs(offset) // 60 hours = abs(offset) // 60
minutes = abs(offset) % 60 minutes = abs(offset) % 60
...@@ -208,40 +208,114 @@ def format_offset(offset): ...@@ -208,40 +208,114 @@ def format_offset(offset):
return t.encode() return t.encode()
def format_date_offset(date_offset): def normalize_timestamp(time_representation):
"""Format a date-compatible object with its timezone offset. """Normalize a time representation for processing by Software Heritage
This function supports a numeric timestamp (representing a number of
seconds since the UNIX epoch, 1970-01-01 at 00:00 UTC), a datetime.datetime
object (with timezone information), or a normalized Software
Heritage time representation (idempotency).
Args:
time_representation: the representation of a timestamp
Returns: a normalized dictionary with three keys
- timestamp: a number of seconds since the UNIX epoch (1970-01-01 at 00:00
UTC)
- offset: the timezone offset as a number of minutes relative to UTC
- negative_utc: a boolean representing whether the offset is -0000 when
offset = 0.
A date-compatible object is either:
- a dict with two members
timestamp: floating point number of seconds since the unix epoch
offset: (int) number of minutes representing the offset from UTC
- a datetime.datetime object with a timezone
- a numeric value (in which case the offset is hardcoded to 0)
""" """
# FIXME: move normalization to another module if time_representation is None:
return None
if isinstance(date_offset, dict): negative_utc = False
date = date_offset['timestamp']
offset = date_offset['offset'] if isinstance(time_representation, dict):
elif isinstance(date_offset, datetime.datetime): timestamp = time_representation['timestamp']
date = date_offset offset = time_representation['offset']
utcoffset = date_offset.utcoffset() if 'negative_utc' in time_representation:
negative_utc = time_representation['negative_utc']
elif isinstance(time_representation, datetime.datetime):
timestamp = time_representation.timestamp()
utcoffset = time_representation.utcoffset()
if utcoffset is None: if utcoffset is None:
raise ValueError('Received a datetime without a timezone') raise ValueError(
'normalize_timestamp received datetime without timezone: %s' %
time_representation)
# utcoffset is an integer number of minutes
seconds_offset = utcoffset.total_seconds() seconds_offset = utcoffset.total_seconds()
if seconds_offset - int(seconds_offset) != 0 or seconds_offset % 60:
raise ValueError('Offset is not an integer number of minutes')
offset = int(seconds_offset) // 60 offset = int(seconds_offset) // 60
else: else:
date = date_offset timestamp = time_representation
offset = 0 offset = 0
return b''.join([format_date(date), b' ', format_offset(offset)]) return {
'timestamp': timestamp,
'offset': offset,
'negative_utc': negative_utc,
}
def format_author_line(header, author, date_offset):
"""Format an author line according to git standards.
An author line has four components:
- a header, describing the type of author (author, committer, tagger)
- a name, which is an arbitrary byte string
- an email, which is an arbitrary byte string too
- optionally, a timestamp with UTC offset specification
The author line is formatted thus:
`header` `name` <`email`>[ `timestamp` `utc_offset`]
def format_author(author): If name or email are empty, they are passed as is (so you can find author
return b''.join([author['name'], b' <', author['email'], b'>']) lines with empty angle brackets or two spaces between the header and the
opening bracket).
The timestamp is encoded as a (decimal) number of seconds since the UNIX
epoch (1970-01-01 at 00:00 UTC). As an extension to the git format, we
support fractional timestamps, using a dot as the separator for the decimal
part.
The utc offset is a number of minutes encoded as '[+-]HHMM'. Note some
tools can pass a negative offset corresponding to the UTC timezone
('-0000'), which is valid and is encoded as such.
For convenience, this function returns the whole line with its trailing
newline.
Args:
header: the header of the author line (one of 'author', 'committer',
'tagger')
author: an author specification (dict with two bytes values: name and
email)
date_offset: a normalized date/time representation as returned by
`normalize_timestamp`.
Returns:
the newline-terminated byte string containing the author line
"""
ret = [header.encode(), b' ', author['name'], b' <', author['email'], b'>']
date_offset = normalize_timestamp(date_offset)
if date_offset is not None:
date_f = format_date(date_offset['timestamp'])
offset_f = format_offset(date_offset['offset'],
date_offset['negative_utc'])
ret.extend([b' ', date_f, b' ', offset_f])
ret.append(b'\n')
return b''.join(ret)
def revision_identifier(revision): def revision_identifier(revision):
...@@ -305,10 +379,9 @@ def revision_identifier(revision): ...@@ -305,10 +379,9 @@ def revision_identifier(revision):
]) ])
components.extend([ components.extend([
b'author ', format_author(revision['author']), format_author_line('author', revision['author'], revision['date']),
b' ', format_date_offset(revision['date']), b'\n', format_author_line('committer', revision['committer'],
b'committer ', format_author(revision['committer']), revision['committer_date']),
b' ', format_date_offset(revision['committer_date']), b'\n',
]) ])
# Handle extra headers # Handle extra headers
...@@ -357,10 +430,9 @@ def release_identifier(release): ...@@ -357,10 +430,9 @@ def release_identifier(release):
] ]
if 'author' in release and release['author']: if 'author' in release and release['author']:
components.extend([ components.append(
b'tagger ', format_author(release['author']), b' ', format_author_line('tagger', release['author'], release['date'])
format_date_offset(release['date']), b'\n', )
])
if release['message'] is not None: if release['message'] is not None:
components.extend([b'\n', release['message']]) components.extend([b'\n', release['message']])
......
...@@ -267,6 +267,7 @@ dg1KdHOa34shrKDaOVzW ...@@ -267,6 +267,7 @@ dg1KdHOa34shrKDaOVzW
'date': { 'date': {
'timestamp': 1437047495.0, 'timestamp': 1437047495.0,
'offset': 0, 'offset': 0,
'negative_utc': False,
}, },
'type': 'tar', 'type': 'tar',
'committer': { 'committer': {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment