Skip to content
Snippets Groups Projects
Verified Commit 3d473c30 authored by Antoine R. Dumont's avatar Antoine R. Dumont
Browse files

lister: Type correctly the 'indexable' column

instead of converting that column as a string

As a side effect, bitbucket wise, we provided improperly the after query
parameter as a date not url encoded. This resulted in improper api response from
bitbucket's (we received from time to time the same next index as the current
one).

Related T1826
parent b99617f9
No related branches found
No related tags found
1 merge request!377lister: Type correctly the 'indexable' column
......@@ -6,7 +6,6 @@ import logging
import iso8601
from datetime import datetime
from urllib import parse
from swh.lister.bitbucket.models import BitBucketModel
......@@ -15,7 +14,6 @@ from swh.lister.core.indexing_lister import IndexingHttpLister
logger = logging.getLogger(__name__)
DEFAULT_BITBUCKET_PAGE = 10
......@@ -24,7 +22,7 @@ class BitBucketLister(IndexingHttpLister):
MODEL = BitBucketModel
LISTER_NAME = 'bitbucket'
instance = 'bitbucket'
default_min_bound = datetime.utcfromtimestamp(0).isoformat()
default_min_bound = datetime.utcfromtimestamp(0)
def __init__(self, api_baseurl, override_config=None, per_page=100):
super().__init__(
......@@ -39,7 +37,7 @@ class BitBucketLister(IndexingHttpLister):
def get_model_from_repo(self, repo):
return {
'uid': repo['uuid'],
'indexable': repo['created_on'],
'indexable': iso8601.parse_date(repo['created_on']),
'name': repo['name'],
'full_name': repo['full_name'],
'html_url': repo['links']['html']['href'],
......@@ -48,38 +46,38 @@ class BitBucketLister(IndexingHttpLister):
}
def get_next_target_from_response(self, response):
"""This will read the 'next' link from the api response if any
and return it as a datetime.
Args:
reponse (Response): requests' response from api call
Returns:
next date as a datetime
"""
body = response.json()
if 'next' in body:
return parse.unquote(body['next'].split('after=')[1])
next_ = body.get('next')
if next_ is not None:
next_ = parse.urlparse(next_)
return iso8601.parse_date(parse.parse_qs(next_.query)['after'][0])
def transport_response_simplified(self, response):
repos = response.json()['values']
return [self.get_model_from_repo(repo) for repo in repos]
def request_uri(self, identifier):
identifier = parse.quote(identifier.isoformat())
return super().request_uri(identifier or '1970-01-01')
def is_within_bounds(self, inner, lower=None, upper=None):
# values are expected to be str dates
try:
inner = iso8601.parse_date(inner)
if lower:
lower = iso8601.parse_date(lower)
if upper:
upper = iso8601.parse_date(upper)
if lower is None and upper is None:
return True
elif lower is None:
ret = inner <= upper
elif upper is None:
ret = inner >= lower
else:
ret = lower <= inner <= upper
except Exception as e:
logger.error(str(e) + ': %s, %s, %s',
('inner=%s%s' % (type(inner), inner)),
('lower=%s%s' % (type(lower), lower)),
('upper=%s%s' % (type(upper), upper)))
raise
# values are expected to be datetimes
if lower is None and upper is None:
ret = True
elif lower is None:
ret = inner <= upper
elif upper is None:
ret = inner >= lower
else:
ret = lower <= inner <= upper
return ret
......@@ -2,7 +2,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, String
from sqlalchemy import Column, String, DateTime
from swh.lister.core.models import IndexingModelBase
......@@ -12,4 +12,4 @@ class BitBucketModel(IndexingModelBase):
__tablename__ = 'bitbucket_repo'
uid = Column(String, primary_key=True)
indexable = Column(String, index=True)
indexable = Column(DateTime(timezone=True), index=True)
# Copyright (C) 2017-2018 the Software Heritage developers
# Copyright (C) 2017-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import unittest
from datetime import timedelta
from urllib.parse import unquote
import iso8601
import requests_mock
from swh.lister.bitbucket.lister import BitBucketLister
from swh.lister.core.tests.test_lister import HttpListerTester
def convert_type(req_index):
"""Convert the req_index to its right type according to the model's
"indexable" column.
"""
return iso8601.parse_date(unquote(req_index))
class BitBucketListerTester(HttpListerTester, unittest.TestCase):
Lister = BitBucketLister
test_re = re.compile(r'/repositories\?after=([^?&]+)')
lister_subdir = 'bitbucket'
good_api_response_file = 'api_response.json'
bad_api_response_file = 'api_empty_response.json'
first_index = '2008-07-12T07:44:01.476818+00:00'
last_index = '2008-07-19T06:16:43.044743+00:00'
first_index = convert_type('2008-07-12T07:44:01.476818+00:00')
last_index = convert_type('2008-07-19T06:16:43.044743+00:00')
entries_per_page = 10
convert_type = staticmethod(convert_type)
@requests_mock.Mocker()
def test_fetch_none_nodb(self, http_mocker):
"""Overridden because index is not an integer nor a string
"""
http_mocker.get(self.test_re, text=self.mock_response)
fl = self.get_fl()
self.disable_scheduler(fl)
self.disable_db(fl)
# stores no results
fl.run(min_bound=self.first_index - timedelta(days=3),
max_bound=self.first_index)
def test_is_within_bounds(self):
fl = self.get_fl()
self.assertTrue(fl.is_within_bounds(
'2008-07-15', self.first_index, self.last_index))
iso8601.parse_date('2008-07-15'),
self.first_index, self.last_index))
self.assertFalse(fl.is_within_bounds(
'2008-07-20', self.first_index, self.last_index))
iso8601.parse_date('2008-07-20'),
self.first_index, self.last_index))
self.assertFalse(fl.is_within_bounds(
'2008-07-11', self.first_index, self.last_index))
iso8601.parse_date('2008-07-11'),
self.first_index, self.last_index))
......@@ -38,6 +38,12 @@ class HttpListerTesterBase(abc.ABC):
first_index = AbstractAttribute('First index in good_api_response')
entries_per_page = AbstractAttribute('Number of results in good response')
LISTER_NAME = 'fake-lister'
convert_type = str
"""static method used to convert the "request_index" to its right type (for
indexing listers for example, this is in accordance with the model's
"indexable" column).
"""
# May need to override this if the headers are used for something
def response_headers(self, request):
......@@ -66,7 +72,7 @@ class HttpListerTesterBase(abc.ABC):
def request_index(self, request):
m = self.test_re.search(request.path_url)
if m and (len(m.groups()) > 0):
return m.group(1)
return self.convert_type(m.group(1))
def mock_response(self, request, context):
self.fl.reset_backoff()
......@@ -76,7 +82,7 @@ class HttpListerTesterBase(abc.ABC):
context.headers.update(custom_headers)
req_index = self.request_index(request)
if req_index == str(self.first_index):
if req_index == self.first_index:
response_file = self.good_api_response_file
else:
response_file = self.bad_api_response_file
......
# Copyright (C) 2017-2018 the Software Heritage developers
# Copyright (C) 2017-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -19,10 +19,11 @@ class GitHubListerTester(HttpListerTester, unittest.TestCase):
first_index = 26
last_index = 368
entries_per_page = 100
convert_type = int
def response_headers(self, request):
headers = {'X-RateLimit-Remaining': '1'}
if self.request_index(request) == str(self.first_index):
if self.request_index(request) == self.first_index:
headers.update({
'Link': '<https://api.github.com/repositories?since=367>;'
' rel="next",'
......
# Copyright (C) 2017-2018 the Software Heritage developers
# Copyright (C) 2017-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -18,10 +18,11 @@ class GitLabListerTester(HttpListerTesterBase, unittest.TestCase):
bad_api_response_file = 'api_empty_response.json'
first_index = 1
entries_per_page = 10
convert_type = int
def response_headers(self, request):
headers = {'RateLimit-Remaining': '1'}
if self.request_index(request) == str(self.first_index):
if self.request_index(request) == self.first_index:
headers.update({
'x-next-page': '3',
})
......
# Copyright (C) 2018 the Software Heritage developers
# Copyright (C) 2018-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
......@@ -33,7 +33,7 @@ class NpmIncrementalListerTester(HttpListerTesterBase, unittest.TestCase):
lister_subdir = 'npm'
good_api_response_file = 'api_inc_response.json'
bad_api_response_file = 'api_inc_empty_response.json'
first_index = 6920642
first_index = '6920642'
entries_per_page = 100
@requests_mock.Mocker()
......
......@@ -21,6 +21,7 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
first_index = 1
last_index = 12
entries_per_page = 10
convert_type = int
def get_fl(self, override_config=None):
"""(Override) Retrieve an instance of fake lister (fl).
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment