From e7668999bcfad88dedc656cdbcf3a8c55bcc04c9 Mon Sep 17 00:00:00 2001 From: Valentin Lorentz <vlorentz@softwareheritage.org> Date: Mon, 28 Oct 2019 13:53:06 +0100 Subject: [PATCH] Rename 'cursor' to 'scroll_token'. 'cursor' has a different meaning in our components using postgresql; this renaming will avoid confusion when adding token-based pagination to these components. --- swh/search/elasticsearch.py | 23 ++++++++++++----------- swh/search/in_memory.py | 17 +++++++++-------- swh/search/tests/test_cli.py | 2 +- swh/search/tests/test_search.py | 24 ++++++++++++------------ swh/search/utils.py | 12 ++++++------ 5 files changed, 40 insertions(+), 38 deletions(-) diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py index 8c5874c..9aef7ae 100644 --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -107,25 +107,25 @@ class ElasticSearch: def origin_search( self, *, url_pattern: str = None, metadata_pattern: str = None, - cursor: str = None, count: int = 50 + scroll_token: str = None, count: int = 50 ) -> Dict[str, object]: """Searches for origins matching the `url_pattern`. Args: url_pattern (str): Part of thr URL to search for - cursor (str): `cursor` is opaque value used for pagination. + scroll_token (str): `scroll_token` is an opaque value used for + pagination. count (int): number of results to return. Returns: a dictionary with keys: - * `cursor`: + * `scroll_token`: opaque value used for fetching more results. `None` if there are no more result. * `results`: list of dictionaries with key: * `url`: URL of a matching origin """ - # TODO: find a better name for "cursor" query_clauses = [] if url_pattern: @@ -171,11 +171,11 @@ class ElasticSearch: {'_id': 'asc'}, ] } - if cursor: + if scroll_token: # TODO: use ElasticSearch's scroll API? - cursor = msgpack.loads(base64.b64decode(cursor)) + scroll_token = msgpack.loads(base64.b64decode(scroll_token)) body['search_after'] = \ - [cursor[b'score'], cursor[b'id'].decode('ascii')] + [scroll_token[b'score'], scroll_token[b'id'].decode('ascii')] res = self._backend.search( index='origin', @@ -187,16 +187,17 @@ class ElasticSearch: if len(hits) == count: last_hit = hits[-1] - next_cursor = { + next_scroll_token = { b'score': last_hit['_score'], b'id': last_hit['_id'], } - next_cursor = base64.b64encode(msgpack.dumps(next_cursor)) + next_scroll_token = base64.b64encode(msgpack.dumps( + next_scroll_token)) else: - next_cursor = None + next_scroll_token = None return { - 'cursor': next_cursor, + 'scroll_token': next_scroll_token, 'results': [ { # TODO: also add 'id'? diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py index 8eae987..4592457 100644 --- a/swh/search/in_memory.py +++ b/swh/search/in_memory.py @@ -61,7 +61,7 @@ class InMemorySearch: def origin_search( self, *, url_pattern: str = None, metadata_pattern: str = None, - cursor: str = None, count: int = 50 + scroll_token: str = None, count: int = 50 ) -> Dict[str, object]: matches = (self._origins[id_] for id_ in self._origin_ids) @@ -91,9 +91,9 @@ class InMemorySearch: 'At least one of url_pattern and metadata_pattern ' 'must be provided.') - if cursor: - cursor = msgpack.loads(base64.b64decode(cursor)) - start_at_index = cursor[b'start_at_index'] + if scroll_token: + scroll_token = msgpack.loads(base64.b64decode(scroll_token)) + start_at_index = scroll_token[b'start_at_index'] else: start_at_index = 0 @@ -101,15 +101,16 @@ class InMemorySearch: matches, start_at_index, start_at_index+count)) if len(hits) == count: - next_cursor = { + next_scroll_token = { b'start_at_index': start_at_index+count, } - next_cursor = base64.b64encode(msgpack.dumps(next_cursor)) + next_scroll_token = base64.b64encode(msgpack.dumps( + next_scroll_token)) else: - next_cursor = None + next_scroll_token = None return { - 'cursor': next_cursor, + 'scroll_token': next_scroll_token, 'results': [ {'url': hit['url']} for hit in hits diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py index e7cf7f7..15bd576 100644 --- a/swh/search/tests/test_cli.py +++ b/swh/search/tests/test_cli.py @@ -82,5 +82,5 @@ class CliTestCase(BaseElasticsearchTest): assert result.output == expected_output results = self.search.origin_search(url_pattern='foobar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://foobar.baz'}]} diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py index e25f03e..4c4e2e9 100644 --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -17,20 +17,20 @@ class CommonSearchTest: ]) results = self.search.origin_search(url_pattern='foobar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://foobar.baz'}]} results = self.search.origin_search(url_pattern='barb') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://barbaz.qux'}]} # 'bar' is part of 'foobar', but is not the beginning of it results = self.search.origin_search(url_pattern='bar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://barbaz.qux'}]} results = self.search.origin_search(url_pattern='barbaz') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://barbaz.qux'}]} def test_origin_url_unique_word_prefix_multiple_results(self): @@ -41,14 +41,14 @@ class CommonSearchTest: ]) results = self.search.origin_search(url_pattern='qu') - assert results['cursor'] is None + assert results['scroll_token'] is None results = [res['url'] for res in results['results']] expected_results = ['http://qux.quux', 'http://barbaz.qux'] assert sorted(results) == sorted(expected_results) results = self.search.origin_search(url_pattern='qux') - assert results['cursor'] is None + assert results['scroll_token'] is None results = [res['url'] for res in results['results']] expected_results = ['http://barbaz.qux', 'http://qux.quux'] @@ -77,16 +77,16 @@ class CommonSearchTest: ]) results = self.search.origin_search(metadata_pattern='foo') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin2'}]} # ES returns both results, because blahblah results = self.search.origin_search(metadata_pattern='foo bar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin2'}, {'url': 'http://origin3'}]} results = self.search.origin_search(metadata_pattern='bar baz') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin3'}, {'url': 'http://origin2'}]} def test_origin_intrinsic_metadata_nested(self): @@ -112,15 +112,15 @@ class CommonSearchTest: ]) results = self.search.origin_search(metadata_pattern='foo') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin2'}]} results = self.search.origin_search(metadata_pattern='foo bar') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin2'}, {'url': 'http://origin3'}]} results = self.search.origin_search(metadata_pattern='bar baz') - assert results == {'cursor': None, 'results': [ + assert results == {'scroll_token': None, 'results': [ {'url': 'http://origin3'}, {'url': 'http://origin2'}]} # TODO: add more tests with more codemeta terms diff --git a/swh/search/utils.py b/swh/search/utils.py index a91c98a..19b6551 100644 --- a/swh/search/utils.py +++ b/swh/search/utils.py @@ -5,12 +5,12 @@ def stream_results(f, *args, **kwargs): - if 'cursor' in kwargs: - raise TypeError('stream_results has no argument "cursor".') - cursor = None + if 'scroll_token' in kwargs: + raise TypeError('stream_results has no argument "scroll_token".') + scroll_token = None while True: - results = f(*args, cursor=cursor, **kwargs) + results = f(*args, scroll_token=scroll_token, **kwargs) yield from results['results'] - cursor = results['cursor'] - if cursor is None: + scroll_token = results['scroll_token'] + if scroll_token is None: break -- GitLab