From e7668999bcfad88dedc656cdbcf3a8c55bcc04c9 Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Mon, 28 Oct 2019 13:53:06 +0100
Subject: [PATCH] Rename 'cursor' to 'scroll_token'.

'cursor' has a different meaning in our components using postgresql;
this renaming will avoid confusion when adding token-based pagination
to these components.
---
 swh/search/elasticsearch.py     | 23 ++++++++++++-----------
 swh/search/in_memory.py         | 17 +++++++++--------
 swh/search/tests/test_cli.py    |  2 +-
 swh/search/tests/test_search.py | 24 ++++++++++++------------
 swh/search/utils.py             | 12 ++++++------
 5 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
index 8c5874c..9aef7ae 100644
--- a/swh/search/elasticsearch.py
+++ b/swh/search/elasticsearch.py
@@ -107,25 +107,25 @@ class ElasticSearch:
     def origin_search(
             self, *,
             url_pattern: str = None, metadata_pattern: str = None,
-            cursor: str = None, count: int = 50
+            scroll_token: str = None, count: int = 50
             ) -> Dict[str, object]:
         """Searches for origins matching the `url_pattern`.
 
         Args:
             url_pattern (str): Part of thr URL to search for
-            cursor (str): `cursor` is opaque value used for pagination.
+            scroll_token (str): `scroll_token` is an opaque value used for
+                                pagination.
             count (int): number of results to return.
 
         Returns:
             a dictionary with keys:
-            * `cursor`:
+            * `scroll_token`:
               opaque value used for fetching more results. `None` if there
               are no more result.
             * `results`:
               list of dictionaries with key:
               * `url`: URL of a matching origin
         """
-        # TODO: find a better name for "cursor"
         query_clauses = []
 
         if url_pattern:
@@ -171,11 +171,11 @@ class ElasticSearch:
                 {'_id': 'asc'},
             ]
         }
-        if cursor:
+        if scroll_token:
             # TODO: use ElasticSearch's scroll API?
-            cursor = msgpack.loads(base64.b64decode(cursor))
+            scroll_token = msgpack.loads(base64.b64decode(scroll_token))
             body['search_after'] = \
-                [cursor[b'score'], cursor[b'id'].decode('ascii')]
+                [scroll_token[b'score'], scroll_token[b'id'].decode('ascii')]
 
         res = self._backend.search(
             index='origin',
@@ -187,16 +187,17 @@ class ElasticSearch:
 
         if len(hits) == count:
             last_hit = hits[-1]
-            next_cursor = {
+            next_scroll_token = {
                 b'score': last_hit['_score'],
                 b'id': last_hit['_id'],
             }
-            next_cursor = base64.b64encode(msgpack.dumps(next_cursor))
+            next_scroll_token = base64.b64encode(msgpack.dumps(
+                next_scroll_token))
         else:
-            next_cursor = None
+            next_scroll_token = None
 
         return {
-            'cursor': next_cursor,
+            'scroll_token': next_scroll_token,
             'results': [
                 {
                     # TODO: also add 'id'?
diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
index 8eae987..4592457 100644
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -61,7 +61,7 @@ class InMemorySearch:
     def origin_search(
             self, *,
             url_pattern: str = None, metadata_pattern: str = None,
-            cursor: str = None, count: int = 50
+            scroll_token: str = None, count: int = 50
             ) -> Dict[str, object]:
         matches = (self._origins[id_] for id_ in self._origin_ids)
 
@@ -91,9 +91,9 @@ class InMemorySearch:
                 'At least one of url_pattern and metadata_pattern '
                 'must be provided.')
 
-        if cursor:
-            cursor = msgpack.loads(base64.b64decode(cursor))
-            start_at_index = cursor[b'start_at_index']
+        if scroll_token:
+            scroll_token = msgpack.loads(base64.b64decode(scroll_token))
+            start_at_index = scroll_token[b'start_at_index']
         else:
             start_at_index = 0
 
@@ -101,15 +101,16 @@ class InMemorySearch:
             matches, start_at_index, start_at_index+count))
 
         if len(hits) == count:
-            next_cursor = {
+            next_scroll_token = {
                 b'start_at_index': start_at_index+count,
             }
-            next_cursor = base64.b64encode(msgpack.dumps(next_cursor))
+            next_scroll_token = base64.b64encode(msgpack.dumps(
+                next_scroll_token))
         else:
-            next_cursor = None
+            next_scroll_token = None
 
         return {
-            'cursor': next_cursor,
+            'scroll_token': next_scroll_token,
             'results': [
                 {'url': hit['url']}
                 for hit in hits
diff --git a/swh/search/tests/test_cli.py b/swh/search/tests/test_cli.py
index e7cf7f7..15bd576 100644
--- a/swh/search/tests/test_cli.py
+++ b/swh/search/tests/test_cli.py
@@ -82,5 +82,5 @@ class CliTestCase(BaseElasticsearchTest):
         assert result.output == expected_output
 
         results = self.search.origin_search(url_pattern='foobar')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://foobar.baz'}]}
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
index e25f03e..4c4e2e9 100644
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -17,20 +17,20 @@ class CommonSearchTest:
         ])
 
         results = self.search.origin_search(url_pattern='foobar')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://foobar.baz'}]}
 
         results = self.search.origin_search(url_pattern='barb')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://barbaz.qux'}]}
 
         # 'bar' is part of 'foobar', but is not the beginning of it
         results = self.search.origin_search(url_pattern='bar')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://barbaz.qux'}]}
 
         results = self.search.origin_search(url_pattern='barbaz')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://barbaz.qux'}]}
 
     def test_origin_url_unique_word_prefix_multiple_results(self):
@@ -41,14 +41,14 @@ class CommonSearchTest:
         ])
 
         results = self.search.origin_search(url_pattern='qu')
-        assert results['cursor'] is None
+        assert results['scroll_token'] is None
 
         results = [res['url'] for res in results['results']]
         expected_results = ['http://qux.quux', 'http://barbaz.qux']
         assert sorted(results) == sorted(expected_results)
 
         results = self.search.origin_search(url_pattern='qux')
-        assert results['cursor'] is None
+        assert results['scroll_token'] is None
 
         results = [res['url'] for res in results['results']]
         expected_results = ['http://barbaz.qux', 'http://qux.quux']
@@ -77,16 +77,16 @@ class CommonSearchTest:
         ])
 
         results = self.search.origin_search(metadata_pattern='foo')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://origin2'}]}
 
         # ES returns both results, because blahblah
         results = self.search.origin_search(metadata_pattern='foo bar')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://origin2'}, {'url': 'http://origin3'}]}
 
         results = self.search.origin_search(metadata_pattern='bar baz')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://origin3'}, {'url': 'http://origin2'}]}
 
     def test_origin_intrinsic_metadata_nested(self):
@@ -112,15 +112,15 @@ class CommonSearchTest:
         ])
 
         results = self.search.origin_search(metadata_pattern='foo')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://origin2'}]}
 
         results = self.search.origin_search(metadata_pattern='foo bar')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://origin2'}, {'url': 'http://origin3'}]}
 
         results = self.search.origin_search(metadata_pattern='bar baz')
-        assert results == {'cursor': None, 'results': [
+        assert results == {'scroll_token': None, 'results': [
             {'url': 'http://origin3'}, {'url': 'http://origin2'}]}
 
     # TODO: add more tests with more codemeta terms
diff --git a/swh/search/utils.py b/swh/search/utils.py
index a91c98a..19b6551 100644
--- a/swh/search/utils.py
+++ b/swh/search/utils.py
@@ -5,12 +5,12 @@
 
 
 def stream_results(f, *args, **kwargs):
-    if 'cursor' in kwargs:
-        raise TypeError('stream_results has no argument "cursor".')
-    cursor = None
+    if 'scroll_token' in kwargs:
+        raise TypeError('stream_results has no argument "scroll_token".')
+    scroll_token = None
     while True:
-        results = f(*args, cursor=cursor, **kwargs)
+        results = f(*args, scroll_token=scroll_token, **kwargs)
         yield from results['results']
-        cursor = results['cursor']
-        if cursor is None:
+        scroll_token = results['scroll_token']
+        if scroll_token is None:
             break
-- 
GitLab