Skip to content
Snippets Groups Projects
Commit 2ed36c90 authored by vlorentz
Browse files

Add api_origin_metadata_search.

Reviewers: #reviewers, anlambert

Reviewed By: #reviewers, anlambert

Subscribers: anlambert, zack, swh-public-ci

Differential Revision: https://forge.softwareheritage.org/D751
parents a5642aec de1ee11b
No related merge requests found
......@@ -6,6 +6,7 @@
from distutils.util import strtobool
from swh.web.common import service
from swh.web.common.exc import BadInputExc
from swh.web.common.utils import (
reverse, get_origin_visits
)
......@@ -174,6 +175,56 @@ def api_origin_search(request, url_pattern):
return result
@api_route(r'/origin/metadata-search/',
           'api-origin-metadata-search')
@api_doc('/origin/metadata-search/', noargs=True)
def api_origin_metadata_search(request):
    """
    .. http:get:: /api/1/origin/metadata-search/

        Search for software origins whose metadata (expressed as a
        JSON-LD/CodeMeta dictionary) match the provided criteria.
        For now, only full-text search on this dictionary is supported.

        :query str fulltext: a string that will be matched against origin metadata;
            results are ranked and ordered starting with the best ones.
        :query int limit: the maximum number of found origins to return
            (defaults to 70, bounded to 100)

        :>jsonarr number origin_id: the origin unique identifier
        :>jsonarr dict metadata: metadata of the origin (as a JSON-LD/CodeMeta dictionary)
        :>jsonarr string from_revision: the revision used to extract these
            metadata (the current HEAD or one of the former HEADs)
        :>jsonarr dict tool: the tool used to extract these metadata

        :reqheader Accept: the requested response content type,
            either ``application/json`` (default) or ``application/yaml``
        :resheader Content-Type: this depends on :http:header:`Accept` header of request

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`

        :statuscode 200: no error
        :statuscode 400: ``fulltext`` is missing or empty, or ``limit`` is not an integer

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`
    """ # noqa
    # Reject a missing or empty search string before doing anything else.
    fulltext = request.query_params.get('fulltext', None)
    if not fulltext:
        content = '"fulltext" must be provided and non-empty.'
        raise BadInputExc(content)

    # Query parameters are user-supplied strings: a non-numeric "limit" must
    # be reported as bad input instead of escaping as an uncaught ValueError.
    raw_limit = request.query_params.get('limit', '70')
    try:
        limit = min(int(raw_limit), 100)
    except ValueError:
        raise BadInputExc('"limit" must be an integer.') from None

    results = api_lookup(service.search_origin_metadata, fulltext, limit)

    return {
        'results': results,
    }
@api_route(r'/origin/(?P<origin_id>[0-9]+)/visits/', 'api-origin-visits')
@api_doc('/origin/visits/')
def api_origin_visits(request, origin_id):
......
......@@ -266,6 +266,25 @@ def search_origin(url_pattern, offset=0, limit=50, regexp=False,
return map(converters.from_origin, origins)
def search_origin_metadata(fulltext, limit=50):
    """Search for origins whose metadata match a provided string pattern.

    Args:
        fulltext: the string pattern to search for in origin metadata
        limit: the maximum number of found origins to return

    Returns:
        list of origin metadata as dict, with each ``from_revision``
        value converted through ``hashutil.hash_to_hex``.

    """
    # Materialize the storage answer: if it were a one-shot iterator, the
    # conversion loop below would exhaust it and callers would get nothing.
    results = list(idx_storage.origin_intrinsic_metadata_search_fulltext(
        conjunction=[fulltext], limit=limit))
    for result in results:
        result['from_revision'] = hashutil.hash_to_hex(
            result['from_revision'])
    return results
def lookup_person(person_id):
"""Return information about the person with id person_id.
......
......@@ -244,3 +244,109 @@ class OriginApiTestCase(SWHWebTestCase, APITestCase):
})
mock_service.lookup_origin.assert_called_with({'id': '4321'})
@patch('swh.web.common.service.idx_storage')
def test_api_origin_metadata_search(self, mock_idx_storage):
    # Checks that a full-text query without an explicit limit is forwarded
    # to the indexer storage with the default limit, and that the revision
    # id comes back hex-encoded in the JSON payload.
    fulltext_search = \
        mock_idx_storage.origin_intrinsic_metadata_search_fulltext

    # given: the indexer storage knows a single matching origin
    stored_entry = {
        'from_revision':
            b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
        'metadata': {'author': 'Jane Doe'},
        'origin_id': 54974445,
        'tool': {
            'configuration': {
                'context': ['NpmMapping', 'CodemetaMapping'],
                'type': 'local'
            },
            'id': 3,
            'name': 'swh-metadata-detector',
            'version': '0.0.1'
        }
    }
    fulltext_search.return_value = [stored_entry]

    # when: the endpoint is queried with only the "fulltext" parameter
    url = '/api/1/origin/metadata-search/?fulltext=Jane%20Doe'
    response = self.client.get(url)

    # then: the answer is a JSON list holding the converted entry
    self.assertEqual(response.status_code, 200, response.content)
    self.assertEqual(response['Content-Type'], 'application/json')
    expected_entry = {
        'origin_id': 54974445,
        'metadata': {'author': 'Jane Doe'},
        'from_revision': '7026b7c1a2af56521e951c01ed20f255fa054238',
        'tool': {
            'configuration': {
                'context': ['NpmMapping', 'CodemetaMapping'],
                'type': 'local'
            },
            'id': 3,
            'name': 'swh-metadata-detector',
            'version': '0.0.1',
        }
    }
    self.assertEqual(response.data, [expected_entry])
    fulltext_search.assert_called_with(conjunction=['Jane Doe'], limit=70)
@patch('swh.web.common.service.idx_storage')
def test_api_origin_metadata_search_limit(self, mock_idx_storage):
    # Checks how the "limit" query parameter reaches the indexer storage:
    # absent -> 70, in range -> unchanged, above the bound -> 100.
    fulltext_search = \
        mock_idx_storage.origin_intrinsic_metadata_search_fulltext

    # given: the indexer storage always answers with one matching origin
    fulltext_search.return_value = [{
        'from_revision':
            b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
        'metadata': {'author': 'Jane Doe'},
        'origin_id': 54974445,
        'tool': {
            'configuration': {
                'context': ['NpmMapping', 'CodemetaMapping'],
                'type': 'local'
            },
            'id': 3,
            'name': 'swh-metadata-detector',
            'version': '0.0.1'
        }
    }]

    base_url = '/api/1/origin/metadata-search/?fulltext=Jane%20Doe'
    # (extra query string, limit expected by the storage), in request order
    scenarios = (
        ('', 70),
        ('&limit=10', 10),
        ('&limit=987', 100),
    )

    for extra_query, expected_limit in scenarios:
        # when
        response = self.client.get(base_url + extra_query)

        # then
        self.assertEqual(response.status_code, 200, response.content)
        self.assertEqual(response['Content-Type'], 'application/json')
        self.assertEqual(len(response.data), 1)
        fulltext_search.assert_called_with(
            conjunction=['Jane Doe'], limit=expected_limit)
@patch('swh.web.common.service.idx_storage')
def test_api_origin_metadata_search_invalid(self, mock_idx_storage):
    # Checks that a request without the "fulltext" parameter is rejected
    # as bad input before the indexer storage is queried.

    # when
    rv = self.client.get('/api/1/origin/metadata-search/')

    # then
    self.assertEqual(rv.status_code, 400, rv.content)
    # Assert on the search method itself: the patched storage object is
    # never called directly, so checking the parent mock proves nothing.
    mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
        .assert_not_called()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment