add Python client for the Web API
Merge request reports
Activity
Build is green. See https://jenkins.softwareheritage.org/job/DWCLI/job/tox/1/ for more details.
Some references in the commit message have been migrated:
- T2279 is now swh-web#2279 (closed)
Build is green. See https://jenkins.softwareheritage.org/job/DWCLI/job/tox/2/ for more details.
Build is green. See https://jenkins.softwareheritage.org/job/DWCLI/job/tox/3/ for more details.
- swh/web/client/client.py 0 → 100644
174 merging them together before returning 175 176 """ 177 snapshot = {} 178 for snp in self.snapshot(pid): 179 snapshot.update(snp) 180 181 return snapshot 182 183 def get(self, pid: PIDish, **req_args) -> Any: 184 """Retrieve information about an object of any kind 185 186 Dispatcher method over the more specific methods content(), 187 directory(), etc. 188 189 Note that this method will buffer the entire output in case of long, - swh/web/client/tests/conftest.py 0 → 100644
8 from .api_data import API_URL, API_DATA 9 from swh.web.client import WebAPIClient 10 11 12 @pytest.fixture 13 def web_api_mock(requests_mock): 14 for api_call, data in API_DATA.items(): 15 headers = {} 16 if api_call == "snapshot/cabcc7d7bf639bbe1cc3b41989e1806618dd5764/": 17 # monkey patch the only URL that require a special response headers 18 # (to make the client insit and follow pagination) 19 headers = { 20 "Link": 21 f"<{API_URL}/{api_call}?branches_count=1000&branches_from=refs/tags/v3.0-rc7>; rel=\"next\"" # NoQA: E501 22 } 23 requests_mock.get(f"{API_URL}/{api_call}", text=data, headers=headers) - swh/web/client/tests/test_web_api_client.py 0 → 100644
1 # Copyright (C) 2020 The Software Heritage developers 2 # See the AUTHORS file at the top-level directory of this distribution 3 # License: GNU General Public License version 3, or any later version 4 # See top-level LICENSE file for more information 5 6 from dateutil.parser import parse as parse_date 7 8 from swh.model.identifiers import parse_persistent_identifier as parse_pid 9 10 11 def test_get_content(web_api_client, web_api_mock): 12 pid = parse_pid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") 13 obj = web_api_client.get(pid) - swh/web/client/tests/test_web_api_client.py 0 → 100644
10 11 def test_get_content(web_api_client, web_api_mock): 12 pid = parse_pid("swh:1:cnt:fe95a46679d128ff167b7c55df5d02356c5a1ae1") 13 obj = web_api_client.get(pid) 14 15 assert obj["length"] == 151810 16 for key in ("length", "status", "checksums", "data_url"): 17 assert key in obj 18 assert obj["checksums"]["sha1_git"] == str(pid).split(":")[3] 19 assert obj["checksums"]["sha1"] == \ 20 "dc2830a9e72f23c1dfebef4413003221baa5fb62" 21 22 assert obj == web_api_client.content(pid) 23 24 25 def test_get_directory(web_api_client, web_api_mock): - swh/web/client/tests/test_web_api_client.py 0 → 100644
24 25 def test_get_directory(web_api_client, web_api_mock): 26 pid = parse_pid("swh:1:dir:977fc4b98c0e85816348cebd3b12026407c368b6") 27 obj = web_api_client.get(pid) 28 29 assert len(obj) == 35 # number of directory entries 30 assert all(map(lambda entry: entry["dir_id"] == pid, obj)) 31 dir_entry = obj[0] 32 assert dir_entry["type"] == "file" 33 assert dir_entry["target"] == parse_pid( 34 "swh:1:cnt:58471109208922c9ee8c4b06135725f03ed16814") 35 assert dir_entry["name"] == ".bzrignore" 36 assert dir_entry["length"] == 582 37 38 assert obj == web_api_client.directory(pid) 39 - swh/web/client/tests/test_web_api_client.py 0 → 100644
39 40 41 def test_get_release(web_api_client, web_api_mock): 42 pid = parse_pid("swh:1:rel:b9db10d00835e9a43e2eebef2db1d04d4ae82342") 43 obj = web_api_client.get(pid) 44 45 assert obj["id"] == pid 46 assert obj["author"]["fullname"] == "Paul Tagliamonte <tag@pault.ag>" 47 assert obj["author"]["name"] == "Paul Tagliamonte" 48 assert obj["date"] == parse_date("2013-07-06T19:34:11-04:00") 49 assert obj["name"] == "0.9.9" 50 assert obj["target_type"] == "revision" 51 assert obj["target"] == parse_pid( 52 "swh:1:rev:e005cb773c769436709ca6a1d625dc784dbc1636") 53 assert not obj["synthetic"] 54 I have added a couple of inline comments that should be addressed before landing the client.
I noticed that only PIDs can be provided to the content, directory, revision, release and snapshot methods. I imagine this is intended for validation purposes. But it would be great to also allow passing hashes to these methods, as we can easily reconstruct the PIDs from them when we know the object type.
Also a nitpick: for consistency with the docstrings of the other swh modules, sentences should begin with an uppercase letter.
Next step: add a CLI tool wrapping that Web API client?
Build is green. See https://jenkins.softwareheritage.org/job/DWCLI/job/tox/4/ for more details.
Build is green. See https://jenkins.softwareheritage.org/job/DWCLI/job/tox/5/ for more details.
Thanks for the great review!
All done, except for this point, which was indeed on purpose:
I noticed that only PIDS can be provided to the content, directory, revision, release and snapshot methods.
Not really for validation purposes (although it clearly helps with that), but rather because a background idea here is to be future-proof and to already adopt a convention that is on our roadmap for API v2, i.e., use PIDs (and only PIDs) everywhere.