Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-loader-pypi
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
Platform
Development
swh-loader-pypi
Commits
c1a3a29b
Unverified
Commit
c1a3a29b
authored
6 years ago
by
Antoine R. Dumont
Browse files
Options
Downloads
Patches
Plain Diff
swh.loader.pypi: Update docstrings
parent
d32c08b2
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
swh/loader/pypi/client.py
+45
-5
45 additions, 5 deletions
swh/loader/pypi/client.py
swh/loader/pypi/loader.py
+13
-2
13 additions, 2 deletions
swh/loader/pypi/loader.py
swh/loader/pypi/model.py
+23
-2
23 additions, 2 deletions
swh/loader/pypi/model.py
with
81 additions
and
9 deletions
swh/loader/pypi/client.py
+
45
−
5
View file @
c1a3a29b
...
...
@@ -72,10 +72,12 @@ class PyPiClient:
"""
Log the response from a server request to a cache dir.
Args:
response: full server response
cache_dir: system path for cache dir
response (Response): full server response
cache_dir (str): system path for cache dir
Returns:
nothing
"""
import
gzip
from
json
import
dumps
...
...
@@ -90,6 +92,15 @@ class PyPiClient:
def
_get
(
self
,
url
):
"""
Get query to the url.
Args:
url (str): Url
Raises:
ValueError if the query fails
Returns:
Response as dict if ok
"""
response
=
self
.
session
.
get
(
url
,
**
self
.
params
)
if
response
.
status_code
!=
200
:
...
...
@@ -104,17 +115,48 @@ class PyPiClient:
def
info
(
self
,
project_url
):
"""
Given a metadata project url, retrieve the raw json response
Args:
project_url (str): Project
'
s pypi metadata url to retrieve information from
Returns:
Main project information as dict.
"""
return
self
.
_get
(
project_url
)
def
release
(
self
,
project
,
release
):
"""
Given a project and a release name, retrieve the raw json response
"""
Given a project and a release name, retrieve the raw information
for said project
'
s release.
Args:
project (str): Project
'
s name
release (str): Release name
Returns:
Release information as dict
"""
release_url
=
'
https://pypi.org/pypi/%s/%s/json
'
%
(
project
,
release
)
return
self
.
_get
(
release_url
)
def
fetch_release
(
self
,
project
,
release
):
"""
Fetch the artifact associated with a given project release.
This:
- fetches the artifact
- checks that the size and hashes match
- uncompresses the artifact locally
- computes the swh hashes
- returns the associated information for the artifact
Args:
project (str): Project
'
s name
release (dict): Release information
Returns:
Release information (dict) updated with the artifact information
"""
version
=
release
[
'
name
'
]
logging
.
debug
(
'
Release version: %s
'
%
version
)
path
=
os
.
path
.
join
(
self
.
temp_directory
,
project
,
version
)
...
...
@@ -128,8 +170,6 @@ class PyPiClient:
raise
ValueError
(
"
Fail to query
'
%s
'
. Reason: %s
"
%
(
url
,
r
.
status_code
))
# checks
_len
=
len
(
r
.
content
)
if
_len
!=
release
[
'
size
'
]:
raise
ValueError
(
'
Error when checking size: %s != %s
'
%
(
...
...
This diff is collapsed.
Click to expand it.
swh/loader/pypi/loader.py
+
13
−
2
View file @
c1a3a29b
...
...
@@ -43,7 +43,8 @@ class PyPiLoader(SWHLoader):
self
.
debug
=
self
.
config
[
'
debug
'
]
def
pre_cleanup
(
self
):
"""
(override) To prevent disk explosion...
"""
(override) To prevent disk explosion if some other workers exploded
in mid-air (OOM killed), we try and clean up dangling files.
"""
clean_dangling_folders
(
self
.
temp_directory
,
...
...
@@ -67,18 +68,28 @@ class PyPiLoader(SWHLoader):
origin_metadata_url
=
None
):
"""
(override) Prepare the origin visit information
Args:
project_name (str): Project
'
s simple name
origin_url (str): Project
'
s main url
origin_metadata_url (str): Project
'
s metadata url
"""
self
.
origin
=
{
'
url
'
:
origin_url
,
'
type
'
:
'
pypi
'
,
}
self
.
visit_date
=
None
self
.
visit_date
=
None
# loader core will populate it
def
prepare
(
self
,
project_name
,
origin_url
,
origin_metadata_url
=
None
):
"""
(override) Keep reference to the origin url (project) and the
project metadata url
Args:
project_name (str): Project
'
s simple name
origin_url (str): Project
'
s main url
origin_metadata_url (str): Project
'
s metadata url
"""
self
.
project_name
=
project_name
self
.
origin_url
=
origin_url
...
...
This diff is collapsed.
Click to expand it.
swh/loader/pypi/model.py
+
23
−
2
View file @
c1a3a29b
...
...
@@ -53,8 +53,10 @@ class PyPiProject:
This allows extracting information for the:
- project, either the latest information (from the last revision)
- project information for a given release
- same for author information
- either the information for a given release
- Symmetrically for the release author information
This also fetches and uncompresses the associated release artifacts.
"""
def
__init__
(
self
,
client
,
project
,
project_metadata_url
,
data
=
None
):
...
...
@@ -72,6 +74,10 @@ class PyPiProject:
}
def
_data
(
self
,
release_name
=
None
):
"""
Fetch data per release and cache it. Returns the cached data
if already fetched.
"""
if
release_name
:
data
=
self
.
cache
.
get
(
release_name
)
if
not
data
:
...
...
@@ -82,12 +88,27 @@ class PyPiProject:
return
data
def
info
(
self
,
release_name
=
None
):
"""
Compute release information for release provided or the latest one.
"""
return
info
(
self
.
_data
(
release_name
))
def
author
(
self
,
release_name
=
None
):
"""
Compute author for the given release, if any (use the latest
release otherwise).
"""
return
author
(
self
.
_data
(
release_name
))
def
releases
(
self
):
"""
Fetch metadata and data per release.
This downloads and uncompresses the release artifacts.
Yields:
tuple (version, release)
"""
# Then compute information per release
releases_dict
=
self
.
data
[
'
releases
'
]
for
version
in
releases_dict
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment