Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-lister
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Antoine R. Dumont
swh-lister
Commits
935b9cd2
Verified
Commit
935b9cd2
authored
6 years ago
by
Antoine R. Dumont
Browse files
Options
Downloads
Patches
Plain Diff
swh.lister.core: Make gitlab lister a paging lister instance
Related T989
parent
db36c499
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
swh/lister/core/paging_lister.py
+117
-0
117 additions, 0 deletions
swh/lister/core/paging_lister.py
swh/lister/gitlab/lister.py
+3
-15
3 additions, 15 deletions
swh/lister/gitlab/lister.py
with
120 additions
and
15 deletions
swh/lister/core/paging_lister.py
0 → 100644
+
117
−
0
View file @
935b9cd2
# Copyright (C) 2015-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import
abc
import
logging
from
.lister_transports
import
SWHListerHttpTransport
from
.lister_base
import
SWHListerBase
class SWHPagingLister(SWHListerBase):
    """Lister* intermediate class for any service that follows the simple
    pagination page pattern.

    - Client sends a request to list repositories starting from a
      given page identifier.

    - Client receives structured (json/xml/etc) response with
      information about a sequential series of repositories (per page)
      starting from a given index. And, if available, some indication
      of the next page index for fetching the remaining repository
      data.

    See :class:`swh.lister.core.lister_base.SWHListerBase` for more
    details.

    This class cannot be instantiated. To create a new Lister for a
    source code listing service that follows the model described
    above, you must subclass this class. Then provide the required
    overrides in addition to any unmet implementation/override
    requirements of this class's base (see parent class and member
    docstrings for details).

    Required Overrides::

        def get_next_target_from_response

    """
    @abc.abstractmethod
    def get_next_target_from_response(self, response):
        """Find the next server endpoint page given the entire response.

        Implementation of this method depends on the server API spec
        and the shape of the network response object returned by the
        transport_request method.

        For example, some api can use the headers links to provide the
        next page.

        Args:
            response (transport response): response page from the server

        Returns:
            index of next page, possibly extracted from a next href url

        """

    # You probably don't need to override anything below this line.

    def run(self, min_index=None, max_index=None):
        """Main entry function. Sequentially fetches repository data from the
        service according to the basic outline in the class
        docstring. Continually fetching sublists until either there
        is no next index reference given or the given next index is
        greater than the desired max_index.

        The database session is committed and replaced by a fresh one
        (``self.mk_session()``) every 20 successfully advanced pages,
        and once more when the loop ends.

        Args:
            min_index (indexable type): optional index to start from
            max_index (indexable type): optional index to stop at

        Returns:
            nothing

        """
        # NOTE(review): any falsy min_index (None, but also 0) starts the
        # listing at '' -- confirm that is intended for integer pages.
        index = min_index or ''
        loop_count = 0
        self.min_index = min_index
        self.max_index = max_index

        while self.is_within_bounds(index, self.min_index, self.max_index):
            # Pass the index as a logging argument rather than formatting
            # eagerly with '%': formatting is deferred to the logging
            # framework and a tuple index can no longer break the call.
            logging.info('listing repos starting at %s', index)

            # Only the response is needed here, to locate the next page.
            response, _ = self.ingest_data(index)
            next_index = self.get_next_target_from_response(response)

            # termination condition: no next page, or the server points
            # back at the page that was just listed
            if next_index is None or next_index == index:
                logging.info(
                    'stopping after index %s, no next link found', index)
                break

            index = next_index

            loop_count += 1
            if loop_count == 20:
                # periodic flush: commit what was ingested so far and
                # continue with a new session
                logging.info('flushing updates')
                loop_count = 0
                self.db_session.commit()
                self.db_session = self.mk_session()

        # final flush for the pages ingested since the last periodic one
        self.db_session.commit()
        self.db_session = self.mk_session()
class SWHPagingHttpLister(SWHListerHttpTransport, SWHPagingLister):
    """Paging lister combined with the HTTP transport.

    Convenience class that fixes the lookup and initialisation order
    when SWHListerHttpTransport and SWHPagingLister are used together.

    """
    def __init__(self, lister_name=None, api_baseurl=None,
                 override_config=None):
        # Both bases are initialised explicitly, transport first, each
        # one receiving only the keyword arguments listed here.
        SWHListerHttpTransport.__init__(self, api_baseurl=api_baseurl)

        lister_kwargs = {
            'lister_name': lister_name,
            'override_config': override_config,
        }
        SWHPagingLister.__init__(self, **lister_kwargs)
This diff is collapsed.
Click to expand it.
swh/lister/gitlab/lister.py
+
3
−
15
View file @
935b9cd2
...
...
@@ -6,20 +6,14 @@ import random
import
re
import
time
from
..core.
index
ing_lister
import
SWH
Index
ingHttpLister
from
..core.
pag
ing_lister
import
SWH
Pag
ingHttpLister
from
.models
import
GitLabModel
class
GitLabLister
(
SWH
Index
ingHttpLister
):
#
Path to give and mentioning the last id for the next
page
class
GitLabLister
(
SWH
Pag
ingHttpLister
):
#
Template path expecting an integer that represents the
page
id
PATH_TEMPLATE
=
'
/projects?page=%d&order_by=id&sort=asc&simple=true
'
# The GitLab API does not have an indexable identifier, so the page
# id is used instead
API_URL_INDEX_RE
=
re
.
compile
(
r
'
^.*/projects.*\&page=(\d+).*
'
)
# The indexable field, the one we are supposed to use in the api
# query is not part of the lookup query. So, we cannot filter
# (method filter_before_inject), nor detect and disable origins
# (method disable_deleted_repo_tasks)
MODEL
=
GitLabModel
@property
...
...
@@ -79,12 +73,6 @@ class GitLabLister(SWHIndexingHttpLister):
params
[
'
auth
'
]
=
(
auth
[
'
username
'
],
auth
[
'
password
'
])
return
params
def
filter_before_inject
(
self
,
models_list
):
"""
We cannot filter so returns the models_list as is.
"""
return
models_list
def
get_model_from_repo
(
self
,
repo
):
return
{
'
instance
'
:
self
.
lister_name
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment