Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-loader-git-old
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Nicolas Dandrimont
swh-loader-git-old
Commits
705976f7
Commit
705976f7
authored
3 years ago
by
vlorentz
Browse files
Options
Downloads
Patches
Plain Diff
Remove unnecessary use of dulwich.client.HttpGitClient
'requests' does the job just fine with less complexity.
parent
d7481af6
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
swh/loader/git/dumb.py
+12
-23
12 additions, 23 deletions
swh/loader/git/dumb.py
with
12 additions
and
23 deletions
swh/loader/git/dumb.py
+
12
−
23
View file @
705976f7
...
...
@@ -11,12 +11,12 @@ import stat
import
struct
from
tempfile
import
SpooledTemporaryFile
from
typing
import
TYPE_CHECKING
,
Callable
,
Dict
,
Iterable
,
List
,
Set
,
cast
import
urllib.parse
from
dulwich.client
import
HttpGitClient
from
dulwich.errors
import
NotGitRepository
from
dulwich.objects
import
S_IFGITLINK
,
Commit
,
ShaFile
,
Tree
from
dulwich.pack
import
Pack
,
PackData
,
PackIndex
,
load_pack_index_file
from
urllib3.response
import
HTTPResponse
import
requests
from
swh.loader.git.utils
import
HexBytes
...
...
@@ -26,18 +26,7 @@ if TYPE_CHECKING:
logger
=
logging
.
getLogger
(
__name__
)
class
DumbHttpGitClient
(
HttpGitClient
):
"""
Simple wrapper around dulwich.client.HTTPGitClient
"""
def
__init__
(
self
,
base_url
:
str
):
super
().
__init__
(
base_url
)
self
.
user_agent
=
"
Software Heritage dumb Git loader
"
def
get
(
self
,
url
:
str
)
->
HTTPResponse
:
logger
.
debug
(
"
Fetching %s
"
,
url
)
response
,
_
=
self
.
_http_request
(
url
,
headers
=
{
"
User-Agent
"
:
self
.
user_agent
})
return
response
HEADERS
=
{
"
User-Agent
"
:
"
Software Heritage dumb Git loader
"
}
def
check_protocol
(
repo_url
:
str
)
->
bool
:
...
...
@@ -52,12 +41,11 @@ def check_protocol(repo_url: str) -> bool:
"""
if
not
repo_url
.
startswith
(
"
http
"
):
return
False
http_client
=
DumbHttpGitClient
(
repo_url
)
url
=
http_client
.
get_url
(
"
info/refs?service=git-upload-pack
"
)
response
=
http_client
.
get
(
url
)
content_type
=
response
.
getheader
(
"
Content-Type
"
)
url
=
urllib
.
parse
.
urljoin
(
repo_url
,
"
info/refs?service=git-upload-pack/
"
)
response
=
requests
.
get
(
url
,
headers
=
HEADERS
)
content_type
=
response
.
headers
.
get
(
"
Content-Type
"
)
return
(
response
.
status
in
(
200
,
304
,)
response
.
status
_code
in
(
200
,
304
,)
# header is not mandatory in protocol specification
and
(
content_type
is
None
or
not
content_type
.
startswith
(
"
application/x-git-
"
))
)
...
...
@@ -75,7 +63,8 @@ class GitObjectsFetcher:
"""
def
__init__
(
self
,
repo_url
:
str
,
base_repo
:
RepoRepresentation
):
self
.
http_client
=
DumbHttpGitClient
(
repo_url
)
self
.
_session
=
requests
.
Session
()
self
.
repo_url
=
repo_url
self
.
base_repo
=
base_repo
self
.
objects
:
Dict
[
bytes
,
Set
[
bytes
]]
=
defaultdict
(
set
)
self
.
refs
=
self
.
_get_refs
()
...
...
@@ -124,10 +113,10 @@ class GitObjectsFetcher:
return
map
(
self
.
_get_git_object
,
self
.
objects
[
object_type
])
def
_http_get
(
self
,
path
:
str
)
->
SpooledTemporaryFile
:
url
=
self
.
http_client
.
get
_url
(
path
)
response
=
self
.
http_client
.
get
(
url
)
url
=
urllib
.
parse
.
urljoin
(
self
.
repo
_url
,
path
)
response
=
self
.
_session
.
get
(
url
,
headers
=
HEADERS
)
buffer
=
SpooledTemporaryFile
(
max_size
=
100
*
1024
*
1024
)
buffer
.
write
(
response
.
data
)
buffer
.
write
(
response
.
content
)
buffer
.
flush
()
buffer
.
seek
(
0
)
return
buffer
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment