Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-model
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Renaud Boyer
swh-model
Commits
9b9ec94a
Verified
Commit
9b9ec94a
authored
8 years ago
by
Antoine R. Dumont
Browse files
Options
Downloads
Patches
Plain Diff
Optimize walk for edge cases
parent
a91bf69b
No related branches found
Branches containing commit
Tags
v0.0.9
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
swh/model/git.py
+116
-36
116 additions, 36 deletions
swh/model/git.py
with
116 additions
and
36 deletions
swh/model/git.py
+
116
−
36
View file @
9b9ec94a
...
@@ -186,8 +186,114 @@ def compute_tree_metadata(dirname, ls_hashes):
...
@@ -186,8 +186,114 @@ def compute_tree_metadata(dirname, ls_hashes):
}
}
def default_validation_dir(dirpath):
    """Default directory validation predicate: accept every directory.

    Args:
        dirpath: Path to validate (ignored).

    Returns:
        True, unconditionally.

    """
    return True


def __walk(rootdir, dir_ok_fn=default_validation_dir,
           remove_empty_folder=False):
    """Walk the filesystem bottom-up and yield 3-tuples (dirpath, set of
    absolute child directory paths, set of absolute file paths).

    Directories which do not pass the dir_ok_fn validation are not
    yielded and are left out of their parent's set of children.

    If remove_empty_folder is True, any folder found empty (including
    folders that only contained empty folders) is removed and ignored.
    The cleanup never touches anything above rootdir; rootdir itself is
    removed (and not yielded) when it ends up empty.

    Args:
        - rootdir: starting walk root directory path
        - dir_ok_fn: validation function. Folders for which it returns
          a false value are ignored. Defaults to default_validation_dir,
          which accepts everything.
        - remove_empty_folder: flag to remove and ignore any
          encountered empty folder.

    Yields:
        3-tuples dirpath, set of absolute children dirname paths, set
        of absolute filename paths.

    """
    def discard_empty(path):
        # Only rootdir can be a symlink here: os.walk does not descend
        # into symlinked directories unless followlinks is set.
        if os.path.islink(path):
            os.remove(path)
        else:
            os.rmdir(path)

    # Fast path: skip every predicate call when the default (always
    # True) validation is in use.
    filtering = dir_ok_fn is not default_validation_dir

    # Folders dropped by the empty-folder cleanup so far.
    pruned = set()

    for dirpath, dirnames, filenames in os.walk(rootdir, topdown=False):
        # NOTE(review): a rejected directory is skipped, but since the
        # walk is bottom-up its descendants have already been visited
        # on their own merits; this matches the previous behavior.
        if filtering and not dir_ok_fn(dirpath):
            continue

        on_disk = {os.path.join(dirpath, name) for name in dirnames}
        files = {os.path.join(dirpath, name) for name in filenames}
        subdirs = set(filter(dir_ok_fn, on_disk)) if filtering else on_disk

        if remove_empty_folder:
            # os.walk listed the children before descending into them,
            # so children pruned in the meantime must be subtracted
            # *before* testing for emptiness.
            subdirs = subdirs - pruned
            if not subdirs and not files:
                pruned.add(dirpath)
                # A folder holding only rejected directories is ignored
                # but kept on disk: os.rmdir would fail on it.
                if not (on_disk - pruned):
                    discard_empty(dirpath)
                # The parent is visited later in the bottom-up walk and
                # goes through the same check, so there is no need to
                # climb the hierarchy (and risk leaving rootdir) here.
                continue

        yield dirpath, subdirs, files
def
walk_and_compute_sha1_from_directory
(
rootdir
,
def
walk_and_compute_sha1_from_directory
(
rootdir
,
dir_ok_fn
=
lambda
dirpath
:
True
,
dir_ok_fn
=
default_validation_dir
,
with_root_tree
=
True
,
with_root_tree
=
True
,
remove_empty_folder
=
False
):
remove_empty_folder
=
False
):
"""
Compute git sha1 from directory rootdir.
"""
Compute git sha1 from directory rootdir.
...
@@ -228,53 +334,27 @@ def walk_and_compute_sha1_from_directory(rootdir,
...
@@ -228,53 +334,27 @@ def walk_and_compute_sha1_from_directory(rootdir,
if
rootdir
.
endswith
(
b
'
/
'
):
if
rootdir
.
endswith
(
b
'
/
'
):
rootdir
=
rootdir
.
rstrip
(
b
'
/
'
)
rootdir
=
rootdir
.
rstrip
(
b
'
/
'
)
def
filtfn
(
dirpath
,
dirnames
):
for
dirpath
,
dirnames
,
filenames
in
__walk
(
return
list
(
filter
(
lambda
dirname
:
dir_ok_fn
(
os
.
path
.
join
(
dirpath
,
rootdir
,
dir_ok_fn
,
remove_empty_folder
):
dirname
)),
dirnames
))
if
remove_empty_folder
:
# round-trip to remove empty folders
gen_dir
=
((
dp
,
filtfn
(
dp
,
dns
),
fns
)
for
(
dp
,
dns
,
fns
)
in
os
.
walk
(
rootdir
,
topdown
=
False
)
if
dir_ok_fn
(
dp
))
for
dirpath
,
dirnames
,
filenames
in
gen_dir
:
if
dirnames
==
[]
and
filenames
==
[]:
if
os
.
path
.
islink
(
dirpath
):
os
.
remove
(
dirpath
)
else
:
os
.
removedirs
(
dirpath
)
gen_dir
=
((
dp
,
filtfn
(
dp
,
dns
),
fns
)
for
(
dp
,
dns
,
fns
)
in
os
.
walk
(
rootdir
,
topdown
=
False
)
if
dir_ok_fn
(
dp
))
for
dirpath
,
dirnames
,
filenames
in
gen_dir
:
hashes
=
[]
hashes
=
[]
links
=
(
os
.
path
.
join
(
dirpath
,
file
)
links
=
(
file
for
file
in
(
filenames
+
dirnames
)
for
file
in
filenames
.
union
(
dirnames
)
if
os
.
path
.
islink
(
os
.
path
.
join
(
dirpath
,
file
))
)
if
os
.
path
.
islink
(
file
))
for
linkpath
in
links
:
for
linkpath
in
links
:
all_links
.
add
(
linkpath
)
all_links
.
add
(
linkpath
)
m_hashes
=
compute_link_metadata
(
linkpath
)
m_hashes
=
compute_link_metadata
(
linkpath
)
hashes
.
append
(
m_hashes
)
hashes
.
append
(
m_hashes
)
only_files
=
(
os
.
path
.
join
(
dirpath
,
file
)
for
filepath
in
(
file
for
file
in
filenames
if
file
not
in
all_links
):
for
file
in
filenames
if
os
.
path
.
join
(
dirpath
,
file
)
not
in
all_links
)
for
filepath
in
only_files
:
m_hashes
=
compute_blob_metadata
(
filepath
)
m_hashes
=
compute_blob_metadata
(
filepath
)
hashes
.
append
(
m_hashes
)
hashes
.
append
(
m_hashes
)
ls_hashes
[
dirpath
]
=
hashes
ls_hashes
[
dirpath
]
=
hashes
dir_hashes
=
[]
dir_hashes
=
[]
subdirs
=
(
os
.
path
.
join
(
dirpath
,
dir
)
for
fulldirname
in
(
dir
for
dir
in
dirnames
if
dir
not
in
all_links
):
for
dir
in
dirnames
if
os
.
path
.
join
(
dirpath
,
dir
)
not
in
all_links
)
for
fulldirname
in
subdirs
:
tree_hash
=
compute_tree_metadata
(
fulldirname
,
ls_hashes
)
tree_hash
=
compute_tree_metadata
(
fulldirname
,
ls_hashes
)
dir_hashes
.
append
(
tree_hash
)
dir_hashes
.
append
(
tree_hash
)
...
@@ -398,7 +478,7 @@ def commonpath(paths):
...
@@ -398,7 +478,7 @@ def commonpath(paths):
def
__remove_paths_from_objects
(
objects
,
rootpaths
,
def
__remove_paths_from_objects
(
objects
,
rootpaths
,
dir_ok_fn
=
lambda
dirpath
:
True
):
dir_ok_fn
=
default_validation_dir
):
"""
Given top paths to remove, remove all paths and descendants from
"""
Given top paths to remove, remove all paths and descendants from
objects.
objects.
...
@@ -441,7 +521,7 @@ def __remove_paths_from_objects(objects, rootpaths,
...
@@ -441,7 +521,7 @@ def __remove_paths_from_objects(objects, rootpaths,
def
update_checksums_from
(
changed_paths
,
objects
,
def
update_checksums_from
(
changed_paths
,
objects
,
dir_ok_fn
=
lambda
dirpath
:
True
,
dir_ok_fn
=
default_validation_dir
,
remove_empty_folder
=
False
):
remove_empty_folder
=
False
):
"""
Given a list of changed paths, recompute the checksums only where
"""
Given a list of changed paths, recompute the checksums only where
needed.
needed.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment