Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-lister
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Platform
Development
swh-lister
Merge requests
!543
Remove no longer used sql directory
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Remove no longer used sql directory
anlambert/swh-lister:delete-unused-sql-folder
into
master
Overview
3
Commits
1
Pipelines
2
Changes
2
Merged
Antoine Lambert
requested to merge
anlambert/swh-lister:delete-unused-sql-folder
into
master
2 months ago
Overview
3
Commits
1
Pipelines
2
Changes
2
Expand
0
0
Merge request reports
Compare
master
version 1
f51feb59
2 months ago
master (base)
and
latest version
latest version
4d14e892
1 commit,
2 months ago
version 1
f51feb59
1 commit,
2 months ago
2 files
+
0
−
142
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
2
Search (e.g. *.vue) (Ctrl+P)
sql/crawler.sql deleted
100644 → 0
+
0
−
106
Options
-- -- return a random sample of repos, containing `percent` percent of the repositories
-- create or replace function repos_random_sample_array(percent real)
-- returns setof repos as $$
-- declare
-- samples integer;
-- repo repos%rowtype;
-- ids integer[];
-- begin
-- select floor(count(*) / 100 * percent) into samples from repos;
-- ids := array(select id from repos order by id);
-- for i in 1 .. samples loop
-- select * into repo
-- from repos
-- where id = ids[round(random() * samples)];
-- return next repo;
-- end loop;
-- return;
-- end
-- $$
-- language plpgsql;
-- Return a random sample of rows from repos.
--
-- Parameters:
--   percent  share of the table to return, expressed as a percentage
--            (e.g. 10 yields about 10% of the rows).
-- Returns:
--   setof repos: floor(row_count * percent / 100) rows, in random order.
--
-- The sample size is computed by multiplying BEFORE dividing. count(*) is a
-- bigint, so the former "count(*) / 100 * percent" performed an integer
-- division first: the row count was silently rounded down to a multiple of
-- 100, and any table with fewer than 100 rows always produced an empty sample.
create or replace function repos_random_sample(percent real)
returns setof repos as $$
declare
    sample_size integer;
begin
    -- bigint * real is evaluated in floating point, so no truncation happens
    -- until the explicit floor().
    select floor(count(*) * percent / 100)
    into sample_size
    from repos;

    -- Shuffle the whole table and keep the first sample_size rows.
    return query
        select *
        from repos
        order by random()
        limit sample_size;
    return;
end
$$
language plpgsql;
-- -- return a random sample of repositories
-- create or replace function random_sample_sequence(percent real)
-- returns setof repos as $$
-- declare
-- sample_size integer;
-- seq_size integer;
-- min_id integer;
-- max_id integer;
-- begin
-- select floor(count(*) / 100 * percent) into sample_size from repos;
-- select min(id) into min_id from repos;
-- select max(id) into max_id from repos;
-- seq_size := sample_size * 3; -- IDs are sparse, generate a larger sequence
-- -- to have enough of them
-- return query
-- select * from repos
-- where id in
-- (select floor(random() * (max_id - min_id + 1))::integer
-- + min_id
-- from generate_series(1, seq_size))
-- order by random() limit sample_size;
-- return;
-- end
-- $$
-- language plpgsql;
-- Return the rows of repos whose full_name either starts with one of the
-- prefixes 'apache/', 'eclipse/' or 'mozilla/', or is exactly
-- 'torvalds/linux' or 'gcc-mirror/gcc'.
create or replace function repos_well_known()
returns setof repos
language plpgsql
as $body$
begin
    return query
        select r.*
        from repos as r
        where r.full_name like any (array['apache/%', 'eclipse/%', 'mozilla/%'])
           or r.full_name in ('torvalds/linux', 'gcc-mirror/gcc');
    -- Control reaching the end of the body terminates the result set.
end
$body$;
-- History of crawls. Presumably one row per crawl task run; confirm against
-- the code that inserts into this table.
create table crawl_history (
    id       bigserial primary key,
    repo     integer references repos (id),        -- nullable link to repos.id
    task_id  uuid,                                 -- celery task id
    date     timestamp with time zone not null,    -- only mandatory column besides id
    duration interval,
    status   boolean,
    result   json,
    stdout   text,
    stderr   text
);
-- Index crawl_history by repo: the missing_orig_repos and missing_fork_repos
-- views look rows up through this column. The index name is left to the
-- server, as before; btree is spelled out but is the default access method.
create index on crawl_history using btree (repo);
-- Rows of orig_repos that have no entry at all in crawl_history
-- (anti-join on crawl_history.repo = orig_repos.id).
create view missing_orig_repos as
select o.*
from orig_repos as o
where not exists (
    select 1
    from crawl_history as h
    where h.repo = o.id
);
-- Rows of fork_repos that have no entry at all in crawl_history
-- (anti-join on crawl_history.repo = fork_repos.id).
create view missing_fork_repos as
select f.*
from fork_repos as f
where not exists (
    select 1
    from crawl_history as h
    where h.repo = f.id
);
Loading