Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-lister
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Antoine R. Dumont
swh-lister
Commits
f1adec6f
Commit
f1adec6f
authored
9 years ago
by
Stefano Zacchiroli
Browse files
Options
Downloads
Patches
Plain Diff
pimp_db: add plpgsql function to extract random repos sample
parent
48f93a93
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
sql/pimp_db.sql
+69
-6
69 additions, 6 deletions
sql/pimp_db.sql
with
69 additions
and
6 deletions
sql/pimp_db.sql
+
69
−
6
View file @
f1adec6f
...
...
@@ -26,11 +26,74 @@ CREATE TABLE repos_history (
-- repo_creations: per-snapshot deltas of the counters stored in repos_history.
--
-- Each row pairs a snapshot ("today") with the immediately preceding one
-- ("yesterday"), i.e. the row holding the greatest ts strictly earlier than
-- today's ts, and reports the difference of the repos, fork_repos and
-- orig_repos counters between the two.  The output is labelled with the date
-- part of today's timestamp.
--
-- Because this is an inner join, the earliest snapshot (which has no
-- predecessor) does not appear in the view.
-- NOTE(review): presumably the counters are cumulative totals, so the deltas
-- are the number of repositories created between snapshots -- confirm against
-- whatever populates repos_history.
CREATE VIEW repo_creations AS
    SELECT today.ts::date AS date,
           today.repos - yesterday.repos AS repos,
           today.fork_repos - yesterday.fork_repos AS fork_repos,
           today.orig_repos - yesterday.orig_repos AS orig_repos
    FROM repos_history today
    JOIN repos_history yesterday ON
         -- previous snapshot = latest ts strictly before the current one
         (yesterday.ts = (SELECT max(ts)
                          FROM repos_history
                          WHERE ts < today.ts));
-- -- return a random sample of repos, containing %percent repositories
-- create or replace function repos_random_sample_array(percent real)
-- returns setof repos as $$
-- declare
-- samples integer;
-- repo repos%rowtype;
-- ids integer[];
-- begin
-- select floor(count(*) / 100 * percent) into samples from repos;
-- ids := array(select id from repos order by id);
-- for i in 1 .. samples loop
-- select * into repo
-- from repos
-- where id = ids[round(random() * samples)];
-- return next repo;
-- end loop;
-- return;
-- end
-- $$
-- language plpgsql;
-- Return a random sample of repositories.
--
-- Parameters:
--   percent  share of the repos table to return, as a percentage
--            (e.g. 10 returns about 10% of the rows, 0.5 returns 0.5%).
--
-- Returns: a set of repos rows, chosen by shuffling the whole table with
-- ORDER BY random() and keeping the first sample_size rows.  The sample size
-- is floor(row_count * percent / 100).
--
-- Notes:
--   * The multiplication by percent is done before the division so the
--     computation happens in floating point.  Dividing count(*) by 100 first
--     is an integer division: it truncates, and yields a sample size of 0 for
--     any table with fewer than 100 rows.
--   * ORDER BY random() has to read and sort every row of repos.
create or replace function repos_random_sample(percent real)
returns setof repos as $$
declare
    sample_size integer;
begin
    -- count(*) * percent is evaluated in floating point; floor() then rounds
    -- down to a whole number of rows.
    select floor(count(*) * percent / 100)::integer
      into sample_size
      from repos;

    return query
        select *
          from repos
         order by random()
         limit sample_size;
    return;
end
$$
language plpgsql;
-- -- return a random sample of repositories
-- create or replace function random_sample_sequence(percent real)
-- returns setof repos as $$
-- declare
-- sample_size integer;
-- seq_size integer;
-- min_id integer;
-- max_id integer;
-- begin
-- select floor(count(*) / 100 * percent) into sample_size from repos;
-- select min(id) into min_id from repos;
-- select max(id) into max_id from repos;
-- seq_size := sample_size * 3; -- IDs are sparse, generate a larger sequence
-- -- to have enough of them
-- return query
-- select * from repos
-- where id in
-- (select floor(random() * (max_id - min_id + 1))::integer
-- + min_id
-- from generate_series(1, seq_size))
-- order by random() limit sample_size;
-- return;
-- end
-- $$
-- language plpgsql;
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment