Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
S
swh-graph
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Platform
Development
swh-graph
Commits
bbc66409
Commit
bbc66409
authored
5 years ago
by
Antoine Pietri
Browse files
Options
Downloads
Patches
Plain Diff
java tools: ForkCC learned to print its largest component
parent
14b43783
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
java/src/main/java/org/softwareheritage/graph/benchmark/ForkCC.java
+49
-19
49 additions, 19 deletions
...ain/java/org/softwareheritage/graph/benchmark/ForkCC.java
with
49 additions
and
19 deletions
java/src/main/java/org/softwareheritage/graph/benchmark/ForkCC.java
+
49
−
19
View file @
bbc66409
...
...
@@ -4,11 +4,9 @@ import com.google.common.primitives.Longs;
import
com.martiansoftware.jsap.*
;
import
it.unimi.dsi.big.webgraph.ImmutableGraph
;
import
it.unimi.dsi.big.webgraph.LazyLongIterator
;
import
it.unimi.dsi.big.webgraph.algo.ConnectedComponents
;
import
it.unimi.dsi.big.webgraph.Transform
;
import
it.unimi.dsi.bits.LongArrayBitVector
;
import
it.unimi.dsi.fastutil.Arrays
;
import
it.unimi.dsi.fastutil.Hash
;
import
it.unimi.dsi.io.ByteDiskQueue
;
import
it.unimi.dsi.logging.ProgressLogger
;
import
org.softwareheritage.graph.Graph
;
...
...
@@ -16,18 +14,17 @@ import org.softwareheritage.graph.Node;
import
java.io.File
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.HashMap
;
import
java.util.Map
;
import
java.util.TreeMap
;
import
java.util.*
;
public
class
ForkCC
{
private
Graph
graph
;
private
Long
emptySnapshot
;
/**
 * Loads the graph stored under the given basename into this instance and
 * forgets any previously remembered empty snapshot.
 *
 * Progress messages are written to standard error before and after loading.
 *
 * @param graphBasename basename handed to the {@code Graph} constructor
 * @throws IOException propagated from the {@code Graph} constructor
 */
private void load_graph(String graphBasename) throws IOException {
    final String announcement = "Loading graph " + graphBasename + " ...";
    System.err.println(announcement);

    final Graph loadedGraph = new Graph(graphBasename);
    this.graph = loadedGraph;
    System.err.println("Graph loaded.");

    // The cached empty-snapshot id belongs to the previous graph, if any.
    this.emptySnapshot = null;
}
private
static
JSAPResult
parse_args
(
String
[]
args
)
{
...
...
@@ -54,7 +51,17 @@ public class ForkCC {
return
config
;
}
private
Map
<
Long
,
Long
>
compute
(
final
ImmutableGraph
graph
,
ProgressLogger
pl
)
throws
IOException
{
/**
 * Tells whether {@code node} is the node remembered as the empty snapshot.
 *
 * While no empty snapshot has been remembered yet, the first node passed in
 * that has type {@code Node.Type.SNP} and an outdegree of 0 is recorded in
 * {@code this.emptySnapshot} (and reported on standard error). Every call
 * then answers by comparing {@code node} with the remembered value.
 *
 * @param node node id to test
 * @return {@code true} if {@code node} equals the remembered empty snapshot
 */
private boolean nodeIsEmptySnapshot(Long node) {
    final boolean stillSearching = this.emptySnapshot == null;
    if (stillSearching) {
        // Check the node type first and the outdegree only for snapshots,
        // so the graph is queried no more than strictly needed.
        final boolean isSnapshot = this.graph.getNodeType(node) == Node.Type.SNP;
        if (isSnapshot && this.graph.outdegree(node) == 0) {
            System.err.println("Found empty snapshot: " + node);
            this.emptySnapshot = node;
        }
    }
    return node.equals(this.emptySnapshot);
}
private
ArrayList
<
ArrayList
<
Long
>>
compute
(
final
ImmutableGraph
graph
,
ProgressLogger
pl
)
throws
IOException
{
final
long
n
=
graph
.
numNodes
();
// Allow enough memory to behave like in-memory queue
int
bufferSize
=
(
int
)
Math
.
min
(
Arrays
.
MAX_ARRAY_SIZE
&
~
0x7
,
8L
*
n
);
...
...
@@ -70,28 +77,31 @@ public class ForkCC {
pl
.
itemsName
=
"nodes"
;
pl
.
start
(
"Starting connected components visit..."
);
long
pos
=
0
;
TreeMap
<
Long
,
Long
>
ccDistribution
=
new
TreeMap
<>();
ArrayList
<
ArrayList
<
Long
>>
components
=
new
ArrayList
<>();
for
(
long
i
=
0
;
i
<
n
;
i
++)
{
if
(
this
.
graph
.
getNodeType
(
i
)
==
Node
.
Type
.
DIR
)
continue
;
if
(
this
.
nodeIsEmptySnapshot
(
i
))
continue
;
if
(
visited
.
getBoolean
(
i
))
continue
;
long
originCount
=
0
;
ArrayList
<
Long
>
component
=
new
ArrayList
<>()
;
queue
.
enqueue
(
Longs
.
toByteArray
(
i
));
visited
.
set
(
i
);
component
.
add
(
i
);
while
(!
queue
.
isEmpty
())
{
queue
.
dequeue
(
byteBuf
);
final
long
currentNode
=
Longs
.
fromByteArray
(
byteBuf
);
if
(
this
.
graph
.
getNodeType
(
currentNode
)
==
Node
.
Type
.
ORI
)
++
originCount
;
if
(
this
.
graph
.
getNodeType
(
currentNode
)
==
Node
.
Type
.
ORI
)
{
component
.
add
(
currentNode
);
}
final
LazyLongIterator
iterator
=
graph
.
successors
(
currentNode
);
long
succ
;
while
((
succ
=
iterator
.
nextLong
())
!=
-
1
)
{
if
(
this
.
graph
.
getNodeType
(
succ
)
==
Node
.
Type
.
DIR
)
continue
;
if
(
this
.
nodeIsEmptySnapshot
(
succ
))
continue
;
if
(!
visited
.
getBoolean
(
succ
))
{
visited
.
set
(
succ
);
...
...
@@ -102,20 +112,39 @@ public class ForkCC {
pl
.
update
();
}
ccDistribution
.
merge
(
originCount
,
1L
,
Long:
:
sum
);
// ccDistribution.merge(originCount, 1L, Long::sum);
components
.
add
(
component
);
}
pl
.
done
();
queue
.
close
();
return
c
cDistribution
;
return
c
omponents
;
}
private
static
void
printDistribution
(
Map
<
Long
,
Long
>
distribution
)
{
private
static
void
printDistribution
(
ArrayList
<
ArrayList
<
Long
>>
components
)
{
TreeMap
<
Long
,
Long
>
distribution
=
new
TreeMap
<>();
for
(
ArrayList
<
Long
>
component
:
components
)
{
distribution
.
merge
((
long
)
component
.
size
(),
1L
,
Long:
:
sum
);
}
for
(
Map
.
Entry
<
Long
,
Long
>
entry
:
distribution
.
entrySet
())
{
System
.
out
.
format
(
"%d %d\n"
,
entry
.
getKey
(),
entry
.
getValue
());
}
}
/**
 * Prints the entries of the largest component, one per line, to standard
 * output.
 *
 * The largest component is the list with the most entries; when several
 * lists share the maximum size, the one that appears first is printed.
 * Nothing is printed when {@code components} is empty (the previous
 * implementation threw {@code IndexOutOfBoundsException} in that case).
 *
 * @param components list of components, each given as a list of node ids
 */
private static void printLargestComponent(ArrayList<ArrayList<Long>> components) {
    if (components.isEmpty()) {
        // No component at all: there is nothing to select or print.
        return;
    }

    ArrayList<Long> largest = components.get(0);
    for (ArrayList<Long> candidate : components) {
        // Strict comparison keeps the earliest component on ties.
        if (candidate.size() > largest.size()) {
            largest = candidate;
        }
    }

    for (Long node : largest) {
        System.out.println(node);
    }
}
public
static
void
main
(
String
[]
args
)
{
JSAPResult
config
=
parse_args
(
args
);
...
...
@@ -131,14 +160,15 @@ public class ForkCC {
}
ImmutableGraph
symmetric
=
Transform
.
union
(
forkCc
.
graph
.
getBVGraph
(
false
),
forkCc
.
graph
.
getBVGraph
(
true
)
forkCc
.
graph
.
getBVGraph
(
false
),
forkCc
.
graph
.
getBVGraph
(
true
)
);
ProgressLogger
logger
=
new
ProgressLogger
();
try
{
Map
<
Long
,
Long
>
distribution
=
forkCc
.
compute
(
symmetric
,
logger
);
printDistribution
(
distribution
);
ArrayList
<
ArrayList
<
Long
>>
components
=
forkCc
.
compute
(
symmetric
,
logger
);
printDistribution
(
components
);
// printLargestComponent(components);
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment