Skip to content
Snippets Groups Projects
Commit bbc66409 authored by Antoine Pietri's avatar Antoine Pietri
Browse files

java tools: ForkCC learned to print its largest component

parent 14b43783
No related branches found
No related tags found
No related merge requests found
......@@ -4,11 +4,9 @@ import com.google.common.primitives.Longs;
import com.martiansoftware.jsap.*;
import it.unimi.dsi.big.webgraph.ImmutableGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.big.webgraph.algo.ConnectedComponents;
import it.unimi.dsi.big.webgraph.Transform;
import it.unimi.dsi.bits.LongArrayBitVector;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.Hash;
import it.unimi.dsi.io.ByteDiskQueue;
import it.unimi.dsi.logging.ProgressLogger;
import org.softwareheritage.graph.Graph;
......@@ -16,18 +14,17 @@ import org.softwareheritage.graph.Node;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.*;
public class ForkCC {
private Graph graph;
private Long emptySnapshot;
/**
 * Builds a {@link Graph} from the given basename and stores it on this instance,
 * reporting progress on standard error before and after construction.
 *
 * @param graphBasename basename handed unchanged to the {@code Graph} constructor
 * @throws IOException declared for failures while constructing the graph
 */
private void load_graph(String graphBasename) throws IOException {
    System.err.println("Loading graph " + graphBasename + " ...");
    graph = new Graph(graphBasename);
    System.err.println("Graph loaded.");
    // Forget any empty-snapshot id cached earlier; it is rediscovered lazily.
    emptySnapshot = null;
}
private static JSAPResult parse_args(String[] args) {
......@@ -54,7 +51,17 @@ public class ForkCC {
return config;
}
private Map<Long, Long> compute(final ImmutableGraph graph, ProgressLogger pl) throws IOException {
/**
 * Tells whether {@code node} is the cached "empty snapshot" node.
 *
 * <p>While no empty snapshot has been cached yet, the first node passed in whose
 * type is {@code Node.Type.SNP} and whose outdegree is zero is logged to standard
 * error and remembered. Once a node is cached, the answer is a plain equality test
 * against it and the graph is not queried again.
 *
 * @param node node id to test
 * @return {@code true} if {@code node} equals the cached empty snapshot
 */
private boolean nodeIsEmptySnapshot(Long node) {
    if (emptySnapshot != null) {
        // Already discovered: only compare ids.
        return node.equals(emptySnapshot);
    }
    boolean isSnapshot = graph.getNodeType(node) == Node.Type.SNP;
    if (isSnapshot && graph.outdegree(node) == 0) {
        System.err.println("Found empty snapshot: " + node);
        emptySnapshot = node;
        return true;
    }
    return false;
}
private ArrayList<ArrayList<Long>> compute(final ImmutableGraph graph, ProgressLogger pl) throws IOException {
final long n = graph.numNodes();
// Allow enough memory to behave like in-memory queue
int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n);
......@@ -70,28 +77,31 @@ public class ForkCC {
pl.itemsName = "nodes";
pl.start("Starting connected components visit...");
long pos = 0;
TreeMap<Long, Long> ccDistribution = new TreeMap<>();
ArrayList<ArrayList<Long>> components = new ArrayList<>();
for (long i = 0; i < n; i++) {
if (this.graph.getNodeType(i) == Node.Type.DIR) continue;
if (this.nodeIsEmptySnapshot(i)) continue;
if (visited.getBoolean(i)) continue;
long originCount = 0;
ArrayList<Long> component = new ArrayList<>();
queue.enqueue(Longs.toByteArray(i));
visited.set(i);
component.add(i);
while (!queue.isEmpty()) {
queue.dequeue(byteBuf);
final long currentNode = Longs.fromByteArray(byteBuf);
if (this.graph.getNodeType(currentNode) == Node.Type.ORI)
++originCount;
if (this.graph.getNodeType(currentNode) == Node.Type.ORI) {
component.add(currentNode);
}
final LazyLongIterator iterator = graph.successors(currentNode);
long succ;
while((succ = iterator.nextLong()) != -1) {
if (this.graph.getNodeType(succ) == Node.Type.DIR) continue;
if (this.nodeIsEmptySnapshot(succ)) continue;
if (!visited.getBoolean(succ)) {
visited.set(succ);
......@@ -102,20 +112,39 @@ public class ForkCC {
pl.update();
}
ccDistribution.merge(originCount, 1L, Long::sum);
// ccDistribution.merge(originCount, 1L, Long::sum);
components.add(component);
}
pl.done();
queue.close();
return ccDistribution;
return components;
}
private static void printDistribution(Map<Long, Long> distribution) {
/**
 * Prints the distribution of component sizes on standard output.
 *
 * <p>Each output line has the form {@code "<size> <count>"}, where {@code count}
 * is the number of components whose list holds exactly {@code size} entries.
 * Lines are emitted in ascending order of size.
 *
 * @param components the components, each given as the list of node ids it holds
 */
private static void printDistribution(ArrayList<ArrayList<Long>> components) {
    TreeMap<Long, Long> countBySize = new TreeMap<>();
    for (ArrayList<Long> members : components) {
        long size = members.size();
        countBySize.put(size, countBySize.getOrDefault(size, 0L) + 1L);
    }
    // TreeMap iterates in ascending key order, so sizes come out sorted.
    countBySize.forEach((size, count) -> System.out.format("%d %d\n", size, count));
}
/**
 * Prints every node id of the largest component on standard output, one per line.
 *
 * <p>"Largest" means the component whose list holds the most entries; when several
 * components share the maximum size, the one appearing first in {@code components}
 * is printed. If {@code components} is empty, nothing is printed.
 *
 * @param components the components, each given as the list of node ids it holds
 */
private static void printLargestComponent(ArrayList<ArrayList<Long>> components) {
    if (components.isEmpty()) {
        // No component was collected; indexing element 0 would throw.
        return;
    }
    ArrayList<Long> largest = components.get(0);
    for (ArrayList<Long> candidate : components) {
        // Strict comparison keeps the earliest component on ties.
        if (candidate.size() > largest.size()) {
            largest = candidate;
        }
    }
    for (Long node : largest) {
        System.out.println(node);
    }
}
public static void main(String[] args) {
JSAPResult config = parse_args(args);
......@@ -131,14 +160,15 @@ public class ForkCC {
}
ImmutableGraph symmetric = Transform.union(
forkCc.graph.getBVGraph(false),
forkCc.graph.getBVGraph(true)
forkCc.graph.getBVGraph(false),
forkCc.graph.getBVGraph(true)
);
ProgressLogger logger = new ProgressLogger();
try {
Map<Long, Long> distribution = forkCc.compute(symmetric, logger);
printDistribution(distribution);
ArrayList<ArrayList<Long>> components = forkCc.compute(symmetric, logger);
printDistribution(components);
// printLargestComponent(components);
} catch (IOException e) {
e.printStackTrace();
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment