From 4f9780749d3a57b6d6573270417c94d7fb7fd3cc Mon Sep 17 00:00:00 2001
From: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Tue, 14 Feb 2023 11:35:32 +0100
Subject: [PATCH] PopularContents: Make a single copy of the graph per thread

Copying it 1k or 10k times for each thread is wasteful, even though the
copies are lightweight.

This brings the runtime down from 133 to ~100 hours.
---
 .../graph/utils/PopularContents.java                | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/java/src/main/java/org/softwareheritage/graph/utils/PopularContents.java b/java/src/main/java/org/softwareheritage/graph/utils/PopularContents.java
index 58660f02..5eac1fe9 100644
--- a/java/src/main/java/org/softwareheritage/graph/utils/PopularContents.java
+++ b/java/src/main/java/org/softwareheritage/graph/utils/PopularContents.java
@@ -37,6 +37,10 @@ import org.slf4j.LoggerFactory;
 
 public class PopularContents {
     private SwhBidirectionalGraph graph;
+    /*
+     * A copy of the graph for each thread to reuse between calls to processChunk
+     */
+    private ThreadLocal<SwhBidirectionalGraph> threadGraph;
     private int NUM_THREADS = 96;
 
     final static Logger logger = LoggerFactory.getLogger(PopularContents.class);
@@ -52,6 +56,7 @@ public class PopularContents {
         long popularityThreshold = Long.parseLong(args[2]);
 
         PopularContents popular_contents = new PopularContents();
+        popular_contents.threadGraph = new ThreadLocal<SwhBidirectionalGraph>();
 
         popular_contents.loadGraph(graphPath);
 
@@ -70,7 +75,7 @@ public class PopularContents {
         System.out.format("SWHID,length,filename,occurrences\n");
 
         long totalNodes = graph.numNodes();
-        long numChunks = NUM_THREADS * 10000;
+        long numChunks = NUM_THREADS * 1000;
 
         ProgressLogger pl = new ProgressLogger(logger);
         pl.itemsName = "nodes";
@@ -94,9 +99,13 @@ public class PopularContents {
 
     private void processChunk(long numChunks, long chunkId, int maxResults, long popularityThreshold,
             ProgressLogger pl) {
+        if (threadGraph.get() == null) {
+            threadGraph.set(this.graph.copy());
+        }
+        SwhBidirectionalGraph graph = threadGraph.get();
         long totalNodes = graph.numNodes();
         HashMap<Long, Long> names = new HashMap<>();
-        SwhUnidirectionalGraph backwardGraph = graph.getBackwardGraph().copy();
+        SwhUnidirectionalGraph backwardGraph = graph.getBackwardGraph();
 
         long chunkSize = totalNodes / numChunks;
         long chunkStart = chunkSize * chunkId;
-- 
GitLab