Skip to content
Snippets Groups Projects
Commit 32f2626a authored by Antoine Pietri's avatar Antoine Pietri
Browse files

Generate .labelobl files to speed up labelled graph loading

parent 795fa182
Branches labelobl
No related tags found
No related merge requests found
......@@ -131,7 +131,7 @@ The compression pipeline consists of the following steps:
.. figure:: images/compression_steps.png
:align: center
:alt: Compression steps
:target: _images/compression_steps.png
:scale: 20%
Compression steps
......@@ -587,8 +587,23 @@ equivalent of these files:
``graph-transposed-labelled.{properties,labels,labeloffsets}``.
22. EDGE_LABELS_OBL
-------------------
Cache the label offsets of the forward labelled graph to make loading faster.
The resulting label offset big list is stored in the
``graph-labelled.labelobl`` file.
23. EDGE_LABELS_TRANSPOSE_OBL
-----------------------------
Same as EDGE_LABELS_OBL, but for the transposed labelled graph.
The resulting label offset big list is stored in the
``graph-transposed-labelled.labelobl`` file.
22. CLEAN_TMP
24. CLEAN_TMP
-------------
This step reclaims space by deleting the temporary directory, as well as all
......
......@@ -31,6 +31,8 @@ digraph "Compression steps" {
labels_fcl [label="graph.labels.fcl"];
graph_labelled [label="graph-labelled.*"];
graph_transposed_labelled [label="graph-transposed-labelled.*"];
graph_labelled_obl [label="graph-labelled.labelobl"];
graph_transposed_labelled_obl [label="graph-transposed-labelled.labelobl"];
subgraph {
node [shape=box, fontname="Courier New"];
......@@ -55,6 +57,8 @@ digraph "Compression steps" {
MPH_LABELS;
FCL_LABELS;
EDGE_LABELS;
EDGE_LABELS_OBL;
EDGE_LABELS_TRANSPOSE_OBL;
}
......@@ -102,4 +106,6 @@ digraph "Compression steps" {
graph_order -> EDGE_LABELS;
EDGE_LABELS -> graph_labelled;
EDGE_LABELS -> graph_transposed_labelled;
graph_labelled -> EDGE_LABELS_OBL -> graph_labelled_obl;
graph_transposed_labelled -> EDGE_LABELS_TRANSPOSE_OBL -> graph_transposed_labelled_obl;
}
......@@ -176,7 +176,7 @@ def compress(ctx, input_dataset, output_directory, graph_name, steps):
(10) obl, (11) compose_orders, (12) stats, (13) transpose, (14)
transpose_obl, (15) maps, (16) extract_persons, (17) mph_persons, (18)
node_properties, (19) mph_labels, (20) fcl_labels, (21) edge_labels, (22)
clean_tmp.
edge_labels_obl, (23) edge_labels_transpose_obl, (24) clean_tmp.
Compression steps can be selected by name or number using --steps,
separating them with commas; step ranges (e.g., 3-9, 6-, etc.) are also
supported.
......
......@@ -42,7 +42,9 @@ class CompressionStep(Enum):
MPH_LABELS = 19
FCL_LABELS = 20
EDGE_LABELS = 21
CLEAN_TMP = 22
EDGE_LABELS_OBL = 22
EDGE_LABELS_TRANSPOSE_OBL = 23
CLEAN_TMP = 24
def __str__(self):
return self.name
......@@ -231,6 +233,18 @@ STEP_ARGV: Dict[CompressionStep, List[str]] = {
"{in_dir}",
"{out_dir}/{graph_name}",
],
CompressionStep.EDGE_LABELS_OBL: [
"{java}",
"it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph",
"--list",
"{out_dir}/{graph_name}-labelled",
],
CompressionStep.EDGE_LABELS_TRANSPOSE_OBL: [
"{java}",
"it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph",
"--list",
"{out_dir}/{graph_name}-transposed-labelled",
],
CompressionStep.CLEAN_TMP: [
"rm",
"-rf",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment