Skip to content
Snippets Groups Projects
Commit ed28ae74, authored and committed by vlorentz
Browse files

Update dataset-writer to avoid exceeding Parquet's max row-group count

parent 0a8abbe6
No related branches found
No related tags found
No related merge requests found
......@@ -1260,6 +1260,22 @@ dependencies = [
"zstd 0.12.4",
]
[[package]]
name = "dataset-writer"
version = "1.1.0"
source = "git+https://gitlab.softwareheritage.org/swh/devel/dataset-writer-rs.git?rev=c76569daa9bdd1da07c1b964708c007a05ee0d84#c76569daa9bdd1da07c1b964708c007a05ee0d84"
dependencies = [
"anyhow",
"arrow",
"arrow-array",
"arrow-schema",
"csv",
"parquet",
"rayon",
"thread_local",
"zstd 0.12.4",
]
[[package]]
name = "debugid"
version = "0.8.0"
......@@ -4003,7 +4019,7 @@ dependencies = [
"byteorder",
"chrono",
"clap",
"dataset-writer",
"dataset-writer 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"dsi-progress-logger",
"env_logger 0.11.5",
"log",
......@@ -4040,7 +4056,7 @@ dependencies = [
"anyhow",
"clap",
"csv",
"dataset-writer",
"dataset-writer 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"dsi-progress-logger",
"env_logger 0.11.5",
"itertools 0.11.0",
......@@ -4065,7 +4081,7 @@ dependencies = [
"clap",
"csv",
"dashmap",
"dataset-writer",
"dataset-writer 1.1.0 (git+https://gitlab.softwareheritage.org/swh/devel/dataset-writer-rs.git?rev=c76569daa9bdd1da07c1b964708c007a05ee0d84)",
"dsi-progress-logger",
"env_logger 0.11.5",
"log",
......@@ -4097,7 +4113,7 @@ dependencies = [
"bytemuck",
"clap",
"csv",
"dataset-writer",
"dataset-writer 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"dsi-bitstream",
"dsi-progress-logger",
"env_logger 0.11.5",
......
......@@ -15,7 +15,8 @@ chrono = { version = "0.4.31", features = ["serde"] }
clap = { version = "4.1.6", features = ["derive"] }
csv = "1.3.0"
dashmap = "6.1.0"
dataset-writer = { version = "1.1.0", features = ["parquet"] }
#dataset-writer = { version = "1.2.0", features = ["parquet"] }
dataset-writer = { git = "https://gitlab.softwareheritage.org/swh/devel/dataset-writer-rs.git", rev = "c76569daa9bdd1da07c1b964708c007a05ee0d84", features = ["parquet"] }
dsi-progress-logger = "0.2.4"
log = "0.4.17"
parquet = { version = "53.1.0", default-features = false, features = ["arrow", "zstd"] }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment