Skip to content
Snippets Groups Projects
Commit 4636d1c1 authored by Antoine Pietri
Browse files

Add two ORC tools (orc-merge, orc-print-contents)

parent ab6191bc
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Merge multiple ORC files into a single one."""
import argparse
import pyorc
def main():
    """Merge the ORC files named on the command line into one ORC file.

    Command-line arguments:
        -o / --output: destination file, opened for binary writing (required).
        files: one or more input ORC files, opened for binary reading.

    The schema of the first input file is used as the output schema. Each
    input file's schema (compared as a string) must match it exactly.

    Raises:
        RuntimeError: if an input file's schema differs from the schema of
            the first input file. The file number in the message is the
            zero-based position of the file on the command line.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-o", "--output", type=argparse.FileType(mode="wb"), required=True
    )
    parser.add_argument("files", type=argparse.FileType(mode="rb"), nargs="+")
    args = parser.parse_args()

    # argparse.FileType opens every file eagerly at parse time and never
    # closes them, so release all handles explicitly once we are done,
    # whether the merge succeeded or failed.
    try:
        schema = str(pyorc.Reader(args.files[0]).schema)
        with pyorc.Writer(args.output, schema) as writer:
            for i, f in enumerate(args.files):
                reader = pyorc.Reader(f)
                if str(reader.schema) != schema:
                    raise RuntimeError(
                        "Inconsistent ORC schemas.\n"
                        "\tFirst file schema: {}\n"
                        "\tFile #{} schema: {}".format(schema, i, str(reader.schema))
                    )
                for line in reader:
                    writer.write(line)
    finally:
        # The writer's context manager has already exited at this point, so
        # closing the output here happens after the writer is done with it.
        for f in args.files:
            f.close()
        args.output.close()


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Print the contents of an ORC file."""
import argparse
import pyorc
if __name__ == "__main__":
    # Script entry point: print every row of each ORC file given on the
    # command line, one row per line, in the order the files were given.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("files", type=argparse.FileType(mode="rb"), nargs="+")
    args = parser.parse_args()
    for orc_file in args.files:
        # argparse.FileType opened the file but will never close it; the
        # `with` block releases the handle as soon as its rows are printed.
        with orc_file:
            reader = pyorc.Reader(orc_file)
            for row in reader:
                print(row)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment