Skip to content
Snippets Groups Projects
Commit 7fa20132 authored by vlorentz's avatar vlorentz
Browse files

Replace `entry_to_bytes` with psycopg2's typecast

`entry_to_bytes` and its friends were called many times (e.g.
`entry_to_bytes` alone was called 40k times while indexing 500
origins with the metadata indexer), and their use of `isinstance`
used a non-negligible amount of CPU time.

Instead of using the `*_to_bytes` functions as post-processing on
all bits of data returned by postgresql, this patch tells psycopg2
to use a new `typecast_bytea` function when needed (in `adapt_conn`).
This function defers the decoding work to psycopg2, which returns a
`memoryview`, which is then turned into `bytes`.
parent 0e277770
No related branches found
Tags 0.0.51
1 merge request!38Replace `entry_to_bytes` with psycopg2's typecast
......@@ -52,6 +52,12 @@ def escape(data):
return str(data)
def typecast_bytea(value, cur):
    """Typecast function turning a raw bytea value into ``bytes``.

    Args:
        value: the raw value handed over by psycopg2, or ``None``.
        cur: the cursor the value comes from, forwarded to
            ``psycopg2.BINARY``.

    Returns:
        ``None`` when ``value`` is ``None``; otherwise the result of
        ``psycopg2.BINARY(value, cur)`` converted with ``tobytes()``.
    """
    if value is None:
        return None
    # Let psycopg2 do the decoding, then copy its buffer out as bytes.
    return psycopg2.BINARY(value, cur).tobytes()
class BaseDb:
"""Base class for swh.*.*Db.
......@@ -59,6 +65,23 @@ class BaseDb:
"""
@classmethod
def adapt_conn(cls, conn):
    """Makes psycopg2 use 'bytes' to decode bytea instead of
    'memoryview', for this connection.

    The type OIDs of ``bytea`` and ``bytea[]`` are read from the
    description of a throw-away query, then typecasters built on
    ``typecast_bytea`` are registered on ``conn`` only.

    Args:
        conn: the psycopg2 connection to register the typecasters on.
    """
    cur = conn.cursor()
    try:
        # The query returns no useful data; only the column type codes
        # exposed through cur.description are of interest.
        cur.execute("SELECT null::bytea, null::bytea[]")
        bytea_oid = cur.description[0][1]
        bytea_array_oid = cur.description[1][1]
    finally:
        # Release the cursor even when the query fails.
        cur.close()
    # NOTE(review): the SELECT above presumably opens a transaction when
    # the connection is not in autocommit mode -- confirm callers are
    # fine with that.

    t_bytes = psycopg2.extensions.new_type(
        (bytea_oid,), "bytea", typecast_bytea)
    psycopg2.extensions.register_type(t_bytes, conn)

    t_bytes_array = psycopg2.extensions.new_array_type(
        (bytea_array_oid,), "bytea[]", t_bytes)
    psycopg2.extensions.register_type(t_bytes_array, conn)
@classmethod
def connect(cls, *args, **kwargs):
"""factory method to create a DB proxy
......@@ -71,11 +94,14 @@ class BaseDb:
"""
conn = psycopg2.connect(*args, **kwargs)
cls.adapt_conn(conn)
return cls(conn)
@classmethod
def from_pool(cls, pool):
    """Create a DB proxy around a connection taken from ``pool``.

    The connection is passed through ``adapt_conn`` before being
    wrapped, the same way ``connect`` does for a fresh connection.

    Args:
        pool: an object exposing ``getconn()``; it is also handed to
            the constructor as ``pool``.

    Returns:
        an instance of ``cls`` built from the adapted connection.
    """
    # A leftover early `return cls(pool.getconn(), pool=pool)` used to sit
    # here and made the adaptation below unreachable.
    conn = pool.getconn()
    cls.adapt_conn(conn)
    return cls(conn, pool=pool)
def __init__(self, conn, pool=None):
"""create a DB proxy
......
......@@ -44,34 +44,6 @@ def jsonize(value):
return value
def entry_to_bytes(entry):
    """Convert an entry coming from the database to bytes.

    A ``memoryview`` is converted with ``tobytes()``; a ``list`` is
    converted element by element (recursively) into a new list; any
    other value is returned untouched.
    """
    if isinstance(entry, list):
        return list(map(entry_to_bytes, entry))
    return entry.tobytes() if isinstance(entry, memoryview) else entry
def line_to_bytes(line):
    """Convert a line coming from the database to bytes.

    Falsy lines (``None``, empty containers) are returned as is. For a
    dict, every value goes through ``entry_to_bytes`` and the keys are
    kept. Any other line is rebuilt with its own class from the
    converted entries.
    """
    if line:
        if isinstance(line, dict):
            converted = {}
            for key, value in line.items():
                converted[key] = entry_to_bytes(value)
            return converted
        return line.__class__(map(entry_to_bytes, line))
    return line
def cursor_to_bytes(cursor):
    """Yield all the data from a cursor as bytes.

    Each line obtained by iterating ``cursor`` is passed through
    ``line_to_bytes`` before being yielded.
    """
    for row in cursor:
        yield line_to_bytes(row)
def execute_values_to_bytes(*args, **kwargs):
    """Yield the lines of ``execute_values_generator`` as bytes.

    All arguments are forwarded unchanged to
    ``execute_values_generator``; every line it produces is passed
    through ``line_to_bytes`` before being yielded.
    """
    yield from (
        line_to_bytes(row)
        for row in execute_values_generator(*args, **kwargs)
    )
def _paginate(seq, page_size):
"""Consume an iterable and return it in chunks.
Every chunk is at most `page_size`. Never return an empty chunk.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment