diff --git a/swh/storage/replay.py b/swh/storage/replay.py
index 0516ba853cc323f15035bb50a6bf617e5a5f4a36..52ce8dd10388c6cfbf86bf44052e16e0b6d505ff 100644
--- a/swh/storage/replay.py
+++ b/swh/storage/replay.py
@@ -4,7 +4,7 @@
 # See top-level LICENSE file for more information
 
 import logging
-from typing import Any, Callable, Dict, Iterable, List
+from typing import Any, Callable, Container, Dict, Iterable, List
 
 try:
     from systemd.daemon import notify
@@ -103,6 +103,11 @@ def collision_aware_content_add(
             logger.error("Collision detected: %(collision)s", {"collision": collision})
 
 
+def dict_key_dropper(d: Dict, keys_to_drop: Container) -> Dict:
+    """Returns a copy of the dict d without any key listed in keys_to_drop"""
+    return {k: v for (k, v) in d.items() if k not in keys_to_drop}
+
+
 def _insert_objects(object_type: str, objects: List[Dict], storage) -> None:
     """Insert objects of type object_type in the storage.
 
@@ -146,6 +151,18 @@ def _insert_objects(object_type: str, objects: List[Dict], storage) -> None:
         storage.metadata_authority_add(authorities)
         storage.metadata_fetcher_add(fetchers)
         storage.raw_extrinsic_metadata_add(converted)
+    elif object_type == "revision":
+        # drop the metadata field from the revision (is any); this field is
+        # about to be dropped from the data model (in favor of
+        # raw_extrinsic_metadata) and there can be bogus values in the existing
+        # journal (metadata with \0000 in it)
+        method = getattr(storage, object_type + "_add")
+        method(
+            [
+                object_converter_fn[object_type](dict_key_dropper(o, ("metadata",)))
+                for o in objects
+            ]
+        )
     elif object_type in (
         "directory",
         "extid",
diff --git a/swh/storage/tests/test_replay.py b/swh/storage/tests/test_replay.py
index cda12ed342c4d62f767db8639f296e5b158598d5..6562f2aa253e1ac52c455628e4485f2ea765f9f3 100644
--- a/swh/storage/tests/test_replay.py
+++ b/swh/storage/tests/test_replay.py
@@ -14,8 +14,15 @@ import pytest
 
 from swh.journal.client import JournalClient
 from swh.journal.serializers import key_to_kafka, value_to_kafka
-from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_hex
-from swh.model.tests.swh_model_data import DUPLICATE_CONTENTS, TEST_OBJECTS
+from swh.model.hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes, hash_to_hex
+from swh.model.model import Revision, RevisionType
+from swh.model.tests.swh_model_data import (
+    COMMITTERS,
+    DATES,
+    DUPLICATE_CONTENTS,
+    REVISIONS,
+)
+from swh.model.tests.swh_model_data import TEST_OBJECTS as _TEST_OBJECTS
 from swh.storage import get_storage
 from swh.storage.cassandra.model import ContentRow, SkippedContentRow
 from swh.storage.in_memory import InMemoryStorage
@@ -23,6 +30,23 @@ from swh.storage.replay import process_replay_objects
 
 UTC = datetime.timezone.utc
 
+TEST_OBJECTS = _TEST_OBJECTS.copy()
+TEST_OBJECTS["revision"] = list(_TEST_OBJECTS["revision"]) + [
+    Revision(
+        id=hash_to_bytes("a569b03ebe6e5f9f2f6077355c40d89bd6986d0c"),
+        message=b"hello again",
+        date=DATES[1],
+        committer=COMMITTERS[1],
+        author=COMMITTERS[0],
+        committer_date=DATES[0],
+        type=RevisionType.GIT,
+        directory=b"\x03" * 20,
+        synthetic=False,
+        metadata={"something": "interesting"},
+        parents=(REVISIONS[0].id,),
+    ),
+]
+
 
 def nullify_ctime(obj):
     if isinstance(obj, (ContentRow, SkippedContentRow)):
@@ -212,6 +236,10 @@ def check_replayed(
                     author=row.author.anonymize(),
                     committer=row.committer.anonymize(),
                 )
+        if attr == "revisions":
+            # the replayer should now drop the metadata attribute; see
+            # swh/storgae/replay.py:_insert_objects()
+            row.metadata = "null"
 
         return row