Skip to content

syncoid: Try to restart the synchronization if a race condition occurs

Since syncoid is able to restart an interrupted transfer, automatically restarting the service on failure should work around the race condition until a patched version of the sanoid package is provided.

Related to T3968

Test Plan

diff origin/production/db1.internal.staging.swh.network current/db1.internal.staging.swh.network
*******************************************
  File[/etc/systemd/system/syncoid-storage1-kafka.service] =>
   parameters =>
     content =>
      @@ -4,4 +4,6 @@
       [Unit]
       Description=ZFS dataset synchronization of_
      +StartLimitIntervalSec=5min
      +StartLimitBurst=2
      _
       [Service]
      @@ -10,4 +12,6 @@
       Group=root
       ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/kafka data/sync/storage1/kafka
      +Restart=on-failure
      +RestartSec=1s
      _
       [Install]
*******************************************
  File[/etc/systemd/system/syncoid-storage1-objects.service] =>
   parameters =>
     content =>
      @@ -4,4 +4,6 @@
       [Unit]
       Description=ZFS dataset synchronization of_
      +StartLimitIntervalSec=5min
      +StartLimitBurst=2
      _
       [Service]
      @@ -10,4 +12,6 @@
       Group=root
       ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/objects data/sync/storage1/objects
      +Restart=on-failure
      +RestartSec=1s
      _
       [Install]
*******************************************
  Systemd::Timer[syncoid-storage1-kafka.timer] =>
   parameters =>
     service_content =>
      @@ -4,4 +4,6 @@
       [Unit]
       Description=ZFS dataset synchronization of_
      +StartLimitIntervalSec=5min
      +StartLimitBurst=2
      _
       [Service]
      @@ -10,4 +12,6 @@
       Group=root
       ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/kafka data/sync/storage1/kafka
      +Restart=on-failure
      +RestartSec=1s
      _
       [Install]
*******************************************
  Systemd::Timer[syncoid-storage1-objects.timer] =>
   parameters =>
     service_content =>
      @@ -4,4 +4,6 @@
       [Unit]
       Description=ZFS dataset synchronization of_
      +StartLimitIntervalSec=5min
      +StartLimitBurst=2
      _
       [Service]
      @@ -10,4 +12,6 @@
       Group=root
       ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/objects data/sync/storage1/objects
      +Restart=on-failure
      +RestartSec=1s
      _
       [Install]
*******************************************
  Systemd::Unit_file[syncoid-storage1-kafka.service] =>
   parameters =>
     content =>
      @@ -4,4 +4,6 @@
       [Unit]
       Description=ZFS dataset synchronization of_
      +StartLimitIntervalSec=5min
      +StartLimitBurst=2
      _
       [Service]
      @@ -10,4 +12,6 @@
       Group=root
       ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/kafka data/sync/storage1/kafka
      +Restart=on-failure
      +RestartSec=1s
      _
       [Install]
*******************************************
  Systemd::Unit_file[syncoid-storage1-objects.service] =>
   parameters =>
     content =>
      @@ -4,4 +4,6 @@
       [Unit]
       Description=ZFS dataset synchronization of_
      +StartLimitIntervalSec=5min
      +StartLimitBurst=2
      _
       [Service]
      @@ -10,4 +12,6 @@
       Group=root
       ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/objects data/sync/storage1/objects
      +Restart=on-failure
      +RestartSec=1s
      _
       [Install]
*******************************************
*** End octocatalog-diff on db1.internal.staging.swh.network

Migrated from D7216 (view on Phabricator)

Merge request reports