syncoid: Try to restart the synchronization if a race condition occurs
Since syncoid is able to resume an interrupted transfer, automatically restarting the service on failure should work around the problem until a patched version of the sanoid package is provided.
Related to T3968
Test Plan
diff origin/production/db1.internal.staging.swh.network current/db1.internal.staging.swh.network
*******************************************
File[/etc/systemd/system/syncoid-storage1-kafka.service] =>
parameters =>
content =>
@@ -4,4 +4,6 @@
[Unit]
Description=ZFS dataset synchronization of_
+StartLimitIntervalSec=5min
+StartLimitBurst=2
_
[Service]
@@ -10,4 +12,6 @@
Group=root
ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/kafka data/sync/storage1/kafka
+Restart=on-failure
+RestartSec=1s
_
[Install]
*******************************************
File[/etc/systemd/system/syncoid-storage1-objects.service] =>
parameters =>
content =>
@@ -4,4 +4,6 @@
[Unit]
Description=ZFS dataset synchronization of_
+StartLimitIntervalSec=5min
+StartLimitBurst=2
_
[Service]
@@ -10,4 +12,6 @@
Group=root
ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/objects data/sync/storage1/objects
+Restart=on-failure
+RestartSec=1s
_
[Install]
*******************************************
Systemd::Timer[syncoid-storage1-kafka.timer] =>
parameters =>
service_content =>
@@ -4,4 +4,6 @@
[Unit]
Description=ZFS dataset synchronization of_
+StartLimitIntervalSec=5min
+StartLimitBurst=2
_
[Service]
@@ -10,4 +12,6 @@
Group=root
ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/kafka data/sync/storage1/kafka
+Restart=on-failure
+RestartSec=1s
_
[Install]
*******************************************
Systemd::Timer[syncoid-storage1-objects.timer] =>
parameters =>
service_content =>
@@ -4,4 +4,6 @@
[Unit]
Description=ZFS dataset synchronization of_
+StartLimitIntervalSec=5min
+StartLimitBurst=2
_
[Service]
@@ -10,4 +12,6 @@
Group=root
ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/objects data/sync/storage1/objects
+Restart=on-failure
+RestartSec=1s
_
[Install]
*******************************************
Systemd::Unit_file[syncoid-storage1-kafka.service] =>
parameters =>
content =>
@@ -4,4 +4,6 @@
[Unit]
Description=ZFS dataset synchronization of_
+StartLimitIntervalSec=5min
+StartLimitBurst=2
_
[Service]
@@ -10,4 +12,6 @@
Group=root
ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/kafka data/sync/storage1/kafka
+Restart=on-failure
+RestartSec=1s
_
[Install]
*******************************************
Systemd::Unit_file[syncoid-storage1-objects.service] =>
parameters =>
content =>
@@ -4,4 +4,6 @@
[Unit]
Description=ZFS dataset synchronization of_
+StartLimitIntervalSec=5min
+StartLimitBurst=2
_
[Service]
@@ -10,4 +12,6 @@
Group=root
ExecStart=syncoid --sshkey /root/.ssh/id_ed25519.syncoid_db1 root@storage1.internal.staging.swh.network:data/objects data/sync/storage1/objects
+Restart=on-failure
+RestartSec=1s
_
[Install]
*******************************************
*** End octocatalog-diff on db1.internal.staging.swh.network
Migrated from D7216 (view on Phabricator)