From 0dcb1f7d4a369a1ad07db71b590ff5b2bce51a62 Mon Sep 17 00:00:00 2001 From: Jenkins for Software Heritage <jenkins@thyssen.internal.softwareheritage.org> Date: Tue, 4 Mar 2025 13:46:39 +0000 Subject: [PATCH 1/5] v439: Release swh.deposit v2.4.1 --- swh/Chart.yaml | 2 +- values-swh-application-versions.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/swh/Chart.yaml b/swh/Chart.yaml index 42b071584..ddd11276d 100644 --- a/swh/Chart.yaml +++ b/swh/Chart.yaml @@ -1,5 +1,5 @@ apiVersion: v2 -appVersion: 438 +appVersion: 439 description: A Helm chart to deploy the softwareheritage stack name: swh type: application diff --git a/values-swh-application-versions.yaml b/values-swh-application-versions.yaml index ce10db22d..9b6ea6dfc 100644 --- a/values-swh-application-versions.yaml +++ b/values-swh-application-versions.yaml @@ -7,9 +7,9 @@ swh_alter_image_version: '20250220.1' swh_counters_image: container-registry.softwareheritage.org/swh/infra/swh-apps/counters swh_counters_image_version: '20250211.2' swh_deposit_checkers_image: container-registry.softwareheritage.org/swh/infra/swh-apps/deposit_checkers -swh_deposit_checkers_image_version: '20250227.1' +swh_deposit_checkers_image_version: '20250304.1' swh_deposit_image: container-registry.softwareheritage.org/swh/infra/swh-apps/deposit -swh_deposit_image_version: '20250227.1' +swh_deposit_image_version: '20250304.1' swh_graph_image: container-registry.softwareheritage.org/swh/infra/swh-apps/graph swh_graph_image_version: '20250211.3' swh_graphql_image: container-registry.softwareheritage.org/swh/infra/swh-apps/graphql -- GitLab From de4b7d99ef76020343cf81042c3b24854ee24e98 Mon Sep 17 00:00:00 2001 From: Vincent SELLIER <vincent.sellier@softwareheritage.org> Date: Tue, 4 Mar 2025 16:58:53 +0100 Subject: [PATCH 2/5] next-version: Deploy the save-code-now loader To be able to ingest origins on demand on this environment --- swh/values/staging/next-version.yaml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/swh/values/staging/next-version.yaml b/swh/values/staging/next-version.yaml index 551c5a443..e402ed0f5 100644 --- a/swh/values/staging/next-version.yaml +++ b/swh/values/staging/next-version.yaml @@ -299,6 +299,32 @@ loaders: queueThreshold: 1 maxReplicaCount: 1 stopWhenNoActivity: false + save-code-now: + queues: + - save_code_now:swh.loader.bzr.tasks.LoadBazaar + - save_code_now:swh.loader.cvs.tasks.LoadCvsRepository + - save_code_now:swh.loader.git.tasks.UpdateGitRepository + - save_code_now:swh.loader.git.tasks.LoadDiskGitRepository + - save_code_now:swh.loader.git.tasks.UncompressAndLoadDiskGitRepository + - save_code_now:swh.loader.mercurial.tasks.LoadArchiveMercurial + - save_code_now:swh.loader.mercurial.tasks.LoadMercurial + - save_code_now:swh.loader.svn.tasks.LoadSvnRepository + - save_code_now:swh.loader.svn.tasks.MountAndLoadSvnRepository + - save_code_now:swh.loader.svn.tasks.DumpMountAndLoadSvnRepository + - save_code_now:swh.loader.package.archive.tasks.LoadTarball + ackLate: true + autoScaling: + stopWhenNoActivity: true + queueThreshold: 1 + minReplicaCount: 0 + maxReplicaCount: 2 + requestedMemory: 200Mi + requestedCpu: 50m + limitedMemory: 15Gi + tmpEphemeralStorage: + emptyDir: + medium: Memory + sizeLimit: 10Gi svn: enabled: true queues: -- GitLab From e1f80d6ba42dd1e8de53ff6ff5aa83c36046583e Mon Sep 17 00:00:00 2001 From: Vincent SELLIER <vincent.sellier@softwareheritage.org> Date: Tue, 4 Mar 2025 18:18:22 +0100 Subject: [PATCH 3/5] next-version: Deploy a read-only storage for the webapp It's actually a fake read-only storage as there is no read-only user configured on the k8ssandra deployment --- swh/values/staging/next-version.yaml | 46 ++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/swh/values/staging/next-version.yaml b/swh/values/staging/next-version.yaml index e402ed0f5..d3e3c78df 100644 --- a/swh/values/staging/next-version.yaml +++ b/swh/values/staging/next-version.yaml @@ -553,7 +553,7 @@ storageReplayer: cassandraSeeds: - cassandra-cluster-next-version-service -cassandraStorage: &cassandraStorage +rwCassandraStorage: &rwCassandraStorage cls: cassandra cassandraSeedsRef: cassandraSeeds keyspace: swh @@ -573,6 +573,25 @@ cassandraStorage: &cassandraStorage secretKeyRef: cassandra-cluster-superuser secretKeyName: password +roCassandraStorage: + cls: cassandra + cassandraSeedsRef: cassandraSeeds + keyspace: swh + initKeyspace: false + consistencyLevel: LOCAL_QUORUM + authProvider: + cls: cassandra.auth.PlainTextAuthProvider + username: ${CASSANDRA_RW_USERNAME} + password: ${CASSANDRA_RW_PASSWORD} + secrets: + # TODO: deploy a real cassandra RO user + CASSANDRA_RW_USERNAME: + secretKeyRef: cassandra-cluster-superuser + secretKeyName: username + CASSANDRA_RW_PASSWORD: + secretKeyRef: cassandra-cluster-superuser + secretKeyName: password + noopObjstorageConfiguration: cls: noop @@ -678,20 +697,23 @@ rpcROObjstoragePathslicingConfiguration: url: http://objstorage-ro-pathslicing:5003 enable_requests_retry: true -rwStorageCassandraWithJournalConfiguration: - storageConfigurationRef: cassandraStorage +rwCassandraStorageWithJournalConfiguration: + storageConfigurationRef: rwCassandraStorage journalWriterConfigurationRef: storageJournalWriterConfiguration objstorageConfigurationRef: rpcRWObjstoragePathslicingConfiguration +roCassandraStorageConfiguration: + storageConfigurationRef: roCassandraStorage + objstorageConfigurationRef: rpcROObjstoragePathslicingConfiguration -cassandraStorageForReferenceTableJob: - <<: *cassandraStorage +rwCassandraStorageForReferenceTableJob: + <<: *rwCassandraStorage specificOptions: table_options: object_references_*: "compression = {'class': 'ZstdCompressor', 'compression_level':'19'} AND compaction = {'class': 'UnifiedCompactionStrategy'}" -rwCassandraStorageForReferenceTableJob: - storageConfigurationRef: cassandraStorageForReferenceTableJob +roCassandraStorageForReferenceTableJob: + storageConfigurationRef: rwCassandraStorageForReferenceTableJob objstorageConfigurationRef: noopObjstorageConfiguration storage: @@ -699,7 +721,7 @@ storage: deployments: # main storage to let the loaders write rw-cassandra: - storageConfigurationRef: rwStorageCassandraWithJournalConfiguration + storageConfigurationRef: rwCassandraStorageWithJournalConfiguration cronjobs: create-object-reference-partitions: enabled: false @@ -707,7 +729,13 @@ storage: logLevel: INFO cron: 5 0 * * mon concurrencyPolicy: Forbid - # a storage postgresql instance to replay data + # RO cassandra storage, for webapp + ro-cassandra: + storageConfigurationRef: roCassandraStorageConfiguration + autoScaling: + minReplicaCount: 1 + maxReplicaCount: 1 + # storage postgresql instance to replay data rw-postgresql: enabled: true storageConfigurationRef: rwStoragePostgresqlConfiguration -- GitLab From 51ca44e2b49b80e260fd5be2535348c7e942ca06 Mon Sep 17 00:00:00 2001 From: Vincent SELLIER <vincent.sellier@softwareheritage.org> Date: Tue, 4 Mar 2025 19:17:58 +0100 Subject: [PATCH 4/5] swh-next-version: Configure the webapp to use the ro cassandra storage --- swh/values/staging/next-version.yaml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/swh/values/staging/next-version.yaml b/swh/values/staging/next-version.yaml index d3e3c78df..cc0ebe47d 100644 --- a/swh/values/staging/next-version.yaml +++ b/swh/values/staging/next-version.yaml @@ -121,14 +121,19 @@ remoteROPostgresqlStorageConfiguration: url: http://storage-ro-postgresql:5002 enable_requests_retry: true -remoteCassandraStorageConfiguration: +remoteROCassandraStorageConfiguration: + cls: remote + url: http://storage-ro-cassandra:5002 + enable_requests_retry: true + +remoteRWCassandraStorageConfiguration: cls: remote url: http://storage-rw-cassandra:5002 enable_requests_retry: true remoteRWStorageWithPipelineConfiguration: pipelineStepsRef: storagePipelineSteps - storageConfigurationRef: remoteCassandraStorageConfiguration + storageConfigurationRef: remoteRWCassandraStorageConfiguration rwStoragePostgresqlConfiguration: storageConfigurationRef: postgresqlRWStorageConfiguration @@ -782,7 +787,7 @@ web: indexerStorageConfigurationRef: remoteIndexerStorageConfiguration countersConfigurationRef: remoteCountersConfiguration historyCountersUrlRef: historyCountersUrl - storageConfigurationRef: remoteROPostgresqlStorageConfiguration + storageConfigurationRef: remoteROCassandraStorageConfiguration webhooksConfigurationRef: webhooksConfiguration provenanceConfigurationRef: provenanceConfiguration graphConfigurationRef: webGraphConfiguration @@ -1209,7 +1214,7 @@ deposit: # In prod this storage is read-only storageConfigurationRef: remoteROPostgresqlStorageConfiguration # XXX: but this one is read-write? - storageMetadataConfigurationRef: remoteCassandraStorageConfiguration + storageMetadataConfigurationRef: remoteRWCassandraStorageConfiguration djangoConfigurationRef: djangoDepositConfiguration blobstorageConfigurationRef: azureDepositConfiguration keycloakConfigurationRef: keycloakConfiguration -- GitLab From 7b9ddf3fe5616ba181563d0773336fb9700e7487 Mon Sep 17 00:00:00 2001 From: Vincent SELLIER <vincent.sellier@softwareheritage.org> Date: Tue, 4 Mar 2025 20:36:50 +0100 Subject: [PATCH 5/5] swh-next-version: Use the same objstorage for read and write The volumes were not shared between the ro and the rw instance so the ro instance didn't see any contents --- swh/values/staging/next-version.yaml | 33 +++------------------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/swh/values/staging/next-version.yaml b/swh/values/staging/next-version.yaml index cc0ebe47d..82e697fcd 100644 --- a/swh/values/staging/next-version.yaml +++ b/swh/values/staging/next-version.yaml @@ -665,29 +665,6 @@ objstorage: # check readWriteStorageConfiguration example to configure your storage objstorageConfigurationRef: pathslicingRWObjstorageConfiguration - # Activate dedicated instance for read workload - ro-pathslicing: - enabled: true - nodeSelector: - kubernetes.io/hostname: rancher-node-staging-rke2-metal01 - extraVolumes: - pathslicing-ro: - mountPath: /srv/swh/objects - readOnly: true - volumeDefinition: - ephemeral: - volumeClaimTemplate: - metadata: - labels: - type: persistent-volume - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi - storageClassName: local-persistent - # mandatory # check readWriteStorageConfiguration example to configure your storage objstorageConfigurationRef: pathslicingROObjstorageConfiguration @@ -697,11 +674,6 @@ rpcRWObjstoragePathslicingConfiguration: url: http://objstorage-rw-pathslicing:5003 enable_requests_retry: true -rpcROObjstoragePathslicingConfiguration: - cls: remote - url: http://objstorage-ro-pathslicing:5003 - enable_requests_retry: true - rwCassandraStorageWithJournalConfiguration: storageConfigurationRef: rwCassandraStorage journalWriterConfigurationRef: storageJournalWriterConfiguration @@ -709,7 +681,8 @@ rwCassandraStorageWithJournalConfiguration: roCassandraStorageConfiguration: storageConfigurationRef: roCassandraStorage - objstorageConfigurationRef: rpcROObjstoragePathslicingConfiguration + # TODO: Use a RO objstorage + objstorageConfigurationRef: rpcRWObjstoragePathslicingConfiguration rwCassandraStorageForReferenceTableJob: <<: *rwCassandraStorage @@ -1122,7 +1095,7 @@ indexers: storageConfigurationRef: remoteROStorageConfiguration schedulerConfigurationRef: remoteSchedulerConfiguration indexerStorageConfigurationRef: remoteIndexerStorageConfiguration - objstorageConfigurationRef: rpcROObjstoragePathslicingConfiguration + objstorageConfigurationRef: rpcRWObjstoragePathslicingConfiguration journalClientConfigurationRef: indexerJournalClientConfiguration deployments: origin-intrinsic: -- GitLab