production/provenance: Deploy grpc instance
This adds a new provenance grpc instance in production. It will fetch its dataset from s3 and build its indices. [1]
The 2nd commit will switch archive.s.o's provenance API to use the new provenance grpc instance (it currently uses the provenance rpc). [2]
Depends on:
- mam being reinstalled as rancher-node-highmem03
Plan:
- Merge the first commit and let the grpc instance install its dataset (this will take a while)
- Ensure the grpc instance is responding properly
- Once the grpc instance runs properly, merge the 2nd commit so the webapp uses the new provenance grpc server
[1] Adds provenance in production
[swh] Comparing changes between branches production and mr/production-deploy-provenance (per environment)...
Your branch is up to date with 'origin/production'.
[swh] Generate config in production branch for environment staging, namespace swh...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra...
[swh] Generate config in production branch for environment staging, namespace next-version...
[swh] Generate config in mr/production-deploy-provenance branch for environment staging...
[swh] Generate config in mr/production-deploy-provenance branch for environment staging...
[swh] Generate config in mr/production-deploy-provenance branch for environment staging...
Your branch is up to date with 'origin/production'.
[swh] Generate config in production branch for environment production, namespace swh...
[swh] Generate config in production branch for environment production, namespace swh-cassandra...
[swh] Generate config in production branch for environment production, namespace next-version...
[swh] Generate config in mr/production-deploy-provenance branch for environment production...
[swh] Generate config in mr/production-deploy-provenance branch for environment production...
[swh] Generate config in mr/production-deploy-provenance branch for environment production...
------------- diff for environment staging namespace swh -------------
No differences
------------- diff for environment staging namespace swh-cassandra -------------
No differences
------------- diff for environment staging namespace next-version -------------
No differences
------------- diff for environment production namespace swh -------------
--- /tmp/swh-chart.swh.7XwmjvMS/production-swh.before 2025-03-27 16:23:35.438292296 +0100
+++ /tmp/swh-chart.swh.7XwmjvMS/production-swh.after 2025-03-27 16:23:35.942272671 +0100
@@ -3745,20 +3745,30 @@
name: webapp-provenance-ingress
namespace: swh
spec:
type: ExternalName
externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
---
# Source: swh/templates/external-services/cname.yaml
apiVersion: v1
kind: Service
metadata:
+ name: provenance-20241206-grpc-ingress
+ namespace: swh
+spec:
+ type: ExternalName
+ externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
+---
+# Source: swh/templates/external-services/cname.yaml
+apiVersion: v1
+kind: Service
+metadata:
name: webapp-provenance-ingress-swh-cassandra
namespace: swh
spec:
type: ExternalName
externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
---
# Source: swh/templates/external-services/cname.yaml
apiVersion: v1
kind: Service
metadata:
------------- diff for environment production namespace swh-cassandra -------------
--- /tmp/swh-chart.swh.7XwmjvMS/production-swh-cassandra.before 2025-03-27 16:23:35.794278434 +0100
+++ /tmp/swh-chart.swh.7XwmjvMS/production-swh-cassandra.after 2025-03-27 16:23:36.314258187 +0100
@@ -11880,20 +11880,46 @@
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- rancher-node-highmem01
persistentVolumeReclaimPolicy: Retain
storageClassName: local-storage
volumeMode: Filesystem
---
+# Source: swh/templates/volumes/persistent-volume.yaml
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: provenance-20241206-persistent-local-pv
+ labels:
+ app: provenance-grpc-20241206
+spec:
+ accessModes:
+ - ReadWriteOnce
+ capacity:
+ storage: 30Ti
+ local:
+ path: /srv/softwareheritage/provenance-20241206
+ nodeAffinity:
+ required:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: kubernetes.io/hostname
+ operator: In
+ values:
+ - rancher-node-highmem03
+ persistentVolumeReclaimPolicy: Retain
+ storageClassName: local-storage
+ volumeMode: Filesystem
+---
# Source: swh/templates/volumes/persistent-volume-claims.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: alter-recovery-bundles-pvc
namespace: swh-cassandra
labels:
app: alter
spec:
accessModes:
@@ -11932,20 +11958,38 @@
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: local-storage
volumeMode: Filesystem
volumeName: graph-20241206-persistent-local-pv
---
+# Source: swh/templates/volumes/persistent-volume-claims.yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: provenance-20241206-persistent-pvc
+ namespace: swh-cassandra
+ labels:
+ app: provenance-20241206-grpc
+spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 30Ti
+ storageClassName: local-storage
+ volumeMode: Filesystem
+ volumeName: provenance-20241206-persistent-local-pv
+---
# Source: swh/templates/counters/rpc-service.yaml
apiVersion: v1
kind: Service
metadata:
name: counters-rpc
namespace: swh-cassandra
labels:
app: counters-rpc
spec:
type: ClusterIP
@@ -12184,20 +12228,30 @@
name: webapp-provenance-ingress
namespace: swh-cassandra
spec:
type: ExternalName
externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
---
# Source: swh/templates/external-services/cname.yaml
apiVersion: v1
kind: Service
metadata:
+ name: provenance-20241206-grpc-ingress
+ namespace: swh-cassandra
+spec:
+ type: ExternalName
+ externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
+---
+# Source: swh/templates/external-services/cname.yaml
+apiVersion: v1
+kind: Service
+metadata:
name: webapp-provenance-ingress-swh-cassandra
namespace: swh-cassandra
spec:
type: ExternalName
externalName: archive-production-rke2-ingress-nginx-controller.ingress-nginx.svc.cluster.local
---
# Source: swh/templates/external-services/cname.yaml
apiVersion: v1
kind: Service
metadata:
@@ -12498,20 +12552,37 @@
app: objstorage-ro-saam-zfs
ports:
- port: 5003
targetPort: 5003
name: rpc
---
# Source: swh/templates/provenance/service.yaml
apiVersion: v1
kind: Service
metadata:
+ name: provenance-20241206-grpc
+ namespace: swh-cassandra
+ labels:
+ app: provenance-20241206-grpc
+spec:
+ type: ClusterIP
+ selector:
+ app: provenance-20241206-grpc
+ ports:
+ - port: 50141
+ targetPort: 50141
+ name: grpc
+---
+# Source: swh/templates/provenance/service.yaml
+apiVersion: v1
+kind: Service
+metadata:
name: provenance-graph-granet
namespace: swh-cassandra
labels:
app: provenance-graph-granet
spec:
type: ClusterIP
selector:
app: provenance-graph-granet
ports:
- port: 5014
@@ -25517,20 +25588,209 @@
- name: pathslicing
hostPath:
path: /srv/softwareheritage/objects
type: Directory
---
# Source: swh/templates/provenance/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
namespace: swh-cassandra
+ name: provenance-20241206-grpc
+ labels:
+ app: provenance-20241206-grpc
+spec:
+ revisionHistoryLimit: 2
+ replicas: 2
+ selector:
+ matchLabels:
+ app: provenance-20241206-grpc
+ strategy:
+ type: RollingUpdate
+ rollingUpdate:
+ maxSurge: 1
+ template:
+ metadata:
+ labels:
+ app: provenance-20241206-grpc
+ annotations:
+ checksum/config: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+ checksum/config-logging: 02745ed090f5098b0de1cd463a67abdc6a7c6b2ff2e5ca30c8f04309733750f5
+ checksum/config-utils: 13a26f6add17e96ce01550153c77dcd48de60241a3f4db3c93d5467234be2a7f
+ checksum/backend-utils: 5ed55de12f3a82cd556464e232ae318f8523db72abd86b7410994ca05c3848ed
+ spec:
+ affinity:
+ nodeAffinity:
+ requiredDuringSchedulingIgnoredDuringExecution:
+ nodeSelectorTerms:
+ - matchExpressions:
+ - key: swh/rpc
+ operator: In
+ values:
+ - "true"
+ priorityClassName: swh-cassandra-frontend-rpc
+ initContainers:
+ - name: fetch-provenance-dataset
+ image: container-registry.softwareheritage.org/swh/infra/swh-apps/provenance:20250326.4
+ command:
+ - /entrypoints/provenance-fetch-datasets.sh
+ env:
+ - name: WITNESS_FETCH_FILE
+ value: /srv/dataset/provenance/.provenance-is-initialized
+ - name: WITNESS_DOWNLOADING_FILE
+ value: /srv/dataset/provenance/.provenance-is-downloading
+ - name: SWH_CONFIG_FILENAME
+ value: /etc/swh/config.yml
+ - name: PROVENANCE_PATH
+ value: /srv/dataset/provenance
+ - name: PROVENANCE_DATASET_FULL
+ value: "true"
+ - name: GRAPH_PATH
+ value: /srv/dataset/graph
+ - name: DATASET_VERSION
+ value: 2024-12-06
+ volumeMounts:
+ - name: configuration
+ mountPath: /etc/swh
+ - name: backend-utils
+ mountPath: /entrypoints
+ - name: dataset-persistent
+ mountPath: /srv/dataset
+ readOnly: false
+
+ - name: index-provenance-dataset
+ image: container-registry.softwareheritage.org/swh/infra/swh-apps/provenance:20250326.4
+ imagePullPolicy: IfNotPresent
+ command:
+ - /entrypoints/provenance-index-dataset.sh
+ env:
+ - name: WITNESS_DATASETS_FILE
+ value: /srv/dataset/provenance/.provenance-is-initialized
+ - name: WITNESS_INDEX_FILE
+ value: /srv/dataset/provenance/.provenance-is-indexed
+ - name: WITNESS_INDEXING_FILE
+ value: /srv/dataset/provenance/.provenance-is-indexing
+ - name: PROVENANCE_PATH
+ value: /srv/dataset/provenance
+ - name: PERIOD
+ value: "3"
+ volumeMounts:
+ - name: backend-utils
+ mountPath: /entrypoints
+ readOnly: true
+ - name: dataset-persistent
+ mountPath: /srv/dataset
+ readOnly: false
+
+ - name: wait-for-dataset
+ image: container-registry.softwareheritage.org/swh/infra/swh-apps/utils:20250211.1
+ imagePullPolicy: IfNotPresent
+ command:
+ - /entrypoints/wait-for-dataset.sh
+ env:
+ - name: WITNESS_FILE
+ value: /srv/dataset/provenance/.provenance-is-initialized
+ - name: SERVICE_NAME
+ value: "provenance"
+ - name: PERIOD
+ value: "3"
+ volumeMounts:
+ - name: backend-utils
+ mountPath: /entrypoints
+ readOnly: true
+ - name: dataset-persistent
+ mountPath: /srv/dataset
+ readOnly: true
+
+ containers:
+ - name: provenance-20241206-grpc
+ resources:
+ requests:
+ memory: 512Mi
+ cpu: 500m
+ image: container-registry.softwareheritage.org/swh/infra/swh-apps/provenance:20250326.4
+ imagePullPolicy: IfNotPresent
+ ports:
+ - containerPort: 50141
+ name: grpc
+ readinessProbe:
+ tcpSocket:
+ port: grpc
+ initialDelaySeconds: 15
+ failureThreshold: 30
+ periodSeconds: 5
+ livenessProbe:
+ tcpSocket:
+ port: grpc
+ initialDelaySeconds: 10
+ periodSeconds: 5
+ command:
+ - /bin/bash
+ args:
+ - -c
+ - /opt/swh/entrypoint.sh
+ env:
+ - name: PROVENANCE_TYPE
+ value: grpc
+ - name: PORT
+ value: "50141"
+ - name: PROVENANCE_PATH
+ value: /srv/dataset/provenance
+ - name: GRAPH_PATH
+ value: /srv/dataset/graph/graph
+ - name: STATSD_HOST
+ value: prometheus-statsd-exporter
+ - name: STATSD_PORT
+ value: "9125"
+ - name: STATSD_TAGS
+ value: deployment:provenance-20241206-grpc
+ - name: SWH_LOG_LEVEL
+ value: INFO
+ - name: SWH_SENTRY_ENVIRONMENT
+ value: production
+ - name: SWH_MAIN_PACKAGE
+ value: swh.provenance
+ - name: SWH_SENTRY_DSN
+ valueFrom:
+ secretKeyRef:
+ name: common-secrets
+ key: provenance-sentry-dsn
+ # 'name' secret should exist & include key
+ # if the setting doesn't exist, sentry pushes will be disabled
+ optional: true
+ - name: SWH_SENTRY_DISABLE_LOGGING_EVENTS
+ value: "true"
+ volumeMounts:
+ - name: dataset-persistent
+ mountPath: /srv/dataset
+ readOnly: false
+
+ volumes:
+ - name: configuration
+ emptyDir: {}
+ - name: config-utils
+ configMap:
+ name: config-utils
+ defaultMode: 0555
+ - name: backend-utils
+ configMap:
+ name: backend-utils
+ defaultMode: 0555
+ - name: dataset-persistent
+ persistentVolumeClaim:
+ claimName: provenance-20241206-persistent-pvc
+---
+# Source: swh/templates/provenance/deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ namespace: swh-cassandra
name: provenance-graph-granet
labels:
app: provenance-graph-granet
spec:
revisionHistoryLimit: 2
replicas: 2
selector:
matchLabels:
app: provenance-graph-granet
strategy:
@@ -31653,20 +31913,81 @@
service:
name: objstorage-ro-saam-zfs
port:
number: 5003
---
# Source: swh/templates/provenance/ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
namespace: swh-cassandra
+ name: provenance-20241206-grpc-ingress-default
+ labels:
+ app: provenance-20241206-grpc
+ endpoint-definition: default
+ annotations:
+ nginx.ingress.kubernetes.io/backend-protocol: GRPC
+ nginx.ingress.kubernetes.io/client-body-buffer-size: 128K
+ nginx.ingress.kubernetes.io/proxy-body-size: 4G
+ nginx.ingress.kubernetes.io/proxy-buffering: "on"
+ nginx.ingress.kubernetes.io/service-upstream: "true"
+ nginx.ingress.kubernetes.io/ssl-redirect: "true"
+spec:
+ ingressClassName: nginx
+ rules:
+ - host: provenance-20241206-grpc-ingress
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: provenance-20241206-grpc
+ port:
+ number: 50141
+---
+# Source: swh/templates/provenance/ingress.yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ namespace: swh-cassandra
+ name: provenance-20241206-grpc-ingress-extra-1-default
+ labels:
+ app: provenance-20241206-grpc
+ endpoint-definition: default
+ annotations:
+ nginx.ingress.kubernetes.io/backend-protocol: GRPC
+ nginx.ingress.kubernetes.io/client-body-buffer-size: 128K
+ nginx.ingress.kubernetes.io/proxy-body-size: 4G
+ nginx.ingress.kubernetes.io/proxy-buffering: "on"
+ nginx.ingress.kubernetes.io/service-upstream: "true"
+ nginx.ingress.kubernetes.io/ssl-redirect: "true"
+ nginx.ingress.kubernetes.io/whitelist-source-range: 10.42.0.0/16,10.43.0.0/16,127.0.0.0/8,192.168.100.0/24,192.168.100.29/32,192.168.101.0/24,192.168.200.0/22,192.168.50.0/24
+spec:
+ ingressClassName: nginx
+ rules:
+ - host: provenance.internal.softwareheritage.org
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: provenance-20241206-grpc
+ port:
+ number: 50141
+---
+# Source: swh/templates/provenance/ingress.yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ namespace: swh-cassandra
name: provenance-graph-granet-ingress-default
labels:
app: provenance-graph-granet
endpoint-definition: default
annotations:
nginx.ingress.kubernetes.io/client-body-buffer-size: 128K
nginx.ingress.kubernetes.io/proxy-body-size: 4G
nginx.ingress.kubernetes.io/proxy-buffering: "on"
nginx.ingress.kubernetes.io/service-upstream: "true"
nginx.ingress.kubernetes.io/whitelist-source-range: 10.42.0.0/16,10.43.0.0/16
@@ -31693,21 +32014,21 @@
app: provenance-graph-granet
endpoint-definition: default
annotations:
nginx.ingress.kubernetes.io/client-body-buffer-size: 128K
nginx.ingress.kubernetes.io/proxy-body-size: 4G
nginx.ingress.kubernetes.io/proxy-buffering: "on"
nginx.ingress.kubernetes.io/service-upstream: "true"
nginx.ingress.kubernetes.io/whitelist-source-range: 10.42.0.0/16,10.43.0.0/16,127.0.0.0/8,192.168.100.0/24,192.168.100.29/32,192.168.101.0/24,192.168.200.0/22,192.168.50.0/24
spec:
rules:
- - host: provenance.internal.softwareheritage.org
+ - host: provenance-rpc.internal.softwareheritage.org
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: provenance-graph-granet
port:
number: 5014
---
[2] Switch provenance web api to grpc
[swh] Comparing changes between branches production and mr/production-deploy-provenance (per environment)...
Your branch is ahead of 'origin/production' by 1 commit.
(use "git push" to publish your local commits)
[swh] Generate config in production branch for environment staging, namespace swh...
[swh] Generate config in production branch for environment staging, namespace swh-cassandra...
[swh] Generate config in production branch for environment staging, namespace next-version...
[swh] Generate config in mr/production-deploy-provenance branch for environment staging...
[swh] Generate config in mr/production-deploy-provenance branch for environment staging...
[swh] Generate config in mr/production-deploy-provenance branch for environment staging...
Your branch is ahead of 'origin/production' by 1 commit.
(use "git push" to publish your local commits)
[swh] Generate config in production branch for environment production, namespace swh...
[swh] Generate config in production branch for environment production, namespace swh-cassandra...
[swh] Generate config in production branch for environment production, namespace next-version...
[swh] Generate config in mr/production-deploy-provenance branch for environment production...
[swh] Generate config in mr/production-deploy-provenance branch for environment production...
[swh] Generate config in mr/production-deploy-provenance branch for environment production...
------------- diff for environment staging namespace swh -------------
No differences
------------- diff for environment staging namespace swh-cassandra -------------
No differences
------------- diff for environment staging namespace next-version -------------
No differences
------------- diff for environment production namespace swh -------------
--- /tmp/swh-chart.swh.j5zjtXlO/production-swh.before 2025-03-27 16:38:20.203817297 +0100
+++ /tmp/swh-chart.swh.j5zjtXlO/production-swh.after 2025-03-27 16:38:20.703797812 +0100
@@ -3312,23 +3312,22 @@
- ${POD_IP}
storage:
cls: remote
enable_requests_retry: true
url: http://storage-azure-read-only-rpc-ingress
search:
cls: remote
enable_requests_retry: true
url: http://search-rpc-ingress-swh-cassandra
provenance:
- cls: remote
- enable_requests_retry: true
- url: http://webapp-provenance-ingress-swh-cassandra
+ cls: grpc
+ url: provenance-20241206-grpc-ingress:80
scheduler:
cls: remote
enable_requests_retry: true
url: http://scheduler-rpc-ingress-swh-cassandra
vault:
cls: remote
enable_requests_retry: true
url: http://vault-rpc-ingress-swh-cassandra
graph:
max_edges:
@@ -6637,21 +6636,21 @@
app: web-postgresql
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
template:
metadata:
labels:
app: web-postgresql
annotations:
- checksum/config: 3faef2c924beb8ca9bff3885cbeda7aae3b6871f9a0bf545702032f0a2acee0d
+ checksum/config: 6cea34ce9516aea33544f55070bb61f3f3f616e0e791db3e814ecc63c1686f14
checksum/config-logging: 81fb24577eb1777be8690f58c1e92d701777fe4ff045bb8445feb924947b9f84
checksum/config-utils: d75ca13b805bce6a8ab59c8e24c938f2283108f6a79134f6e71db86308651dc6
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: swh/web
operator: In
------------- diff for environment production namespace swh-cassandra -------------
--- /tmp/swh-chart.swh.j5zjtXlO/production-swh-cassandra.before 2025-03-27 16:38:20.563803268 +0100
+++ /tmp/swh-chart.swh.j5zjtXlO/production-swh-cassandra.after 2025-03-27 16:38:21.063783783 +0100
@@ -11357,23 +11357,22 @@
enable_requests_retry: true
url: http://storage-azure-read-only-rpc-ingress-swh-cassandra
status:
json_path: 1.0/status/578e5eddcdc0cc7951000520
server_url: https://status.softwareheritage.org/
search:
cls: remote
enable_requests_retry: true
url: http://search-rpc-ingress-swh-cassandra
provenance:
- cls: remote
- enable_requests_retry: true
- url: http://webapp-provenance-ingress-swh-cassandra
+ cls: grpc
+ url: provenance-20241206-grpc-ingress:80
scheduler:
cls: remote
enable_requests_retry: true
url: http://scheduler-rpc-ingress-swh-cassandra
vault:
cls: remote
enable_requests_retry: true
url: http://vault-rpc-ingress-swh-cassandra
graph:
max_edges:
@@ -29609,21 +29608,21 @@
app: web-archive
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
template:
metadata:
labels:
app: web-archive
annotations:
- checksum/config: efc0af9126788f6ed252d91d73741939a0387406ec053e6976a43fb8a7da7664
+ checksum/config: 533d4e2282283c6682a5ba8399b52b632d2a763bef462a94bd8e5623befaddec
checksum/config-logging: af7bf52757798a2fcd4c237ed3de9df87c15b7f38419128a8d67d02b8a485097
checksum/config-utils: 13a26f6add17e96ce01550153c77dcd48de60241a3f4db3c93d5467234be2a7f
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: swh/web
operator: In
Edited by Antoine R. Dumont
Merge request reports
Activity
Please register or sign in to reply