trustyai-explainability · christinaexyou · Oct 3, 2024
diff --git a/manifests/caikit/caikit-tgis-isvc-grpc.yaml b/manifests/caikit/caikit-tgis-isvc-grpc.yaml
@@ -0,0 +1,20 @@
+# KServe InferenceService that loads one caikit-format model and serves it
+# through the `caikit-tgis-runtime-grpc` ServingRuntime.
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService
+metadata:
+  name: caikit-tgis-isvc-grpc
+  annotations:
+    serving.knative.openshift.io/enablePassthrough: 'true'
+    sidecar.istio.io/inject: 'true'
+    sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+spec:
+  predictor:
+    # NOTE(review): presumably the ServiceAccount that carries the storage
+    # credentials for the S3 URI below - confirm it exists in the namespace.
+    serviceAccountName: sa
+    model:
+      runtime: caikit-tgis-runtime-grpc
+      modelFormat:
+        name: caikit
+      # Location of the single model to serve. The target directory must
+      # contain a config.yml.
+      storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit
+      # Alternative forms:
+      #   storageUri: proto://path/to/model
+      #   storageUri: pvc://caikit-pvc/flan-t5-small-caikit/  # using a PVC
diff --git a/manifests/caikit/caikit-tgis-servingruntime-grpc.yaml b/manifests/caikit/caikit-tgis-servingruntime-grpc.yaml
@@ -0,0 +1,59 @@
+# KServe ServingRuntime for caikit-format models. It runs two containers: the
+# text-generation launcher and a caikit container that is exposed over gRPC
+# only (HTTP is switched off through its environment).
+apiVersion: serving.kserve.io/v1alpha1
+kind: ServingRuntime
+metadata:
+  name: caikit-tgis-runtime-grpc
+spec:
+  multiModel: false
+  supportedModelFormats:
+    # Only caikit-format models are supported at the moment.
+    - name: caikit
+      autoSelect: true
+  containers:
+    - name: kserve-container
+      image: quay.io/opendatahub/text-generation-inference:stable
+      command:
+        - text-generation-launcher
+      args:
+        - '--model-name=/mnt/models/artifacts/'
+      env:
+        - name: TRANSFORMERS_CACHE
+          value: /tmp/transformers_cache
+      # Configure resources as required, e.g.:
+      # resources:
+      #   requests:
+      #     cpu: 8
+      #     memory: 16Gi
+      #
+      # No readiness/liveness probes can be set on this container because
+      # knative will refuse them. Multi-container probing will be available
+      # after https://github.com/knative/serving/pull/14853 is merged.
+    - name: transformer-container
+      image: quay.io/opendatahub/caikit-tgis-serving:stable
+      ports:
+        - name: h2c
+          containerPort: 8085
+          protocol: TCP
+      env:
+        - name: RUNTIME_LOCAL_MODELS_DIR
+          value: /mnt/models
+        - name: TRANSFORMERS_CACHE
+          value: /tmp/transformers_cache
+        # Serve gRPC only; the HTTP endpoint stays disabled.
+        - name: RUNTIME_GRPC_ENABLED
+          value: 'true'
+        - name: RUNTIME_HTTP_ENABLED
+          value: 'false'
+      readinessProbe:
+        # Large models might require a larger initial delay.
+        initialDelaySeconds: 5
+        exec:
+          command:
+            - python
+            - '-m'
+            - caikit_health_probe
+            - readiness
+      livenessProbe:
+        initialDelaySeconds: 5
+        exec:
+          command:
+            - python
+            - '-m'
+            - caikit_health_probe
+            - liveness
+      # Configure resources as required, e.g.:
+      # resources:
+      #   requests:
+      #     cpu: 8
+      #     memory: 16Gi
diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml
@@ -0,0 +1,28 @@
+# ConfigMap carrying the orchestrator configuration file (`config.yaml`).
+# The hostnames inside the file are hard-coded for one environment; adjust
+# them before applying this manifest elsewhere.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fms-orchestr8-config-nlp
+  labels:
+    deploy-name: fms-orchestr8-nlp
+    component: fms-orchestr8-nlp
+    app: fmstack-nlp
+data:
+  # Everything below the `|` is the literal file content, comments included.
+  config.yaml: |
+    generation:
+      provider: tgis
+      service:
+          hostname: caikit-tgis-isvc-grpc-predictor-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com
+          port: 443
+          # tls: caikit
+    detectors:
+      regex:
+        service:
+          hostname:  https://regex-detector-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com/api/v1/text/contents
+          port: 443
+        chunker_id: whole_doc_chunker
+        default_threshold: 0.5
+      # tls:
+      # caikit:
+      #   cert_path: /tls/server/tls.crt
+      #   key_path: /tls/server/tls.key
+      #   client_ca_cert_path: /tls/server/ca.crt
diff --git a/manifests/deployment.yaml b/manifests/deployment.yaml
@@ -0,0 +1,98 @@
+# Deployment running a single replica of the fms-guardrails-orchestr8 binary.
+# Its configuration file comes from the `fms-orchestr8-config-nlp` ConfigMap,
+# mounted at /config/config.yaml.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fms-orchestr8-nlp
+  annotations:
+    # Must match the name of the ConfigMap mounted below, otherwise Stakater
+    # Reloader never restarts the pods on a config change (a subPath mount is
+    # not refreshed in place).
+    configmap.reloader.stakater.com/reload: 'fms-orchestr8-config-nlp'
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+    deploy-name: fms-orchestr8-nlp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: fmstack-nlp
+      component: fms-orchestr8-nlp
+      deploy-name: fms-orchestr8-nlp
+  template:
+    metadata:
+      labels:
+        app: fmstack-nlp
+        component: fms-orchestr8-nlp
+        deploy-name: fms-orchestr8-nlp
+    spec:
+      volumes:
+        - name: fms-orchestr8-config-nlp
+          configMap:
+            name: fms-orchestr8-config-nlp
+            defaultMode: 420  # decimal form of octal 0644
+        # Only used once the commented-out `server-tls` volumeMount is enabled.
+        - name: server-tls
+          secret:
+            secretName: caikitstack-caikit-inf-tls
+            defaultMode: 256  # decimal form of octal 0400
+      containers:
+        - name: fms-orchestr8-nlp
+          # NOTE(review): `latest` is a floating tag; consider pinning a
+          # version or digest for reproducible rollouts.
+          image: quay.io/csantiago/guardrails-orchestrator:latest
+          imagePullPolicy: Always
+          command:
+            - /app/bin/fms-guardrails-orchestr8
+          env:
+            - name: ORCHESTRATOR_CONFIG
+              value: /config/config.yaml
+            - name: HTTP_PORT
+              value: '8033'
+            - name: START_UP_HEALTH_CHECK
+              value: 'false'
+            ## Mount certs to /tls/orch and uncomment to enable (m)TLS
+            # - name: TLS_KEY_PATH
+            #   value: /tls/orch/server.key
+            # - name: TLS_CERT_PATH
+            #   value: /tls/orch/server.crt
+            # - name: TLS_CLIENT_CA_CERT_PATH
+            #   value: /tls/orch/ca.crt
+            - name: RUST_BACKTRACE
+              value: full
+            - name: RUST_LOG
+              value: 'fms_guardrails_orchestr8=debug'
+          ports:
+            - name: http
+              containerPort: 8033
+              protocol: TCP
+          readinessProbe:
+            # NOTE(review): the probe targets 8034 while HTTP_PORT and the
+            # container port are 8033 - presumably a separate health
+            # listener; confirm against the orchestrator's settings.
+            httpGet:
+              path: /health
+              port: 8034
+              scheme: HTTP
+            initialDelaySeconds: 5
+            timeoutSeconds: 1
+            periodSeconds: 20
+            successThreshold: 1
+            failureThreshold: 3
+          resources:
+            limits:
+              cpu: '1'
+              memory: 2Gi
+            requests:
+              cpu: '1'
+              memory: 2Gi
+          securityContext:
+            capabilities:
+              drop:
+                - ALL
+            privileged: false
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+            allowPrivilegeEscalation: false
+            seccompProfile:
+              type: RuntimeDefault
+          volumeMounts:
+            # Projects only the `config.yaml` key of the ConfigMap-backed
+            # volume declared under `volumes` above.
+            - name: fms-orchestr8-config-nlp
+              readOnly: true
+              mountPath: /config/config.yaml
+              subPath: config.yaml
+            # TLS material for the caikit generation server (the volume may
+            # deserve a clearer name).
+            # - name: server-tls
+            #   readOnly: true
+            #   mountPath: /tls/server
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
diff --git a/manifests/guardrails-orchestrator.yaml b/manifests/guardrails-orchestrator.yaml
@@ -0,0 +1,170 @@
+# ConfigMap carrying the orchestrator configuration file (`config.yaml`).
+# The hostnames inside the file are hard-coded for one environment; adjust
+# them before applying this manifest elsewhere.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: fms-orchestr8-config-nlp
+  labels:
+    deploy-name: fms-orchestr8-nlp
+    component: fms-orchestr8-nlp
+    app: fmstack-nlp
+data:
+  # Everything below the `|` is the literal file content, comments included.
+  config.yaml: |
+    generation:
+      provider: tgis
+      service:
+          hostname: caikit-tgis-isvc-grpc-predictor-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com
+          port: 443
+          # tls: caikit
+    detectors:
+      regex:
+        service:
+          hostname:  https://regex-detector-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com/api/v1/text/contents
+          port: 443
+        chunker_id: whole_doc_chunker
+        default_threshold: 0.5
+      # tls:
+      # caikit:
+      #   cert_path: /tls/server/tls.crt
+      #   key_path: /tls/server/tls.key
+      #   client_ca_cert_path: /tls/server/ca.crt
+---
+# Deployment running a single replica of the fms-guardrails-orchestr8 binary.
+# Its configuration file comes from the `fms-orchestr8-config-nlp` ConfigMap,
+# mounted at /config/config.yaml.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: fms-orchestr8-nlp
+  annotations:
+    # Must match the name of the ConfigMap mounted below, otherwise Stakater
+    # Reloader never restarts the pods on a config change (a subPath mount is
+    # not refreshed in place).
+    configmap.reloader.stakater.com/reload: 'fms-orchestr8-config-nlp'
+  labels:
+    app: fmstack-nlp
+    component: fms-orchestr8-nlp
+    deploy-name: fms-orchestr8-nlp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: fmstack-nlp
+      component: fms-orchestr8-nlp
+      deploy-name: fms-orchestr8-nlp
+  template:
+    metadata:
+      labels:
+        app: fmstack-nlp
+        component: fms-orchestr8-nlp
+        deploy-name: fms-orchestr8-nlp
+    spec:
+      volumes:
+        - name: fms-orchestr8-config-nlp
+          configMap:
+            name: fms-orchestr8-config-nlp
+            defaultMode: 420  # decimal form of octal 0644
+        # Only used once the commented-out `server-tls` volumeMount is enabled.
+        - name: server-tls
+          secret:
+            secretName: caikitstack-caikit-inf-tls
+            defaultMode: 256  # decimal form of octal 0400
+      containers:
+        - name: fms-orchestr8-nlp
+          # NOTE(review): `latest` is a floating tag; consider pinning a
+          # version or digest for reproducible rollouts.
+          image: quay.io/csantiago/guardrails-orchestrator:latest
+          imagePullPolicy: Always
+          command:
+            - /app/bin/fms-guardrails-orchestr8
+          env:
+            - name: ORCHESTRATOR_CONFIG
+              value: /config/config.yaml
+            - name: HTTP_PORT
+              value: '8033'
+            - name: START_UP_HEALTH_CHECK
+              value: 'false'
+            ## Mount certs to /tls/orch and uncomment to enable (m)TLS
+            # - name: TLS_KEY_PATH
+            #   value: /tls/orch/server.key
+            # - name: TLS_CERT_PATH
+            #   value: /tls/orch/server.crt
+            # - name: TLS_CLIENT_CA_CERT_PATH
+            #   value: /tls/orch/ca.crt
+            - name: RUST_BACKTRACE
+              value: full
+            - name: RUST_LOG
+              value: 'fms_guardrails_orchestr8=debug'
+          ports:
+            - name: http
+              containerPort: 8033
+              protocol: TCP
+          readinessProbe:
+            # NOTE(review): the probe targets 8034 while HTTP_PORT and the
+            # container port are 8033 - presumably a separate health
+            # listener; confirm against the orchestrator's settings.
+            httpGet:
+              path: /health
+              port: 8034
+              scheme: HTTP
+            initialDelaySeconds: 5
+            timeoutSeconds: 1
+            periodSeconds: 20
+            successThreshold: 1
+            failureThreshold: 3
+          resources:
+            limits:
+              cpu: '1'
+              memory: 2Gi
+            requests:
+              cpu: '1'
+              memory: 2Gi
+          securityContext:
+            capabilities:
+              drop:
+                - ALL
+            privileged: false
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+            allowPrivilegeEscalation: false
+            seccompProfile:
+              type: RuntimeDefault
+          volumeMounts:
+            # Projects only the `config.yaml` key of the ConfigMap-backed
+            # volume declared under `volumes` above.
+            - name: fms-orchestr8-config-nlp
+              readOnly: true
+              mountPath: /config/config.yaml
+              subPath: config.yaml
+            # TLS material for the caikit generation server (the volume may
+            # deserve a clearer name).
+            # - name: server-tls
+            #   readOnly: true
+            #   mountPath: /tls/server
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+---
+# ClusterIP Service that forwards TCP port 8033 (named `http`) to the pods
+# selected by the three labels under `selector`.
+apiVersion: v1
+kind: Service
+metadata:
+  name: fms-orchestr8-nlp
+  labels:
+    component: fms-orchestr8-nlp
+    app: fmstack-nlp
+spec:
+  type: ClusterIP
+  selector:
+    deploy-name: fms-orchestr8-nlp
+    component: fms-orchestr8-nlp
+    app: fmstack-nlp
+  ports:
+    - name: http
+      port: 8033
+      targetPort: 8033
+      protocol: TCP
+  sessionAffinity: None
+  internalTrafficPolicy: Cluster
+  # Single-stack, IPv4 only.
+  ipFamilyPolicy: SingleStack
+  ipFamilies:
+    - IPv4
+---
+# OpenShift Route that sends all traffic to the `http` port of the
+# `fms-orchestr8-nlp` Service. TLS is terminated at the edge and insecure
+# requests are redirected.
+apiVersion: route.openshift.io/v1
+kind: Route
+metadata:
+  name: gr2-nlp
+  labels:
+    component: fms-orchestr8-nlp
+    app: fmstack-nlp
+spec:
+  wildcardPolicy: None
+  tls:
+    insecureEdgeTerminationPolicy: Redirect
+    termination: edge
+  port:
+    targetPort: http
+  to:
+    kind: Service
+    name: fms-orchestr8-nlp
+    weight: 100
diff --git a/manifests/minio/minio-secret.yaml b/manifests/minio/minio-secret.yaml
@@ -0,0 +1,12 @@
+# Secret holding S3 access settings, expressed as KServe `s3-*` annotations
+# plus an access key pair.
+# NOTE(review): the key pair below looks like sample credentials; replace it
+# before using this manifest outside a test setup.
+apiVersion: v1
+kind: Secret
+metadata:
+  name: storage-config
+  annotations:
+    # Replace with your S3 endpoint, e.g. minio-service.kubeflow:9000.
+    serving.kserve.io/s3-endpoint: minio.<minio_ns>.svc:9000
+    # Defaults to 1; can be set to 0 when testing with MinIO.
+    serving.kserve.io/s3-usehttps: '0'
+    serving.kserve.io/s3-region: 'us-east-2'
+    # Omitting this is the same as "false"; "true" ignores the provided
+    # credentials and uses anonymous ones.
+    serving.kserve.io/s3-useanoncredential: 'false'
+stringData:
+  AWS_ACCESS_KEY_ID: 'admin'
+  AWS_SECRET_ACCESS_KEY: 'password'