Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions manifests/caikit/caikit-tgis-isvc-grpc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# KServe InferenceService that serves a single caikit-format model through the
# `caikit-tgis-runtime-grpc` ServingRuntime.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: caikit-tgis-isvc-grpc
  annotations:
    # Annotation values must be strings, hence the quoting.
    serving.knative.openshift.io/enablePassthrough: 'true'
    sidecar.istio.io/inject: 'true'
    sidecar.istio.io/rewriteAppHTTPProbers: 'true'
spec:
  predictor:
    serviceAccountName: sa
    model:
      runtime: caikit-tgis-runtime-grpc
      modelFormat:
        name: caikit
      # Location of the (single) model to load.
      storageUri: s3://modelmesh-example-models/llm/models/flan-t5-small-caikit
      # Generic form:
      # storageUri: proto://path/to/model
      # Example, using a PVC:
      # storageUri: pvc://caikit-pvc/flan-t5-small-caikit/
      # The target directory must contain a config.yml
59 changes: 59 additions & 0 deletions manifests/caikit/caikit-tgis-servingruntime-grpc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# KServe ServingRuntime with two containers: a TGIS text-generation container
# and a caikit container that exposes gRPC on port 8085.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: caikit-tgis-runtime-grpc
spec:
  multiModel: false
  supportedModelFormats:
    # Note: this currently *only* supports caikit format models.
    - name: caikit
      autoSelect: true
  containers:
    - name: kserve-container
      image: quay.io/opendatahub/text-generation-inference:stable
      command:
        - text-generation-launcher
      args:
        - --model-name=/mnt/models/artifacts/
      env:
        - name: TRANSFORMERS_CACHE
          value: /tmp/transformers_cache
      # Configure as required:
      # resources:
      #   requests:
      #     cpu: 8
      #     memory: 16Gi
      #
      # Note: readiness/liveness probes cannot be added to this container
      # because knative will refuse them. Multi-container probing will be
      # available after https://github.com/knative/serving/pull/14853 is merged.
    - name: transformer-container
      image: quay.io/opendatahub/caikit-tgis-serving:stable
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
          value: /mnt/models
        - name: TRANSFORMERS_CACHE
          value: /tmp/transformers_cache
        # gRPC on, HTTP off. Env values must be strings, hence the quoting.
        - name: RUNTIME_GRPC_ENABLED
          value: 'true'
        - name: RUNTIME_HTTP_ENABLED
          value: 'false'
      ports:
        - name: h2c
          containerPort: 8085
          protocol: TCP
      readinessProbe:
        # Might require a larger delay for large models.
        initialDelaySeconds: 5
        exec:
          command:
            - python
            - '-m'
            - caikit_health_probe
            - readiness
      livenessProbe:
        initialDelaySeconds: 5
        exec:
          command:
            - python
            - '-m'
            - caikit_health_probe
            - liveness
      # Configure as required:
      # resources:
      #   requests:
      #     cpu: 8
      #     memory: 16Gi
28 changes: 28 additions & 0 deletions manifests/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Orchestrator configuration, mounted into the fms-orchestr8-nlp pod as
# /config/config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fms-orchestr8-config-nlp
  labels:
    app: fmstack-nlp
    component: fms-orchestr8-nlp
    deploy-name: fms-orchestr8-nlp
data:
  # The hostnames below point at one specific cluster; replace them with the
  # route hosts of your own deployment.
  # NOTE(review): the detector `hostname` carries a full URL (scheme + path)
  # next to a separate `port` -- confirm the orchestrator expects this form.
  config.yaml: |
    generation:
      provider: tgis
      service:
        hostname: caikit-tgis-isvc-grpc-predictor-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com
        port: 443
        # tls: caikit
    detectors:
      regex:
        service:
          hostname: https://regex-detector-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com/api/v1/text/contents
          port: 443
        chunker_id: whole_doc_chunker
        default_threshold: 0.5
    # tls:
    #   caikit:
    #     cert_path: /tls/server/tls.crt
    #     key_path: /tls/server/tls.key
    #     client_ca_cert_path: /tls/server/ca.crt
98 changes: 98 additions & 0 deletions manifests/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Deployment of the FMS guardrails orchestrator (single replica).
kind: Deployment
apiVersion: apps/v1
metadata:
  name: fms-orchestr8-nlp
  annotations:
    # Stakater Reloader: restart the pods when this ConfigMap changes. The value
    # must match the ConfigMap actually mounted below (fms-orchestr8-config-nlp);
    # the previous value 'fms-orchestr8-config' named a ConfigMap this
    # Deployment does not use, so config edits never triggered a rollout.
    configmap.reloader.stakater.com/reload: 'fms-orchestr8-config-nlp'
  labels:
    app: fmstack-nlp
    component: fms-orchestr8-nlp
    deploy-name: fms-orchestr8-nlp
spec:
  replicas: 1
  selector:
    matchLabels:
      app: fmstack-nlp
      component: fms-orchestr8-nlp
      deploy-name: fms-orchestr8-nlp
  template:
    metadata:
      labels:
        app: fmstack-nlp
        component: fms-orchestr8-nlp
        deploy-name: fms-orchestr8-nlp
    spec:
      volumes:
        - name: fms-orchestr8-config-nlp
          configMap:
            name: fms-orchestr8-config-nlp
            defaultMode: 420 # decimal for octal 0644
        # Declared for the (currently commented-out) /tls/server mount below.
        - name: server-tls
          secret:
            secretName: caikitstack-caikit-inf-tls
            defaultMode: 256 # decimal for octal 0400
      containers:
        - name: fms-orchestr8-nlp
          image: quay.io/csantiago/guardrails-orchestrator:latest
          imagePullPolicy: Always
          command:
            - /app/bin/fms-guardrails-orchestr8
          env:
            - name: ORCHESTRATOR_CONFIG
              value: /config/config.yaml
            - name: HTTP_PORT
              value: '8033'
            - name: START_UP_HEALTH_CHECK
              value: 'false'
            ## Mount certs to /tls/orch and uncomment to enable (m)TLS
            # - name: TLS_KEY_PATH
            #   value: /tls/orch/server.key
            # - name: TLS_CERT_PATH
            #   value: /tls/orch/server.crt
            # - name: TLS_CLIENT_CA_CERT_PATH
            #   value: /tls/orch/ca.crt
            - name: RUST_BACKTRACE
              value: full
            - name: RUST_LOG
              value: 'fms_guardrails_orchestr8=debug'
          ports:
            - name: http
              containerPort: 8033
              protocol: TCP
          readinessProbe:
            httpGet:
              path: /health
              # NOTE(review): probes 8034 while HTTP_PORT/containerPort are 8033;
              # presumably the orchestrator's separate health port -- confirm.
              port: 8034
              scheme: HTTP
            initialDelaySeconds: 5
            timeoutSeconds: 1
            periodSeconds: 20
            successThreshold: 1
            failureThreshold: 3
          resources:
            limits:
              cpu: '1'
              memory: 2Gi
            requests:
              cpu: '1'
              memory: 2Gi
          securityContext:
            capabilities:
              drop:
                - ALL
            privileged: false
            runAsNonRoot: true
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            seccompProfile:
              type: RuntimeDefault
          volumeMounts:
            # Mounts the `fms-orchestr8-config-nlp` volume declared under
            # spec.template.spec.volumes; subPath exposes only config.yaml.
            - name: fms-orchestr8-config-nlp
              readOnly: true
              mountPath: /config/config.yaml
              subPath: config.yaml
            # TLS material for the caikit generation server
            # [may want to name this better]:
            # - name: server-tls
            #   readOnly: true
            #   mountPath: /tls/server
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
170 changes: 170 additions & 0 deletions manifests/guardrails-orchestrator.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
# Orchestrator configuration, mounted into the fms-orchestr8-nlp pod as
# /config/config.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fms-orchestr8-config-nlp
  labels:
    app: fmstack-nlp
    component: fms-orchestr8-nlp
    deploy-name: fms-orchestr8-nlp
data:
  # The hostnames below point at one specific cluster; replace them with the
  # route hosts of your own deployment.
  # NOTE(review): the detector `hostname` carries a full URL (scheme + path)
  # next to a separate `port` -- confirm the orchestrator expects this form.
  config.yaml: |
    generation:
      provider: tgis
      service:
        hostname: caikit-tgis-isvc-grpc-predictor-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com
        port: 443
        # tls: caikit
    detectors:
      regex:
        service:
          hostname: https://regex-detector-guardrails-orchestrator.apps.rosa.chrxu.8vf2.p3.openshiftapps.com/api/v1/text/contents
          port: 443
        chunker_id: whole_doc_chunker
        default_threshold: 0.5
    # tls:
    #   caikit:
    #     cert_path: /tls/server/tls.crt
    #     key_path: /tls/server/tls.key
    #     client_ca_cert_path: /tls/server/ca.crt
---
# Deployment of the FMS guardrails orchestrator (single replica).
kind: Deployment
apiVersion: apps/v1
metadata:
  name: fms-orchestr8-nlp
  annotations:
    # Stakater Reloader: restart the pods when this ConfigMap changes. The value
    # must match the ConfigMap actually mounted below (fms-orchestr8-config-nlp);
    # the previous value 'fms-orchestr8-config' named a ConfigMap this
    # Deployment does not use, so config edits never triggered a rollout.
    configmap.reloader.stakater.com/reload: 'fms-orchestr8-config-nlp'
  labels:
    app: fmstack-nlp
    component: fms-orchestr8-nlp
    deploy-name: fms-orchestr8-nlp
spec:
  replicas: 1
  selector:
    matchLabels:
      app: fmstack-nlp
      component: fms-orchestr8-nlp
      deploy-name: fms-orchestr8-nlp
  template:
    metadata:
      labels:
        app: fmstack-nlp
        component: fms-orchestr8-nlp
        deploy-name: fms-orchestr8-nlp
    spec:
      volumes:
        - name: fms-orchestr8-config-nlp
          configMap:
            name: fms-orchestr8-config-nlp
            defaultMode: 420 # decimal for octal 0644
        # Declared for the (currently commented-out) /tls/server mount below.
        - name: server-tls
          secret:
            secretName: caikitstack-caikit-inf-tls
            defaultMode: 256 # decimal for octal 0400
      containers:
        - name: fms-orchestr8-nlp
          image: quay.io/csantiago/guardrails-orchestrator:latest
          imagePullPolicy: Always
          command:
            - /app/bin/fms-guardrails-orchestr8
          env:
            - name: ORCHESTRATOR_CONFIG
              value: /config/config.yaml
            - name: HTTP_PORT
              value: '8033'
            - name: START_UP_HEALTH_CHECK
              value: 'false'
            ## Mount certs to /tls/orch and uncomment to enable (m)TLS
            # - name: TLS_KEY_PATH
            #   value: /tls/orch/server.key
            # - name: TLS_CERT_PATH
            #   value: /tls/orch/server.crt
            # - name: TLS_CLIENT_CA_CERT_PATH
            #   value: /tls/orch/ca.crt
            - name: RUST_BACKTRACE
              value: full
            - name: RUST_LOG
              value: 'fms_guardrails_orchestr8=debug'
          ports:
            - name: http
              containerPort: 8033
              protocol: TCP
          readinessProbe:
            httpGet:
              path: /health
              # NOTE(review): probes 8034 while HTTP_PORT/containerPort are 8033;
              # presumably the orchestrator's separate health port -- confirm.
              port: 8034
              scheme: HTTP
            initialDelaySeconds: 5
            timeoutSeconds: 1
            periodSeconds: 20
            successThreshold: 1
            failureThreshold: 3
          resources:
            limits:
              cpu: '1'
              memory: 2Gi
            requests:
              cpu: '1'
              memory: 2Gi
          securityContext:
            capabilities:
              drop:
                - ALL
            privileged: false
            runAsNonRoot: true
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            seccompProfile:
              type: RuntimeDefault
          volumeMounts:
            # Mounts the `fms-orchestr8-config-nlp` volume declared under
            # spec.template.spec.volumes; subPath exposes only config.yaml.
            - name: fms-orchestr8-config-nlp
              readOnly: true
              mountPath: /config/config.yaml
              subPath: config.yaml
            # TLS material for the caikit generation server
            # [may want to name this better]:
            # - name: server-tls
            #   readOnly: true
            #   mountPath: /tls/server
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
---
# ClusterIP Service exposing the orchestrator's `http` port (8033).
apiVersion: v1
kind: Service
metadata:
  name: fms-orchestr8-nlp
  labels:
    app: fmstack-nlp
    component: fms-orchestr8-nlp
spec:
  type: ClusterIP
  # Selects pods carrying all three labels.
  selector:
    app: fmstack-nlp
    component: fms-orchestr8-nlp
    deploy-name: fms-orchestr8-nlp
  ports:
    - name: http
      port: 8033
      targetPort: 8033
      protocol: TCP
  ipFamilyPolicy: SingleStack
  ipFamilies: [IPv4]
  internalTrafficPolicy: Cluster
  sessionAffinity: None
---
# OpenShift Route in front of the fms-orchestr8-nlp Service: edge TLS
# termination, with insecure requests redirected.
apiVersion: route.openshift.io/v1
kind: Route
metadata:
  name: gr2-nlp
  labels:
    app: fmstack-nlp
    component: fms-orchestr8-nlp
spec:
  wildcardPolicy: None
  tls:
    termination: edge
    insecureEdgeTerminationPolicy: Redirect
  port:
    # Named port on the target Service.
    targetPort: http
  to:
    kind: Service
    name: fms-orchestr8-nlp
    weight: 100
12 changes: 12 additions & 0 deletions manifests/minio/minio-secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Storage credentials Secret carrying KServe S3 annotations.
apiVersion: v1
kind: Secret
metadata:
  name: storage-config
  annotations:
    # Replace with your S3 endpoint, e.g. minio-service.kubeflow:9000
    serving.kserve.io/s3-endpoint: 'minio.<minio_ns>.svc:9000'
    # Defaults to 1; can be set to 0 when testing with minio.
    serving.kserve.io/s3-usehttps: '0'
    serving.kserve.io/s3-region: 'us-east-2'
    # Omitting this is the same as false. If true, the provided credentials
    # are ignored and anonymous credentials are used.
    serving.kserve.io/s3-useanoncredential: 'false'
stringData:
  # Placeholder credentials -- replace before use; do not commit real secrets.
  AWS_ACCESS_KEY_ID: 'admin'
  AWS_SECRET_ACCESS_KEY: 'password'
Loading