
Commit 3a65afc

Merge pull request #5 from trustyai-explainability/bump-0.10.0
chore: Bump orchestrator version to 0.10.0
2 parents (383b5d4 + 2347bba) · commit 3a65afc


75 files changed: +7900 −6823 lines

Cargo.lock

Lines changed: 248 additions & 324 deletions
(Generated file; diff not rendered.)

Cargo.toml

Lines changed: 25 additions & 26 deletions
@@ -1,6 +1,6 @@
 [package]
 name = "fms-guardrails-orchestr8"
-version = "0.1.0"
+version = "0.10.0"
 edition = "2024"
 authors = ["Evaline Ju", "Gaurav Kumbhat", "Dan Clark"]
 description = "Foundation models orchestration server"
@@ -14,24 +14,24 @@ name = "fms-guardrails-orchestr8"
 path = "src/main.rs"
 
 [dependencies]
-anyhow = "1.0.95"
-async-trait = "0.1.85"
-axum = { version = "0.8.1", features = ["json"] }
-axum-extra = { version = "0.10.0", features = ["json-lines"] }
-bytes = "1.10.0"
-clap = { version = "4.5.26", features = ["derive", "env"] }
+anyhow = "1.0.98"
+async-trait = "0.1.88"
+axum = { version = "0.8.4", features = ["json"] }
+axum-extra = { version = "0.10.1", features = ["json-lines"] }
+bytes = "1.10.1"
+clap = { version = "4.5.39", features = ["derive", "env"] }
 eventsource-stream = "0.2.3"
 futures = "0.3.31"
 futures-util = { version = "0.3", default-features = false, features = [] }
 ginepro = "0.8.2"
-http = "1.2.0"
+http = "1.3.1"
 http-body = "1.0"
-http-body-util = "0.1.2"
+http-body-util = "0.1.3"
 http-serde = "2.1.1"
-hyper = { version = "1.5.2", features = ["http1", "http2", "server"] }
-hyper-rustls = { version = "0.27.5", features = ["ring"] }
+hyper = { version = "1.6.0", features = ["http1", "http2", "server"] }
+hyper-rustls = { version = "0.27.6", features = ["ring"] }
 hyper-timeout = "0.5.2"
-hyper-util = { version = "0.1.10", features = [
+hyper-util = { version = "0.1.13", features = [
     "server-auto",
     "server-graceful",
     "tokio",
@@ -44,54 +44,53 @@ opentelemetry-otlp = { version = "0.27.0", features = [
 ] }
 opentelemetry_sdk = { version = "0.27.1", features = ["rt-tokio", "metrics"] }
 pin-project-lite = "0.2.16"
-prost = "0.13.4"
-reqwest = { version = "0.12.12", features = [
+prost = "0.13.5"
+reqwest = { version = "0.12.18", features = [
     "blocking",
     "rustls-tls",
     "json",
     "stream",
 ] }
-rustls = { version = "0.23.21", default-features = false, features = [
+rustls = { version = "0.23.27", default-features = false, features = [
     "ring",
     "std",
 ] }
 rustls-pemfile = "2.2.0"
 rustls-webpki = "0.102.8"
-serde = { version = "1.0.217", features = ["derive"] }
-serde_json = { version = "1.0.135", features = ["preserve_order"] }
+serde = { version = "1.0.219", features = ["derive"] }
+serde_json = { version = "1.0.140", features = ["preserve_order"] }
 serde_yml = "0.0.12"
-thiserror = "2.0.11"
-tokio = { version = "1.43.0", features = [
+thiserror = "2.0.12"
+tokio = { version = "1.45.1", features = [
     "rt",
     "rt-multi-thread",
     "parking_lot",
     "signal",
     "sync",
     "fs",
 ] }
-tokio-rustls = { version = "0.26.1", features = ["ring"] }
+tokio-rustls = { version = "0.26.2", features = ["ring"] }
 tokio-stream = { version = "0.1.17", features = ["sync"] }
 tonic = { version = "0.12.3", features = [
     "tls",
     "tls-roots",
     "tls-webpki-roots",
 ] }
 tower = { version = "0.5.2", features = ["timeout"] }
-tower-http = { version = "0.6.2", features = ["trace"] }
+tower-http = { version = "0.6.4", features = ["trace"] }
 tracing = "0.1.41"
 tracing-opentelemetry = "0.28.0"
 tracing-subscriber = { version = "0.3.19", features = ["json", "env-filter"] }
 url = "2.5.4"
-uuid = { version = "1.12.1", features = ["v4"] }
+uuid = { version = "1.17.0", features = ["v4"] }
 
 [build-dependencies]
 tonic-build = "0.12.3"
 
 [dev-dependencies]
-axum-test = "17.1.0"
-faux = "0.1.12"
-mocktail = { version = "0.2.4-alpha" }
-rand = "0.9.0"
+axum-test = "17.3.0"
+mocktail = { git = "https://github.com/IBM/mocktail" }
+rand = "0.9.1"
 test-log = "0.2.17"
 
 [profile.release]

Dockerfile

Lines changed: 12 additions & 3 deletions
@@ -5,15 +5,24 @@ ARG CONFIG_FILE=config/config.yaml
 
 ## Rust builder ################################################################
 # Specific debian version so that compatible glibc version is used
-FROM rust:1.85.1-bullseye AS rust-builder
+FROM rust:1.87.0 AS rust-builder
 ARG PROTOC_VERSION
 
 ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
 
 # Install protoc, no longer included in prost crate
 RUN cd /tmp && \
-    curl -L -O https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-x86_64.zip && \
-    unzip protoc-*.zip -d /usr/local && rm protoc-*.zip
+    if [ "$(uname -m)" = "s390x" ]; then \
+        apt update && \
+        apt install -y cmake clang libclang-dev curl unzip && \
+        curl -L -O https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-s390_64.zip; \
+    else \
+        curl -L -O https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-x86_64.zip; \
+    fi && \
+    unzip protoc-*.zip -d /usr/local && \
+    rm protoc-*.zip
+
+ENV LIBCLANG_PATH=/usr/lib/llvm-14/lib/
 
 WORKDIR /app
 

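Note on the Dockerfile change above: the builder stage takes PROTOC_VERSION as a build argument and now selects the matching protoc release archive per architecture, installing clang/libclang (with LIBCLANG_PATH set) for the s390x path. A hypothetical build invocation is sketched below; the protoc version and image tag are illustrative, not taken from this commit:

# Hypothetical example only: PROTOC_VERSION value and image tag are not specified in this commit
docker build --build-arg PROTOC_VERSION=26.1 -t fms-guardrails-orchestr8 .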
rust-toolchain.toml

Lines changed: 2 additions & 2 deletions
@@ -1,3 +1,3 @@
 [toolchain]
-channel = "1.85.1"
-components = ["rustfmt", "clippy"]
+channel = "1.87.0"
+components = ["rustfmt", "clippy"]

src/args.rs

Lines changed: 1 addition & 1 deletion
@@ -140,9 +140,9 @@ impl OtlpProtocol {
 
 #[derive(Debug, Clone, Copy, Default, PartialEq)]
 pub enum LogFormat {
+    Compact,
     #[default]
     Full,
-    Compact,
     Pretty,
     JSON,
 }

src/clients.rs

Lines changed: 1 addition & 5 deletions
@@ -32,7 +32,7 @@ use hyper_timeout::TimeoutConnector;
 use hyper_util::rt::TokioExecutor;
 use tonic::{Request, metadata::MetadataMap};
 use tower::{ServiceBuilder, timeout::TimeoutLayer};
-use tracing::{Span, debug, instrument};
+use tracing::Span;
 use tracing_opentelemetry::OpenTelemetrySpanExt;
 use url::Url;
 
@@ -205,7 +205,6 @@ impl ClientMap {
     }
 }
 
-#[instrument(skip_all, fields(hostname = service_config.hostname))]
 pub async fn create_http_client(
     default_port: u16,
     service_config: &ServiceConfig,
@@ -220,7 +219,6 @@ pub async fn create_http_client(
     base_url
         .set_port(Some(port))
         .unwrap_or_else(|_| panic!("error setting port: {}", port));
-    debug!(%base_url, "creating HTTP client");
 
     let connect_timeout = Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SEC);
     let request_timeout = Duration::from_secs(
@@ -257,7 +255,6 @@ pub async fn create_http_client(
     Ok(HttpClient::new(base_url, client))
 }
 
-#[instrument(skip_all, fields(hostname = service_config.hostname))]
 pub async fn create_grpc_client<C: Debug + Clone>(
     default_port: u16,
     service_config: &ServiceConfig,
@@ -270,7 +267,6 @@ pub async fn create_grpc_client<C: Debug + Clone>(
     };
     let mut base_url = Url::parse(&format!("{}://{}", protocol, &service_config.hostname)).unwrap();
     base_url.set_port(Some(port)).unwrap();
-    debug!(%base_url, "creating gRPC client");
     let connect_timeout = Duration::from_secs(DEFAULT_CONNECT_TIMEOUT_SEC);
     let request_timeout = Duration::from_secs(
         service_config

src/clients/chunker.rs

Lines changed: 3 additions & 115 deletions
@@ -19,10 +19,10 @@ use std::pin::Pin;
 
 use async_trait::async_trait;
 use axum::http::HeaderMap;
-use futures::{Future, Stream, StreamExt, TryStreamExt};
+use futures::{Future, StreamExt, TryStreamExt};
 use ginepro::LoadBalancedChannel;
 use tonic::{Code, Request, Response, Status, Streaming};
-use tracing::{Span, debug, info, instrument};
+use tracing::Span;
 
 use super::{
     BoxStream, Client, Error, create_grpc_client, errors::grpc_to_http_code,
@@ -36,7 +36,7 @@ use crate::{
             BidiStreamingChunkerTokenizationTaskRequest, ChunkerTokenizationTaskRequest,
             chunkers_service_client::ChunkersServiceClient,
         },
-        caikit_data_model::nlp::{ChunkerTokenizationStreamResult, Token, TokenizationResults},
+        caikit_data_model::nlp::{ChunkerTokenizationStreamResult, TokenizationResults},
         grpc::health::v1::{HealthCheckRequest, health_client::HealthClient},
     },
     utils::trace::trace_context_from_grpc_response,
@@ -50,14 +50,12 @@ pub const DEFAULT_CHUNKER_ID: &str = "whole_doc_chunker";
 type StreamingTokenizationResult =
     Result<Response<Streaming<ChunkerTokenizationStreamResult>>, Status>;
 
-#[cfg_attr(test, faux::create)]
 #[derive(Clone)]
 pub struct ChunkerClient {
     client: ChunkersServiceClient<OtelGrpcService<LoadBalancedChannel>>,
     health_client: HealthClient<OtelGrpcService<LoadBalancedChannel>>,
 }
 
-#[cfg_attr(test, faux::methods)]
 impl ChunkerClient {
     pub async fn new(config: &ServiceConfig) -> Self {
         let client = create_grpc_client(DEFAULT_PORT, config, ChunkersServiceClient::new).await;
@@ -68,28 +66,24 @@ impl ChunkerClient {
         }
     }
 
-    #[instrument(skip_all, fields(model_id))]
     pub async fn tokenization_task_predict(
         &self,
         model_id: &str,
         request: ChunkerTokenizationTaskRequest,
     ) -> Result<TokenizationResults, Error> {
         let mut client = self.client.clone();
         let request = request_with_headers(request, model_id);
-        debug!(?request, "sending client request");
         let response = client.chunker_tokenization_task_predict(request).await?;
         let span = Span::current();
         trace_context_from_grpc_response(&span, &response);
         Ok(response.into_inner())
     }
 
-    #[instrument(skip_all, fields(model_id))]
    pub async fn bidi_streaming_tokenization_task_predict(
         &self,
         model_id: &str,
         request_stream: BoxStream<BidiStreamingChunkerTokenizationTaskRequest>,
     ) -> Result<BoxStream<Result<ChunkerTokenizationStreamResult, Error>>, Error> {
-        info!("sending client stream request");
         let mut client = self.client.clone();
         let request = request_with_headers(request_stream, model_id);
         // NOTE: this is an ugly workaround to avoid bogus higher-ranked lifetime errors.
@@ -103,7 +97,6 @@ impl ChunkerClient {
     }
 }
 
-#[cfg_attr(test, faux::methods)]
 #[async_trait]
 impl Client for ChunkerClient {
     fn name(&self) -> &str {
@@ -144,108 +137,3 @@ fn request_with_headers<T>(request: T, model_id: &str) -> Request<T> {
         .insert(MODEL_ID_HEADER_NAME, model_id.parse().unwrap());
     request
 }
-
-/// Unary tokenization result of the entire doc
-#[instrument(skip_all)]
-pub fn tokenize_whole_doc(request: ChunkerTokenizationTaskRequest) -> TokenizationResults {
-    let codepoint_count = request.text.chars().count() as i64;
-    TokenizationResults {
-        results: vec![Token {
-            start: 0,
-            end: codepoint_count,
-            text: request.text,
-        }],
-        token_count: 1, // entire doc
-    }
-}
-
-/// Streaming tokenization result for the entire doc stream
-#[instrument(skip_all)]
-pub async fn tokenize_whole_doc_stream(
-    request: impl Stream<Item = BidiStreamingChunkerTokenizationTaskRequest>,
-) -> Result<ChunkerTokenizationStreamResult, Error> {
-    let (text, index_vec): (String, Vec<i64>) = request
-        .map(|r| (r.text_stream, r.input_index_stream))
-        .collect()
-        .await;
-    let codepoint_count = text.chars().count() as i64;
-    let input_end_index = index_vec.last().copied().unwrap_or_default();
-    Ok(ChunkerTokenizationStreamResult {
-        results: vec![Token {
-            start: 0,
-            end: codepoint_count,
-            text,
-        }],
-        token_count: 1, // entire doc/stream
-        processed_index: codepoint_count,
-        start_index: 0,
-        input_start_index: 0,
-        input_end_index,
-    })
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_tokenize_whole_doc() {
-        let request = ChunkerTokenizationTaskRequest {
-            text: "Lorem ipsum dolor sit amet consectetur adipiscing \
-                elit sed do eiusmod tempor incididunt ut labore et dolore \
-                magna aliqua."
-                .into(),
-        };
-        let expected_response = TokenizationResults {
-            results: vec![Token {
-                start: 0,
-                end: 121,
-                text: "Lorem ipsum dolor sit amet consectetur \
-                    adipiscing elit sed do eiusmod tempor incididunt \
-                    ut labore et dolore magna aliqua."
-                    .into(),
-            }],
-            token_count: 1,
-        };
-        let response = tokenize_whole_doc(request);
-        assert_eq!(response, expected_response)
-    }
-
-    #[tokio::test]
-    async fn test_tokenize_whole_doc_stream() {
-        let request = futures::stream::iter(vec![
-            BidiStreamingChunkerTokenizationTaskRequest {
-                text_stream: "Lorem ipsum dolor sit amet ".into(),
-                input_index_stream: 0,
-            },
-            BidiStreamingChunkerTokenizationTaskRequest {
-                text_stream: "consectetur adipiscing elit ".into(),
-                input_index_stream: 1,
-            },
-            BidiStreamingChunkerTokenizationTaskRequest {
-                text_stream: "sed do eiusmod tempor incididunt ".into(),
-                input_index_stream: 2,
-            },
-            BidiStreamingChunkerTokenizationTaskRequest {
-                text_stream: "ut labore et dolore magna aliqua.".into(),
-                input_index_stream: 3,
-            },
-        ]);
-        let expected_response = ChunkerTokenizationStreamResult {
-            results: vec![Token {
-                start: 0,
-                end: 121,
-                text: "Lorem ipsum dolor sit amet consectetur adipiscing elit \
-                    sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
-                    .into(),
-            }],
-            token_count: 1,
-            processed_index: 121,
-            start_index: 0,
-            input_start_index: 0,
-            input_end_index: 3,
-        };
-        let response = tokenize_whole_doc_stream(request).await.unwrap();
-        assert_eq!(response, expected_response);
-    }
-}
