Skip to content

Commit d8e16ae

Browse files
authored
feat: add web reader api (#48)
1 parent 25f84e8 commit d8e16ae

File tree

9 files changed

+196
-0
lines changed

9 files changed

+196
-0
lines changed

examples/web_reader_example.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
from zai import ZaiClient
2+
3+
4+
def web_reader_example():
    """Read one page through the web reader API and print the result.

    Prints the full response model first, then selected fields of
    ``response.reader_result`` when that attribute is truthy.
    """
    client = ZaiClient()
    reader_options = {
        "return_format": "markdown",  # or "text"
        "retain_images": True,
        "with_links_summary": True,
    }
    response = client.web_reader.web_reader(
        url="https://www.example.com/",
        **reader_options,
    )

    # Print full response model
    print(response)

    # Access structured fields
    data = response.reader_result
    if not data:
        return
    print("Title:", data.title)
    print("Published:", data.published_time)
    print("URL:", data.url)
    # content may be None, so fall back to an empty string before measuring.
    print("Content length:", len(data.content or ""))


if __name__ == "__main__":
    web_reader_example()

src/zai/_client.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
from zai.api_resource.videos import Videos
2323
from zai.api_resource.voice import Voice
2424
from zai.api_resource.web_search import WebSearchApi
25+
from zai.api_resource.web_reader import WebReaderApi
2526
from zai.api_resource.file_parser import FileParser
2627

2728
from .core import (
@@ -152,6 +153,12 @@ def web_search(self) -> WebSearchApi:
152153

153154
return WebSearchApi(self)
154155

156+
@cached_property
def web_reader(self) -> WebReaderApi:
    """Return the web reader API resource bound to this client."""
    # NOTE(review): cached_property presumably builds this once per client
    # instance; confirm which cached_property implementation is imported.
    from zai.api_resource.web_reader import WebReaderApi

    reader_api = WebReaderApi(self)
    return reader_api
161+
155162
@cached_property
156163
def files(self) -> Files:
157164
from zai.api_resource.files import Files

src/zai/api_resource/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
Videos,
1919
)
2020
from .web_search import WebSearchApi
21+
from .web_reader import WebReaderApi
2122
from .file_parser import FileParser
2223

2324

@@ -36,6 +37,7 @@
3637
'Audio',
3738
'Moderations',
3839
'WebSearchApi',
40+
'WebReaderApi',
3941
'Agents',
4042
'FileParser',
4143
]
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
from .web_reader import WebReaderApi
2+
3+
__all__ = ["WebReaderApi"]
Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,68 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Optional
4+
5+
import httpx
6+
7+
from zai.core import (
8+
NOT_GIVEN,
9+
BaseAPI,
10+
Body,
11+
Headers,
12+
NotGiven,
13+
deepcopy_minimal,
14+
make_request_options,
15+
maybe_transform,
16+
)
17+
from zai.types.web_reader.web_reader_params import WebReaderParams
18+
from zai.types.web_reader.web_reader_resp import WebReaderResult
19+
20+
if TYPE_CHECKING:
21+
from zai._client import ZaiClient
22+
23+
24+
class WebReaderApi(BaseAPI):
    """API resource that sends web reader requests to the ``/reader`` path."""

    def __init__(self, client: "ZaiClient") -> None:
        super().__init__(client)

    def web_reader(
        self,
        *,
        url: str,
        request_id: Optional[str] | NotGiven = NOT_GIVEN,
        user_id: Optional[str] | NotGiven = NOT_GIVEN,
        timeout: Optional[str] | NotGiven = NOT_GIVEN,
        no_cache: Optional[bool] | NotGiven = NOT_GIVEN,
        return_format: Optional[str] | NotGiven = NOT_GIVEN,
        retain_images: Optional[bool] | NotGiven = NOT_GIVEN,
        no_gfm: Optional[bool] | NotGiven = NOT_GIVEN,
        keep_img_data_url: Optional[bool] | NotGiven = NOT_GIVEN,
        with_images_summary: Optional[bool] | NotGiven = NOT_GIVEN,
        with_links_summary: Optional[bool] | NotGiven = NOT_GIVEN,
        extra_headers: Headers | None = None,
        extra_body: Body | None = None,
        timeout_override: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> WebReaderResult:
        """POST a read request for ``url`` and return the parsed result.

        All arguments are keyword-only. Every optional request field
        defaults to ``NOT_GIVEN``.

        Args:
            url: Target page URL to read.
            request_id: Unique request task ID.
            user_id: Unique end-user ID.
            timeout: Request timeout in seconds; sent in the request body
                under the ``timeout`` key.
            no_cache: Disable cache.
            return_format: Return format, e.g. 'markdown' or 'text'.
            retain_images: Keep images in output.
            no_gfm: Disable GitHub Flavored Markdown.
            keep_img_data_url: Keep image data URLs.
            with_images_summary: Include images summary.
            with_links_summary: Include links summary.
            extra_headers: Forwarded to ``make_request_options``.
            extra_body: Forwarded to ``make_request_options``.
            timeout_override: Forwarded to ``make_request_options`` as its
                ``timeout`` argument. It has a separate name because
                ``timeout`` is already used for the body field.

        Returns:
            The response cast to ``WebReaderResult``.
        """
        payload = deepcopy_minimal(
            {
                "url": url,
                "request_id": request_id,
                "user_id": user_id,
                "timeout": timeout,
                "no_cache": no_cache,
                "return_format": return_format,
                "retain_images": retain_images,
                "no_gfm": no_gfm,
                "keep_img_data_url": keep_img_data_url,
                "with_images_summary": with_images_summary,
                "with_links_summary": with_links_summary,
            }
        )
        # Transform the body before building the options, which is the same
        # evaluation order as passing both inline to ``_post``.
        request_body = maybe_transform(payload, WebReaderParams)
        request_options = make_request_options(
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout_override,
        )
        return self._post(
            "/reader",
            body=request_body,
            options=request_options,
            cast_type=WebReaderResult,
        )
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
from .web_reader_params import WebReaderParams
2+
from .web_reader_resp import WebReaderResult, ReaderData
3+
4+
__all__ = [
5+
"WebReaderParams",
6+
"ReaderData",
7+
"WebReaderResult",
8+
]
Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
from __future__ import annotations
2+
3+
from typing import Optional
4+
5+
from typing_extensions import TypedDict
6+
7+
8+
class WebReaderParams(TypedDict, total=False):
    """Web reader request parameters.

    The dict is declared with ``total=False``, so the type itself does not
    require any key to be present.

    Keys:
        url: Target page URL to read.
        request_id: Unique request task ID (6-64 chars).
        user_id: Unique end-user ID (6-128 chars).
        timeout: Request timeout in seconds.
        no_cache: Disable cache.
        return_format: Return format, e.g. 'markdown' or 'text'.
        retain_images: Keep images in output.
        no_gfm: Disable GitHub Flavored Markdown.
        keep_img_data_url: Keep image data URLs.
        with_images_summary: Include images summary.
        with_links_summary: Include links summary.
    """

    url: str
    request_id: Optional[str]
    user_id: Optional[str]
    timeout: Optional[str]
    no_cache: Optional[bool]
    return_format: Optional[str]
    retain_images: Optional[bool]
    no_gfm: Optional[bool]
    keep_img_data_url: Optional[bool]
    with_images_summary: Optional[bool]
    with_links_summary: Optional[bool]
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from typing import Any, Dict, Optional
2+
3+
from pydantic import Field
4+
5+
from zai.core import BaseModel
6+
7+
8+
# Page data carried by a web reader response. Every field is optional and
# defaults to None.
class ReaderData(BaseModel):
    images: Optional[Dict[str, str]] = None
    links: Optional[Dict[str, str]] = None
    title: Optional[str] = None
    description: Optional[str] = None
    url: Optional[str] = None
    content: Optional[str] = None
    # Declared with the alias "publishedTime".
    published_time: Optional[str] = Field(None, alias="publishedTime")
    metadata: Optional[Dict[str, Any]] = None
    external: Optional[Dict[str, Any]] = None
18+
19+
20+
# Top-level web reader response model.
class WebReaderResult(BaseModel):
    # Parsed page data; defaults to None.
    reader_result: Optional[ReaderData] = None
Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
import logging
2+
import logging.config
3+
4+
import zai
5+
from zai import ZaiClient
6+
7+
8+
def test_web_reader(logging_conf):
    """Call the web reader endpoint once and print the response.

    The three API error types named below are caught and printed rather
    than re-raised, so those failures do not fail the test.
    """
    logging.config.dictConfig(logging_conf)  # type: ignore
    client = ZaiClient()  # Fill in your own API Key
    # All three error types get the same treatment, so one handler covers them.
    printed_errors = (
        zai.core._errors.APIRequestFailedError,
        zai.core._errors.APIInternalError,
        zai.core._errors.APIStatusError,
    )
    try:
        response = client.web_reader.web_reader(
            url="https://www.example.com/",
            return_format="markdown",
            retain_images=True,
            with_links_summary=True,
        )
        print(response)
    except printed_errors as err:
        print(err)

0 commit comments

Comments
 (0)