feat: add web reader api (#48)

tomsun28 · web-flow · commit d8e16ae166df · 2025-11-15T19:12:16.000+08:00
diff --git a/examples/web_reader_example.py b/examples/web_reader_example.py
@@ -0,0 +1,28 @@
+from zai import ZaiClient
+
+
+def web_reader_example():
+    """Read https://www.example.com/ with the web reader and print the result."""
+    client = ZaiClient()
+    result = client.web_reader.web_reader(
+        url="https://www.example.com/",
+        return_format="markdown",  # "text" is also accepted
+        retain_images=True,
+        with_links_summary=True,
+    )
+
+    # Dump the whole response model first.
+    print(result)
+
+    # Then show individual fields, if a reader result came back.
+    page = result.reader_result
+    if not page:
+        return
+    print("Title:", page.title)
+    print("Published:", page.published_time)
+    print("URL:", page.url)
+    print("Content length:", len(page.content or ""))
+
+
+if __name__ == "__main__":
+    web_reader_example()
diff --git a/src/zai/_client.py b/src/zai/_client.py
@@ -22,6 +22,7 @@
     from zai.api_resource.videos import Videos
     from zai.api_resource.voice import Voice
     from zai.api_resource.web_search import WebSearchApi
+    from zai.api_resource.web_reader import WebReaderApi
     from zai.api_resource.file_parser import FileParser
 
 from .core import (
@@ -152,6 +153,12 @@ def web_search(self) -> WebSearchApi:
 
         return WebSearchApi(self)
 
+    @cached_property
+    def web_reader(self) -> WebReaderApi:  # accessor for the web reader resource
+        from zai.api_resource.web_reader import WebReaderApi  # function-scope import, as in `files` below
+
+        return WebReaderApi(self)  # resource is constructed with this client
+
     @cached_property
     def files(self) -> Files:
         from zai.api_resource.files import Files
diff --git a/src/zai/api_resource/__init__.py b/src/zai/api_resource/__init__.py
@@ -18,6 +18,7 @@
 	Videos,
 )
 from .web_search import WebSearchApi
+from .web_reader import WebReaderApi
 from .file_parser import FileParser
 
 
@@ -36,6 +37,7 @@
 	'Audio',
 	'Moderations',
 	'WebSearchApi',
+	'WebReaderApi',
 	'Agents',
 	'FileParser',
 ]
diff --git a/src/zai/api_resource/web_reader/__init__.py b/src/zai/api_resource/web_reader/__init__.py
@@ -0,0 +1,3 @@
+from .web_reader import WebReaderApi
+
+__all__ = ["WebReaderApi"]
diff --git a/src/zai/api_resource/web_reader/web_reader.py b/src/zai/api_resource/web_reader/web_reader.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
+
+import httpx
+
+from zai.core import (
+    NOT_GIVEN,
+    BaseAPI,
+    Body,
+    Headers,
+    NotGiven,
+    deepcopy_minimal,
+    make_request_options,
+    maybe_transform,
+)
+from zai.types.web_reader.web_reader_params import WebReaderParams
+from zai.types.web_reader.web_reader_resp import WebReaderResult
+
+if TYPE_CHECKING:
+    from zai._client import ZaiClient
+
+
+class WebReaderApi(BaseAPI):
+    """Resource that sends web reader requests to the `/reader` endpoint."""
+
+    def __init__(self, client: "ZaiClient") -> None:
+        super().__init__(client)
+
+    def web_reader(
+        self,
+        *,
+        url: str,
+        request_id: Optional[str] | NotGiven = NOT_GIVEN,
+        user_id: Optional[str] | NotGiven = NOT_GIVEN,
+        timeout: Optional[str] | NotGiven = NOT_GIVEN,
+        no_cache: Optional[bool] | NotGiven = NOT_GIVEN,
+        return_format: Optional[str] | NotGiven = NOT_GIVEN,
+        retain_images: Optional[bool] | NotGiven = NOT_GIVEN,
+        no_gfm: Optional[bool] | NotGiven = NOT_GIVEN,
+        keep_img_data_url: Optional[bool] | NotGiven = NOT_GIVEN,
+        with_images_summary: Optional[bool] | NotGiven = NOT_GIVEN,
+        with_links_summary: Optional[bool] | NotGiven = NOT_GIVEN,
+        extra_headers: Headers | None = None,
+        extra_body: Body | None = None,
+        timeout_override: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> WebReaderResult:
+        """
+        POST a read request for `url` to `/reader` and return it as a WebReaderResult
+
+        Arguments:
+            url (str): Target page URL to read
+            request_id (str): Unique request task ID (6-64 chars)
+            user_id (str): Unique end-user ID (6-128 chars)
+            timeout (str): Request timeout in seconds; sent in the request body
+            no_cache (bool): Disable cache
+            return_format (str): Return format, e.g. 'markdown' or 'text'
+            retain_images (bool): Keep images in output
+            no_gfm (bool): Disable GitHub Flavored Markdown
+            keep_img_data_url (bool): Keep image data URLs
+            with_images_summary (bool): Include images summary
+            with_links_summary (bool): Include links summary
+            extra_headers (Headers): Forwarded to `make_request_options`
+            extra_body (Body): Forwarded to `make_request_options`
+            timeout_override (float | httpx.Timeout): Passed to `make_request_options`
+                as `timeout`; separate from the body-level `timeout` above
+
+        Returns:
+            WebReaderResult: The response cast to this model
+        """
+        request_fields = {
+            "url": url,
+            "request_id": request_id,
+            "user_id": user_id,
+            "timeout": timeout,
+            "no_cache": no_cache,
+            "return_format": return_format,
+            "retain_images": retain_images,
+            "no_gfm": no_gfm,
+            "keep_img_data_url": keep_img_data_url,
+            "with_images_summary": with_images_summary,
+            "with_links_summary": with_links_summary,
+        }
+        payload = maybe_transform(deepcopy_minimal(request_fields), WebReaderParams)
+        request_options = make_request_options(
+            extra_headers=extra_headers,
+            extra_body=extra_body,
+            timeout=timeout_override,
+        )
+        return self._post(
+            "/reader",
+            body=payload,
+            options=request_options,
+            cast_type=WebReaderResult,
+        )
diff --git a/src/zai/types/web_reader/__init__.py b/src/zai/types/web_reader/__init__.py
@@ -0,0 +1,8 @@
+from .web_reader_params import WebReaderParams
+from .web_reader_resp import WebReaderResult, ReaderData
+
+__all__ = [
+    "WebReaderParams",
+    "ReaderData",
+    "WebReaderResult",
+]
diff --git a/src/zai/types/web_reader/web_reader_params.py b/src/zai/types/web_reader/web_reader_params.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from typing_extensions import TypedDict
+
+
+class WebReaderParams(TypedDict, total=False):
+    """
+    Web reader request parameters (total=False, so any key may be omitted)
+
+    Attributes:
+        url (str): Target page URL to read
+        request_id (str): Unique request task ID (6-64 chars)
+        user_id (str): Unique end-user ID (6-128 chars)
+        timeout (str): Request timeout in seconds, typed as a string
+        no_cache (bool): Disable cache
+        return_format (str): Return format, e.g. 'markdown' or 'text'
+        retain_images (bool): Keep images in output
+        no_gfm (bool): Disable GitHub Flavored Markdown
+        keep_img_data_url (bool): Keep image data URLs
+        with_images_summary (bool): Include images summary
+        with_links_summary (bool): Include links summary
+    """
+
+    url: str  # the only key not typed Optional
+    request_id: Optional[str]
+    user_id: Optional[str]
+    timeout: Optional[str]  # NOTE(review): str although documented in seconds - confirm against the API
+    no_cache: Optional[bool]
+    return_format: Optional[str]
+    retain_images: Optional[bool]
+    no_gfm: Optional[bool]
+    keep_img_data_url: Optional[bool]
+    with_images_summary: Optional[bool]
+    with_links_summary: Optional[bool]
diff --git a/src/zai/types/web_reader/web_reader_resp.py b/src/zai/types/web_reader/web_reader_resp.py
@@ -0,0 +1,21 @@
+from typing import Any, Dict, Optional
+
+from pydantic import Field
+
+from zai.core import BaseModel
+
+
+class ReaderData(BaseModel):
+    images: Optional[Dict[str, str]] = None
+    links: Optional[Dict[str, str]] = None
+    title: Optional[str] = None
+    description: Optional[str] = None
+    url: Optional[str] = None
+    content: Optional[str] = None
+    published_time: Optional[str] = Field(default=None, alias="publishedTime")
+    metadata: Optional[Dict[str, Any]] = None
+    external: Optional[Dict[str, Any]] = None
+
+
+class WebReaderResult(BaseModel):
+    reader_result: Optional[ReaderData] = None
diff --git a/tests/integration_tests/test_web_reader.py b/tests/integration_tests/test_web_reader.py
@@ -0,0 +1,26 @@
+import logging
+import logging.config
+
+import zai
+from zai import ZaiClient
+
+
+def test_web_reader(logging_conf):
+    """Call the web reader once; print the response or the API error raised."""
+    logging.config.dictConfig(logging_conf)  # type: ignore
+    client = ZaiClient()  # Fill in your own API Key
+    # The three API error types below are printed, not re-raised.
+    try:
+        result = client.web_reader.web_reader(
+            url="https://www.example.com/",
+            return_format="markdown",
+            retain_images=True,
+            with_links_summary=True,
+        )
+        print(result)
+    except (
+        zai.core._errors.APIRequestFailedError,
+        zai.core._errors.APIInternalError,
+        zai.core._errors.APIStatusError,
+    ) as err:
+        print(err)

Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@`
`18`	`18`	`Videos,`
`19`	`19`	`)`
`20`	`20`	`from .web_search import WebSearchApi`
	`21`	`+from .web_reader import WebReaderApi`
`21`	`22`	`from .file_parser import FileParser`
`22`	`23`
`23`	`24`
`@@ -36,6 +37,7 @@`
`36`	`37`	`'Audio',`
`37`	`38`	`'Moderations',`
`38`	`39`	`'WebSearchApi',`
	`40`	`+ 'WebReaderApi',`
`39`	`41`	`'Agents',`
`40`	`42`	`'FileParser',`
`41`	`43`	`]`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from .web_reader import WebReaderApi`
	`2`	`+`
	`3`	`+__all__ = ["WebReaderApi"]`