|
16 | 16 | from typing_extensions import Self |
17 | 17 |
|
18 | 18 | from aiperf.common.aiperf_logger import AIPerfLogger |
19 | | -from aiperf.common.constants import NANOS_PER_SECOND, STAT_KEYS |
| 19 | +from aiperf.common.constants import STAT_KEYS |
20 | 20 | from aiperf.common.enums import CreditPhase, SSEFieldType |
21 | 21 | from aiperf.common.enums.metric_enums import MetricValueTypeT |
22 | 22 | from aiperf.common.exceptions import InvalidInferenceResultError |
@@ -707,24 +707,33 @@ class ParsedResponse(AIPerfBaseModel): |
707 | 707 | ) |
708 | 708 |
|
709 | 709 |
|
710 | | -class ParsedResponseRecord(AIPerfBaseModel): |
711 | | - """Record of a request and its associated responses, already parsed and ready for metrics.""" |
| 710 | +class TokenCounts(AIPerfBaseModel): |
| 711 | + """Token counts for a record.""" |
712 | 712 |
|
713 | | - request: RequestRecord = Field(description="The original request record") |
714 | | - responses: list[ParsedResponse] = Field(description="The parsed responses.") |
715 | | - input_token_count: int | None = Field( |
| 713 | + input: int | None = Field( |
716 | 714 | default=None, |
717 | 715 | description="The number of tokens in the input (client-side tokenization). If None, the number of tokens could not be calculated.", |
718 | 716 | ) |
719 | | - output_token_count: int | None = Field( |
| 717 | + output: int | None = Field( |
720 | 718 | default=None, |
721 | 719 | description="The number of output tokens across all responses (client-side tokenization). If None, the number of tokens could not be calculated.", |
722 | 720 | ) |
723 | | - reasoning_token_count: int | None = Field( |
| 721 | + reasoning: int | None = Field( |
724 | 722 | default=None, |
725 | 723 | description="The number of reasoning tokens across all responses (client-side tokenization). If None, the number of tokens could not be calculated, or the model does not support reasoning.", |
726 | 724 | ) |
727 | 725 |
|
| 726 | + |
| 727 | +class ParsedResponseRecord(AIPerfBaseModel): |
| 728 | + """Record of a request and its associated responses, already parsed and ready for metrics.""" |
| 729 | + |
| 730 | + request: RequestRecord = Field(..., description="The original request record") |
| 731 | + responses: list[ParsedResponse] = Field(..., description="The parsed responses.") |
| 732 | + token_counts: TokenCounts | None = Field( |
| 733 | + default=None, |
| 734 | + description="The token counts for the response. If None, the token counts could not be calculated.", |
| 735 | + ) |
| 736 | + |
728 | 737 | @cached_property |
729 | 738 | def start_perf_ns(self) -> int: |
730 | 739 | """Get the start time of the request in nanoseconds (perf_counter_ns).""" |
@@ -760,18 +769,6 @@ def content_responses(self) -> list[ParsedResponse]: |
760 | 769 | """ |
761 | 770 | return [response for response in self.responses if response.data] |
762 | 771 |
|
763 | | - @cached_property |
764 | | - def request_duration_ns(self) -> int: |
765 | | - """Get the duration of the request in nanoseconds.""" |
766 | | - return self.end_perf_ns - self.start_perf_ns |
767 | | - |
768 | | - @cached_property |
769 | | - def tokens_per_second(self) -> float | None: |
770 | | - """Get the number of tokens per second of the request.""" |
771 | | - if self.output_token_count is None or self.request_duration_ns == 0: |
772 | | - return None |
773 | | - return self.output_token_count / (self.request_duration_ns / NANOS_PER_SECOND) |
774 | | - |
775 | 772 | @property |
776 | 773 | def has_error(self) -> bool: |
777 | 774 | """Check if the response record has an error.""" |
|
0 commit comments