react/src/components/Chat/ChatCard.tsx (26 changes: 22 additions & 4 deletions)

```diff
@@ -238,6 +238,7 @@ const PureChatCard: React.FC<ChatCardProps> = ({
   const dropContainerRef = useRef<HTMLDivElement>(null);
   const [fetchKey, updateFetchKey] = useUpdatableState('first');
   const [startTime, setStartTime] = useState<number | null>(null);
+  const [endTime, setEndTime] = useState<number | null>(null);
 
   const { agents } = useAIAgent();
   const agent = agents.find((a) => a.id === chat.provider.agentId);
@@ -261,9 +262,6 @@ const PureChatCard: React.FC<ChatCardProps> = ({
   const { error, messages, stop, status, sendMessage, setMessages } = useChat({
     experimental_throttle: 100,
     messages: chat.messages,
-    onFinish: () => {
-      setStartTime(null);
-    },
     // Because there is an issue (https://github.com/vercel/ai/issues/8956) with useChat that does not run a new transport without an id change,
     // we have to change the id and use fetch by utilizing useEventNotStable.
     id: `chat-${baseURL}-${modelId}-${effectiveApiKey}`,
@@ -329,9 +327,28 @@ const PureChatCard: React.FC<ChatCardProps> = ({
 
   const isStreaming = status === 'streaming' || status === 'submitted';
 
+  // TPS measurement window follows the standard LLM inference convention:
+  // start when the first output token arrives (status transitions to
+  // 'streaming') and stop when streaming ends (success, abort, or error).
+  // This excludes file upload, network RTT, and prefill (TTFT), so the
+  // displayed TPS reflects pure decode rate — the same definition used by
+  // vLLM, Ollama, NVIDIA GenAI-Perf, etc.
+  useEffect(() => {
+    if (status === 'streaming' && startTime === null) {
+      setStartTime(Date.now());
+    }
+  }, [status, startTime]);
+
+  useEffect(() => {
+    if (!isStreaming && startTime !== null && endTime === null) {
+      setEndTime(Date.now());
+    }
+  }, [isStreaming, startTime, endTime]);
+
   // Helper function to handle message sending with files
   const handleSendMessage = async (textContent: string, files?: File[]) => {
-    setStartTime(Date.now());
+    setStartTime(null);
+    setEndTime(null);
 
     const parts: Array<
       | { type: 'text'; text: string }
@@ -540,6 +557,7 @@ const PureChatCard: React.FC<ChatCardProps> = ({
           input={input}
           isStreaming={isStreaming}
           startTime={startTime}
+          endTime={endTime}
         />
         <ChatInput
           disabled={!baseURL}
```
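Read together, the two effects and the reset in `handleSendMessage` form a small state machine for the measurement window. Below is a minimal standalone sketch of that logic; the `useStreamingWindow` name and the `ChatStatus` union are illustrative assumptions, not code from this PR:

```tsx
import { useEffect, useState } from 'react';

// Hypothetical extraction of the timing logic above, for illustration only.
type ChatStatus = 'ready' | 'submitted' | 'streaming' | 'error';

export function useStreamingWindow(status: ChatStatus) {
  const [startTime, setStartTime] = useState<number | null>(null);
  const [endTime, setEndTime] = useState<number | null>(null);

  const isStreaming = status === 'streaming' || status === 'submitted';

  // Open the window when the first output token arrives. 'submitted'
  // (request in flight) is deliberately excluded so upload, network RTT,
  // and prefill (TTFT) do not count toward the decode window.
  useEffect(() => {
    if (status === 'streaming' && startTime === null) {
      setStartTime(Date.now());
    }
  }, [status, startTime]);

  // Close the window once streaming stops for any reason (success, abort,
  // or error); the null guards make this fire exactly once per run.
  useEffect(() => {
    if (!isStreaming && startTime !== null && endTime === null) {
      setEndTime(Date.now());
    }
  }, [isStreaming, startTime, endTime]);

  // Called before each send; this replaces the removed onFinish callback,
  // which only covered the success path.
  const reset = () => {
    setStartTime(null);
    setEndTime(null);
  };

  return { startTime, endTime, isStreaming, reset };
}
```

Moving the reset from `onFinish` to the start of `handleSendMessage` also means an aborted or errored stream keeps its window, so the last measured TPS stays visible until the next send.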
react/src/components/Chat/ChatMessages.tsx (3 changes: 3 additions & 0 deletions)

```diff
@@ -18,13 +18,15 @@ interface ChatMessageProps {
   input: string;
   isStreaming: boolean;
   startTime: number | null;
+  endTime: number | null;
 }
 
 const ChatMessages: React.FC<ChatMessageProps> = ({
   messages,
   input,
   isStreaming,
   startTime,
+  endTime,
 }) => {
   const { token } = theme.useToken();
   return (
@@ -44,6 +46,7 @@ const ChatMessages: React.FC<ChatMessageProps> = ({
         messages={messages}
         input={input}
         startTime={startTime}
+        endTime={endTime}
       />
     </BAIFlex>
   </BAIFlex>
```
react/src/components/Chat/ChatTokenCounter.tsx (47 changes: 22 additions & 25 deletions)

```diff
@@ -9,52 +9,49 @@ import { Typography, Tag, Divider } from 'antd';
 import { BAIFlex } from 'backend.ai-ui';
 import { t } from 'i18next';
 import { map, last } from 'lodash-es';
-import React, { useMemo } from 'react';
+import React from 'react';
 
 interface ChatTokenCounterProps {
   input: string;
   messages: UIMessage[];
   startTime: number | null;
+  endTime: number | null;
   style?: React.CSSProperties;
 }
 
 const ChatTokenCounter: React.FC<ChatTokenCounterProps> = ({
   input,
   messages,
   startTime,
+  endTime,
 }) => {
+  'use memo';
+
   const inputTokenCount = useTokenCount(input);
-  const allChatMessageString = useMemo(() => {
-    return map(messages, (message) =>
-      message?.parts
-        ?.filter((part) => part.type === 'text')
-        .map((part) => part.text)
-        .join(''),
-    ).join('');
-  }, [messages]);
+  const allChatMessageString = map(messages, (message) =>
+    message?.parts
+      ?.filter((part) => part.type === 'text')
+      .map((part) => part.text)
+      .join(''),
+  ).join('');
   const chatsTokenCount = useTokenCount(allChatMessageString);
   const totalTokenCount = inputTokenCount + chatsTokenCount;
-  const lastAssistantMessageString = useMemo(() => {
-    const lastAssistantMessage = last(messages);
-    if (lastAssistantMessage?.role === 'assistant') {
-      return (
-        lastAssistantMessage?.parts
+  const lastAssistantMessage = last(messages);
+  const lastAssistantMessageString =
+    lastAssistantMessage?.role === 'assistant'
+      ? lastAssistantMessage?.parts
           ?.filter((part) => part.type === 'text')
          .map((part) => part.text)
          .join('') || ''
-      );
-    } else {
-      return '';
-    }
-  }, [messages]);
+      : '';
 
   const lastAssistantTokenCount = useTokenCount(lastAssistantMessageString);
-  const tokenPerSecond = useMemo(() => {
-    return lastAssistantTokenCount > 0 && startTime
-      ? // eslint-disable-next-line react-hooks/purity
-        lastAssistantTokenCount / ((Date.now() - startTime) / 1000)
-      : 0;
-  }, [lastAssistantTokenCount, startTime]);
+  let tokenPerSecond = 0;
+  if (lastAssistantTokenCount > 0 && startTime) {
+    // eslint-disable-next-line react-hooks/purity
+    const elapsedSec = ((endTime ?? Date.now()) - startTime) / 1000;
+    tokenPerSecond = elapsedSec > 0 ? lastAssistantTokenCount / elapsedSec : 0;
+  }
 
   return (
     <BAIFlex justify="end" align="end">
```
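Two notes on the ChatTokenCounter change. First, the handwritten `useMemo` wrappers could be dropped because the component now carries the `'use memo'` directive, which presumably opts it into React Compiler auto-memoization, making manual memoization redundant. Second, the `endTime ?? Date.now()` fallback is what makes the displayed rate update live while streaming and freeze afterwards. A sketch of the same math as a pure function; the `tokensPerSecond` helper is hypothetical, not in the PR:

```ts
// Hypothetical helper mirroring the decode-rate computation above.
function tokensPerSecond(
  tokenCount: number,
  startTime: number | null,
  endTime: number | null,
  now: () => number = Date.now,
): number {
  if (tokenCount <= 0 || startTime === null) return 0;
  // While streaming, endTime is null and the rate tracks now() live;
  // once the window closes, endTime is fixed and the displayed TPS freezes.
  const elapsedSec = ((endTime ?? now()) - startTime) / 1000;
  // Guard against a zero-length window to avoid division by zero.
  return elapsedSec > 0 ? tokenCount / elapsedSec : 0;
}

// Example: 128 tokens decoded over a closed 3.2 s window.
// tokensPerSecond(128, 0, 3200) === 40 tokens/s
```

Because `startTime` is set on the first streamed token and `endTime` on the last, the elapsed window measures decode only, matching the convention stated in the ChatCard comment.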