diff --git a/tools/server/server-chat.cpp b/tools/server/server-chat.cpp index 02858a2a028c..8be70208bd01 100644 --- a/tools/server/server-chat.cpp +++ b/tools/server/server-chat.cpp @@ -217,26 +217,34 @@ json server_chat_convert_responses_to_chatcmpl(const json & response_body) { } else if (exists_and_is_array(item, "summary") && exists_and_is_string(item, "type") && item.at("type") == "reasoning") { - // #responses_create-input-input_item_list-item-reasoning - if (!exists_and_is_array(item, "content")) { - throw std::invalid_argument("item['content'] is not an array"); - } - if (item.at("content").empty()) { - throw std::invalid_argument("item['content'] is empty"); + std::string reasoning_text; + + if (exists_and_is_array(item, "content") && !item.at("content").empty()) { + if (!exists_and_is_string(item.at("content")[0], "text")) { + throw std::invalid_argument("item['content']['text'] is not a string"); + } + reasoning_text = item.at("content")[0].at("text").get(); + } else if (!item.at("summary").empty()) { + for (const auto & summary_item : item.at("summary")) { + if (exists_and_is_string(summary_item, "text")) { + reasoning_text += summary_item.at("text").get(); + } + } } - if (!exists_and_is_string(item.at("content")[0], "text")) { - throw std::invalid_argument("item['content']['text'] is not a string"); + + if (reasoning_text.empty()) { + continue; } if (merge_prev) { auto & prev_msg = chatcmpl_messages.back(); - prev_msg["reasoning_content"] = item.at("content")[0].at("text"); + prev_msg["reasoning_content"] = reasoning_text; } else { chatcmpl_messages.push_back(json { {"role", "assistant"}, {"content", json::array()}, - {"reasoning_content", item.at("content")[0].at("text")}, + {"reasoning_content", reasoning_text}, }); } } else { diff --git a/tools/server/server-task.cpp b/tools/server/server-task.cpp index a9ebac013f81..c6007b8ab03a 100644 --- a/tools/server/server-task.cpp +++ b/tools/server/server-task.cpp @@ -624,6 +624,58 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { std::vector server_sent_events; std::vector output; + std::time_t t = std::time(nullptr); + + auto next_seq = [&]() -> uint64_t { + GGML_ASSERT(oai_seq_num_ptr != nullptr); + return (*oai_seq_num_ptr)++; + }; + + auto usage = [&]() { + return json { + {"input_tokens", n_prompt_tokens}, + {"output_tokens", n_decoded}, + {"total_tokens", n_decoded + n_prompt_tokens}, + {"input_tokens_details", json { + {"cached_tokens", n_prompt_tokens_cache} + }}, + }; + }; + + auto response_obj = [&](const std::string & status, const json & output_items, json usage_value) { + return json { + {"id", oai_resp_id}, + {"object", "response"}, + {"created_at", t}, + {"status", status}, + {"error", nullptr}, + {"incomplete_details", nullptr}, + {"instructions", nullptr}, + {"max_output_tokens", nullptr}, + {"model", oaicompat_model}, + {"usage", std::move(usage_value)}, + {"output", output_items}, + {"tools", json::array()}, + }; + }; + + auto push_event = [&](const char * event, json data) { + data["sequence_number"] = next_seq(); + server_sent_events.push_back(json { + {"event", event}, + {"data", std::move(data)}, + }); + }; + + auto push_item_event = [&](const char * event, json data) { + data["sequence_number"] = next_seq(); + data["output_index"] = index; + server_sent_events.push_back(json { + {"event", event}, + {"data", std::move(data)}, + }); + }; + if (oaicompat_msg.reasoning_content != "") { const json output_item = json { {"id", oai_resp_reasoning_id}, @@ -634,58 +686,52 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"type", "reasoning_text"}, }})}, {"encrypted_content", ""}, + {"status", "completed"}, }; - server_sent_events.push_back(json { - {"event", "response.output_item.done"}, - {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} - }} + push_item_event("response.output_item.done", json { + {"type", "response.output_item.done"}, + {"item", output_item}, }); + output.push_back(output_item); } if (oaicompat_msg.content != "") { - server_sent_events.push_back(json { - {"event", "response.output_text.done"}, - {"data", json { - {"type", "response.output_text.done"}, - {"item_id", oai_resp_message_id}, - {"text", oaicompat_msg.content} - }} + push_item_event("response.output_text.done", json { + {"type", "response.output_text.done"}, + {"item_id", oai_resp_message_id}, + {"content_index", 0}, + {"text", oaicompat_msg.content}, }); const json content_part = { {"type", "output_text"}, {"annotations", json::array()}, {"logprobs", json::array()}, - {"text", oaicompat_msg.content} + {"text", oaicompat_msg.content}, }; - server_sent_events.push_back(json { - {"event", "response.content_part.done"}, - {"data", json { - {"type", "response.content_part.done"}, - {"item_id", oai_resp_message_id}, - {"part", content_part} - }} + push_item_event("response.content_part.done", json { + {"type", "response.content_part.done"}, + {"item_id", oai_resp_message_id}, + {"content_index", 0}, + {"part", content_part}, }); + const json output_item = { {"type", "message"}, {"status", "completed"}, {"id", oai_resp_message_id}, - {"content", json::array({content_part})}, - {"role", "assistant"} + {"content", json::array({ content_part })}, + {"role", "assistant"}, }; - server_sent_events.push_back(json { - {"event", "response.output_item.done"}, - {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} - }} + push_item_event("response.output_item.done", json { + {"type", "response.output_item.done"}, + {"item", output_item}, }); + output.push_back(output_item); } @@ -696,38 +742,20 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() { {"status", "completed"}, {"arguments", tool_call.arguments}, {"call_id", "call_" + tool_call.id}, - {"name", tool_call.name} + {"name", tool_call.name}, }; - server_sent_events.push_back(json { - {"event", "response.output_item.done"}, - {"data", json { - {"type", "response.output_item.done"}, - {"item", output_item} - }} + + push_item_event("response.output_item.done", json { + {"type", "response.output_item.done"}, + {"item", output_item}, }); + output.push_back(output_item); } - std::time_t t = std::time(0); - server_sent_events.push_back(json { - {"event", "response.completed"}, - {"data", json { - {"type", "response.completed"}, - {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"created_at", t}, - {"status", "completed"}, - {"model", oaicompat_model}, - {"output", output}, - {"usage", json { - {"input_tokens", n_prompt_tokens}, - {"output_tokens", n_decoded}, - {"total_tokens", n_decoded + n_prompt_tokens}, - {"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }}, - }} - }}, - }} + push_event("response.completed", json { + {"type", "response.completed"}, + {"response", response_obj("completed", output, usage())}, }); return server_sent_events; @@ -1020,6 +1048,7 @@ void server_task_result_cmpl_partial::update(task_result_state & state) { oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; oai_resp_fc_id = state.oai_resp_fc_id; + oai_seq_num_ptr = &state.oai_seq_num; // track if the accumulated message has any reasoning content anthropic_has_reasoning = !state.chat_msg.reasoning_content.empty(); @@ -1181,127 +1210,152 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() { json server_task_result_cmpl_partial::to_json_oaicompat_resp() { std::vector events; - if (n_decoded == 1) { + auto next_seq = [&]() -> uint64_t { + GGML_ASSERT(oai_seq_num_ptr != nullptr); + return (*oai_seq_num_ptr)++; + }; + + const std::time_t t = std::time(nullptr); + + auto response_obj = [&](const char * status) { + return json { + {"id", oai_resp_id}, + {"object", "response"}, + {"created_at", t}, + {"status", status}, + {"error", nullptr}, + {"incomplete_details", nullptr}, + {"instructions", nullptr}, + {"max_output_tokens", nullptr}, + {"model", oaicompat_model}, + {"usage", nullptr}, + {"output", json::array()}, + {"tools", json::array()}, + }; + }; + + auto push_event = [&](const char * event, json data) { + data["sequence_number"] = next_seq(); + events.push_back(json { - {"event", "response.created"}, - {"data", json { - {"type", "response.created"}, - {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, + {"event", event}, + {"data", std::move(data)}, }); + }; + + auto push_item_event = [&](const char * event, json data) { + data["sequence_number"] = next_seq(); + data["output_index"] = index; + events.push_back(json { - {"event", "response.in_progress"}, - {"data", json { - {"type", "response.in_progress"}, - {"response", json { - {"id", oai_resp_id}, - {"object", "response"}, - {"status", "in_progress"}, - }}, - }}, + {"event", event}, + {"data", std::move(data)}, + }); + }; + + if (n_decoded == 1) { + push_event("response.created", json { + {"type", "response.created"}, + {"response", response_obj("in_progress")}, + }); + + push_event("response.in_progress", json { + {"type", "response.in_progress"}, + {"response", response_obj("in_progress")}, }); } for (const common_chat_msg_diff & diff : oaicompat_msg_diffs) { if (!diff.reasoning_content_delta.empty()) { if (!thinking_block_started) { - events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"id", oai_resp_reasoning_id}, - {"summary", json::array()}, - {"type", "reasoning"}, - {"content", json::array()}, - {"encrypted_content", ""}, - {"status", "in_progress"}, - }}, + push_item_event("response.output_item.added", json { + {"type", "response.output_item.added"}, + {"item", json { + {"id", oai_resp_reasoning_id}, + {"summary", json::array({ + json { + {"type", "summary_text"}, + {"text", ""}, + } + })}, + {"type", "reasoning"}, + {"content", json::array()}, + {"encrypted_content", ""}, + {"status", "in_progress"}, }}, }); + thinking_block_started = true; } - events.push_back(json { - {"event", "response.reasoning_text.delta"}, - {"data", json { - {"type", "response.reasoning_text.delta"}, - {"delta", diff.reasoning_content_delta}, - {"item_id", oai_resp_reasoning_id}, - }}, + + push_item_event("response.reasoning_summary_text.delta", json { + {"type", "response.reasoning_summary_text.delta"}, + {"delta", diff.reasoning_content_delta}, + {"item_id", oai_resp_reasoning_id}, + {"summary_index", 0}, }); } if (!diff.content_delta.empty()) { if (!text_block_started) { - events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"content", json::array()}, - {"id", oai_resp_message_id}, - {"role", "assistant"}, - {"status", "in_progress"}, - {"type", "message"}, - }}, + push_item_event("response.output_item.added", json { + {"type", "response.output_item.added"}, + {"item", json { + {"content", json::array()}, + {"id", oai_resp_message_id}, + {"role", "assistant"}, + {"status", "in_progress"}, + {"type", "message"}, }}, }); - events.push_back(json { - {"event", "response.content_part.added"}, - {"data", json { - {"type", "response.content_part.added"}, - {"item_id", oai_resp_message_id}, - {"part", json { - {"type", "output_text"}, - {"text", ""}, - }}, + + push_item_event("response.content_part.added", json { + {"type", "response.content_part.added"}, + {"item_id", oai_resp_message_id}, + {"content_index", 0}, + {"part", json { + {"type", "output_text"}, + {"text", ""}, + {"annotations", json::array()}, }}, }); + text_block_started = true; } - events.push_back(json { - {"event", "response.output_text.delta"}, - {"data", json { - {"type", "response.output_text.delta"}, - {"item_id", oai_resp_message_id}, - {"delta", diff.content_delta}, - }}, + + push_item_event("response.output_text.delta", json { + {"type", "response.output_text.delta"}, + {"item_id", oai_resp_message_id}, + {"content_index", 0}, + {"delta", diff.content_delta}, }); } if (!diff.tool_call_delta.name.empty()) { - events.push_back(json { - {"event", "response.output_item.added"}, - {"data", json { - {"type", "response.output_item.added"}, - {"item", json { - {"id", "fc_" + diff.tool_call_delta.id}, - {"arguments", ""}, - {"call_id", "call_" + diff.tool_call_delta.id}, - {"name", diff.tool_call_delta.name}, - {"type", "function_call"}, - {"status", "in_progress"}, - }}, + push_item_event("response.output_item.added", json { + {"type", "response.output_item.added"}, + {"item", json { + {"id", "fc_" + diff.tool_call_delta.id}, + {"arguments", ""}, + {"call_id", "call_" + diff.tool_call_delta.id}, + {"name", diff.tool_call_delta.name}, + {"type", "function_call"}, + {"status", "in_progress"}, }}, }); + oai_resp_fc_id = diff.tool_call_delta.id; } if (!diff.tool_call_delta.arguments.empty()) { - events.push_back(json { - {"event", "response.function_call_arguments.delta"}, - {"data", json { - {"type", "response.function_call_arguments.delta"}, - {"delta", diff.tool_call_delta.arguments}, - {"item_id", "fc_" + oai_resp_fc_id}, - }}, + push_item_event("response.function_call_arguments.delta", json { + {"type", "response.function_call_arguments.delta"}, + {"delta", diff.tool_call_delta.arguments}, + {"item_id", "fc_" + oai_resp_fc_id}, }); } } + return events; } diff --git a/tools/server/server-task.h b/tools/server/server-task.h index 293bdf053abf..06b9a1d016cd 100644 --- a/tools/server/server-task.h +++ b/tools/server/server-task.h @@ -120,6 +120,8 @@ struct task_result_state { const std::string oai_resp_message_id; std::string oai_resp_fc_id; // function call ID for current args delta + uint64_t oai_seq_num = 0; + task_result_state(const common_chat_parser_params & chat_parser_params); // parse partial tool calls and update the internal state @@ -372,6 +374,7 @@ struct server_task_result_cmpl_final : server_task_result { std::string oai_resp_id; std::string oai_resp_reasoning_id; std::string oai_resp_message_id; + uint64_t * oai_seq_num_ptr = nullptr; virtual bool is_stop() override { return true; // in stream mode, final responses are considered stop @@ -386,6 +389,7 @@ struct server_task_result_cmpl_final : server_task_result { oai_resp_id = state.oai_resp_id; oai_resp_reasoning_id = state.oai_resp_reasoning_id; oai_resp_message_id = state.oai_resp_message_id; + oai_seq_num_ptr = &state.oai_seq_num; } json to_json_non_oaicompat(); @@ -442,6 +446,7 @@ struct server_task_result_cmpl_partial : server_task_result { std::string oai_resp_reasoning_id; std::string oai_resp_message_id; std::string oai_resp_fc_id; + uint64_t * oai_seq_num_ptr = nullptr; // for Anthropic API: track if any reasoning content has been generated bool anthropic_has_reasoning = false;