@@ -224,9 +224,7 @@ def _run_agentic_pipeline(self, messages):
224224 if llm_call_count > self .llm_call_budget :
225225 print (f"LLM call budget exceeded: { llm_call_count } > { self .llm_call_budget } . Stopping." )
226226 del self .async_llm_engine_map [request_id ]
227- while messages [- 1 ]["role" ] == "tool" :
228- messages .pop ()
229- return messages , logprobs
227+ return messages , response_input_ids , logprobs
230228 inputs = self ._build_prompt (messages , return_dict = True , return_tensors = "pt" )
231229 if num_prompt_tokens == 0 :
232230 num_prompt_tokens = inputs ["input_ids" ].size (- 1 )
@@ -235,9 +233,7 @@ def _run_agentic_pipeline(self, messages):
235233 f"Max tokens exceeded: Current have generated { inputs ['input_ids' ].size (- 1 ) - num_prompt_tokens } tokens > { self .generate_config .get ('max_tokens' , 512 )} . Stopping."
236234 )
237235 del self .async_llm_engine_map [request_id ]
238- while messages [- 1 ]["role" ] == "tool" :
239- messages .pop ()
240- return messages , logprobs
236+ return messages , response_input_ids , logprobs
241237 async_producer = self ._select_async_producer (request_id = request_id )
242238 agentic_generate_config = copy .deepcopy (self .generate_config )
243239 agentic_generate_config ["max_tokens" ] = self .agentic_config .get ("max_tokens" , 2048 )
@@ -262,7 +258,7 @@ def _run_agentic_pipeline(self, messages):
262258 if tool_call_count > self .tool_call_budget :
263259 print (f"Tool call budget exceeded: { tool_call_count } > { self .tool_call_budget } . Stopping." )
264260 del self .async_llm_engine_map [request_id ]
265- return messages , logprobs
261+ return messages , response_input_ids , logprobs
266262 tool_call_count += len (assistant_message ["tool_calls" ])
267263 handlers = []
268264 for tool_call in assistant_message ["tool_calls" ]:
@@ -277,4 +273,4 @@ def _run_agentic_pipeline(self, messages):
277273 else :
278274 # no further tool call, return the messages
279275 del self .async_llm_engine_map [request_id ]
280- return messages , logprobs
276+ return messages , response_input_ids , logprobs
0 commit comments