Various fixes for tool calling
This commit is contained in:
@@ -12,7 +12,7 @@ import { searchSearxng } from "../search/searxng.js";
|
||||
import { buildOpenAIConversationMessage } from "./message-content.js";
|
||||
import type { ChatMessage } from "./types.js";
|
||||
|
||||
const MAX_TOOL_ROUNDS = 4;
|
||||
const MAX_TOOL_ROUNDS = env.CHAT_MAX_TOOL_ROUNDS;
|
||||
const DEFAULT_WEB_RESULTS = 5;
|
||||
const MAX_WEB_RESULTS = 10;
|
||||
const DEFAULT_FETCH_MAX_CHARACTERS = 12_000;
|
||||
@@ -25,6 +25,7 @@ const MAX_SHELL_COMMAND_CHARACTERS = 20_000;
|
||||
const DEFAULT_SHELL_MAX_OUTPUT_CHARACTERS = 24_000;
|
||||
const MAX_SHELL_MAX_OUTPUT_CHARACTERS = 80_000;
|
||||
const REMOTE_EXEC_MAX_BUFFER_BYTES = 1_000_000;
|
||||
const MAX_DANGLING_TOOL_INTENT_RETRIES = 1;
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
@@ -70,7 +71,7 @@ const CODEX_EXEC_TOOL = {
|
||||
function: {
|
||||
name: "codex_exec",
|
||||
description:
|
||||
"Delegate a coding, terminal, or multi-step software task to a persistent remote Codex CLI workspace. Use for complex code changes, repository inspection, running programs/tests, debugging build failures, or other tasks that need a real shell. Return the remote Codex summary and relevant stdout/stderr.",
|
||||
"Delegate a coding, terminal, or multi-step software task to a persistent remote Codex CLI workspace. Use for complex code changes, repository inspection, running programs/tests, debugging build failures, or other tasks that need a real shell. The task runs non-interactively; the remote Codex instance must make reasonable assumptions, complete the task, and return a final summary with relevant stdout/stderr.",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
@@ -191,11 +192,12 @@ export const CHAT_TOOL_SYSTEM_PROMPT =
|
||||
"You can use tools to gather up-to-date web information when needed. " +
|
||||
"Use web_search for discovery and recent facts, and fetch_url to read the full content of a specific page. " +
|
||||
"Prefer tools when the user asks for current events, verification, sources, or details you do not already have. " +
|
||||
"When you decide tool use is needed, call the tool immediately in the same response; do not say you are running a tool unless you actually call it. " +
|
||||
(env.CHAT_CODEX_TOOL_ENABLED
|
||||
? "Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, and expected report-back format. "
|
||||
? "Use codex_exec when a request needs substantial coding work, repository inspection, shell commands, tests, debugging, or another complex task suited to a persistent Codex workspace. Provide codex_exec a complete prompt with the goal, constraints, assumptions, and expected report-back format. Never ask codex_exec to wait for user input or run interactive commands. "
|
||||
: "") +
|
||||
(env.CHAT_SHELL_TOOL_ENABLED
|
||||
? "Use shell_exec for direct command-line work on the remote devbox, including quick Python programs, calculations, file inspection, running tests, and small scripts. "
|
||||
? "Use shell_exec for direct non-interactive command-line work on the remote devbox, including quick Python programs, calculations, file inspection, running tests, and small scripts. "
|
||||
: "") +
|
||||
"Do not fabricate tool outputs; reason only from provided tool results.";
|
||||
|
||||
@@ -535,7 +537,20 @@ function buildDevboxSshTarget() {
|
||||
|
||||
/**
 * Builds the shell command line that runs `codex exec` for a delegated task.
 *
 * The caller's prompt is embedded under a fixed preamble telling Codex that it
 * is running as a non-interactive batch job: no questions, no interactive
 * commands, make reasonable assumptions, finish in one run, and end with a
 * final report.
 *
 * @param prompt - Task description to hand to Codex; placed after the "Task:" line.
 * @returns A single command string of the form
 *   `mkdir -p <workdir> && cd <workdir> && codex exec ...`.
 */
function buildRemoteCodexCommand(prompt: string): string {
  const workdir = env.CHAT_CODEX_REMOTE_WORKDIR.trim();

  // Only the wrapped prompt is ever passed to Codex; the raw prompt must not be
  // used on its own, otherwise the batch-mode rules below are skipped.
  const wrappedPrompt = [
    "You are running in a non-interactive batch environment.",
    "",
    "Rules:",
    "- Do not ask questions or wait for user input.",
    "- Do not use interactive commands, editors, pagers, or prompts.",
    "- If details are ambiguous, make a reasonable assumption and continue.",
    "- Complete the task in one run, including any requested file edits, commands, and verification.",
    "- End with a concise final report that includes changed files, commands run, and outcomes.",
    "",
    "Task:",
    prompt,
  ].join("\n");

  // stdin is redirected from /dev/null so nothing in the run can block on
  // terminal input. NOTE(review): shellQuote is defined elsewhere in this file;
  // presumably it escapes its argument for safe use as one shell word — confirm.
  const codexCommand = `codex exec --skip-git-repo-check ${shellQuote(wrappedPrompt)} < /dev/null`;

  return `mkdir -p ${shellQuote(workdir)} && cd ${shellQuote(workdir)} && ${codexCommand}`;
}
|
||||
|
||||
@@ -595,6 +610,7 @@ async function runCodexExecTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
|
||||
const run = async (keyPath?: string) => {
|
||||
const sshArgs = [
|
||||
"-n",
|
||||
"-o",
|
||||
"BatchMode=yes",
|
||||
"-o",
|
||||
@@ -662,6 +678,7 @@ async function runShellExecTool(input: unknown): Promise<ToolRunOutcome> {
|
||||
|
||||
const run = async (keyPath?: string) => {
|
||||
const sshArgs = [
|
||||
"-n",
|
||||
"-o",
|
||||
"BatchMode=yes",
|
||||
"-o",
|
||||
@@ -756,6 +773,31 @@ function buildEventArgs(name: string, args: Record<string, unknown>) {
|
||||
return args;
|
||||
}
|
||||
|
||||
/**
 * Heuristically decides whether an assistant message only *announces* a tool
 * run ("Let me run that…", "Running shell_exec now", "One moment") instead of
 * being a real answer.
 *
 * The text is lower-cased, Markdown punctuation is replaced by spaces and
 * whitespace is collapsed before matching. Empty text, text longer than 800
 * characters after normalization, and text that hands control back to the user
 * ("let me know", "if you want / would like") are never treated as dangling
 * intent.
 *
 * @param text - Raw assistant message content.
 * @returns `true` when the message looks like an unfulfilled promise to run a tool.
 */
function looksLikeDanglingToolIntent(text: string): boolean {
  const normalized = text
    .toLowerCase()
    // Fold typographic apostrophes so "I’ll" is matched by the i'?ll pattern below.
    .replace(/[\u2018\u2019]/g, "'")
    // Strip Markdown punctuation. This also turns "_" into a space, so tool
    // names reach the patterns below as "shell exec" / "codex exec".
    .replace(/[`*_>#-]/g, " ")
    .replace(/\s+/g, " ")
    .trim();

  if (!normalized) return false;
  // Long messages are assumed to be substantive answers rather than a stall.
  if (normalized.length > 800) return false;
  if (/\blet me know\b/.test(normalized) || /\bif you (want|would like)\b/.test(normalized)) return false;

  return (
    // Tool names are written with a space because underscores were stripped
    // above; the underscore form could never match the normalized text.
    /\b(calling|running|executing|trying|checking|testing)\b.{0,80}\b(now|it|tool|command|shell exec|codex exec)\b/.test(normalized) ||
    /\b(let me|i'?ll|i will)\b.{0,120}\b(run|execute|call|try|check|test)\b/.test(normalized) ||
    /\b(stand by|hang on|one moment)\b/.test(normalized)
  );
}
|
||||
|
||||
/**
 * Appends two messages to the conversation, in place, so the next model turn
 * can recover from an announced-but-missing tool call:
 *
 * 1. the assistant text that was just produced, and
 * 2. a system message instructing the model to either call the tool now or
 *    answer directly without promising a tool run.
 *
 * @param conversation - Message list to mutate. Typed as `unknown[]` because
 *   this function only appends and never reads existing entries.
 * @param text - The assistant message that claimed a tool would be run.
 */
function appendDanglingToolIntentCorrection(conversation: unknown[], text: string): void {
  conversation.push(
    { role: "assistant", content: text },
    {
      role: "system",
      content:
        "Internal correction: the previous assistant message claimed it would run a tool, but no tool call was made. If the task needs an available tool, call it now. Otherwise provide the final answer directly without saying you will run a tool.",
    },
  );
}
|
||||
|
||||
function mergeUsage(acc: Required<ToolAwareUsage>, usage: any) {
|
||||
if (!usage) return false;
|
||||
acc.inputTokens += usage.prompt_tokens ?? 0;
|
||||
@@ -833,6 +875,7 @@ export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams):
|
||||
const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
||||
let sawUsage = false;
|
||||
let totalToolCalls = 0;
|
||||
let danglingToolIntentRetries = 0;
|
||||
|
||||
for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
|
||||
const completion = await params.client.chat.completions.create({
|
||||
@@ -858,8 +901,14 @@ export async function runToolAwareOpenAIChat(params: ToolAwareCompletionParams):
|
||||
|
||||
const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
|
||||
if (!toolCalls.length) {
|
||||
const text = typeof message.content === "string" ? message.content : "";
|
||||
if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(text)) {
|
||||
danglingToolIntentRetries += 1;
|
||||
appendDanglingToolIntentCorrection(conversation, text);
|
||||
continue;
|
||||
}
|
||||
return {
|
||||
text: typeof message.content === "string" ? message.content : "",
|
||||
text,
|
||||
usage: sawUsage ? usageAcc : undefined,
|
||||
raw: { responses: rawResponses, toolCallsUsed: totalToolCalls },
|
||||
toolEvents,
|
||||
@@ -914,6 +963,7 @@ export async function* runToolAwareOpenAIChatStream(
|
||||
const usageAcc: Required<ToolAwareUsage> = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };
|
||||
let sawUsage = false;
|
||||
let totalToolCalls = 0;
|
||||
let danglingToolIntentRetries = 0;
|
||||
|
||||
for (let round = 0; round < MAX_TOOL_ROUNDS; round += 1) {
|
||||
const stream = await params.client.chat.completions.create({
|
||||
@@ -938,9 +988,6 @@ export async function* runToolAwareOpenAIChatStream(
|
||||
const deltaText = choice?.delta?.content ?? "";
|
||||
if (typeof deltaText === "string" && deltaText.length) {
|
||||
roundText += deltaText;
|
||||
if (roundToolCalls.size === 0) {
|
||||
yield { type: "delta", text: deltaText };
|
||||
}
|
||||
}
|
||||
|
||||
const deltaToolCalls = Array.isArray(choice?.delta?.tool_calls) ? choice.delta.tool_calls : [];
|
||||
@@ -969,6 +1016,14 @@ export async function* runToolAwareOpenAIChatStream(
|
||||
}));
|
||||
|
||||
if (!normalizedToolCalls.length) {
|
||||
if (danglingToolIntentRetries < MAX_DANGLING_TOOL_INTENT_RETRIES && looksLikeDanglingToolIntent(roundText)) {
|
||||
danglingToolIntentRetries += 1;
|
||||
appendDanglingToolIntentCorrection(conversation, roundText);
|
||||
continue;
|
||||
}
|
||||
if (roundText) {
|
||||
yield { type: "delta", text: roundText };
|
||||
}
|
||||
yield {
|
||||
type: "done",
|
||||
result: {
|
||||
@@ -982,7 +1037,7 @@ export async function* runToolAwareOpenAIChatStream(
|
||||
}
|
||||
|
||||
totalToolCalls += normalizedToolCalls.length;
|
||||
conversation.push({
|
||||
const assistantToolCallMessage: any = {
|
||||
role: "assistant",
|
||||
tool_calls: normalizedToolCalls.map((call) => ({
|
||||
id: call.id,
|
||||
@@ -992,7 +1047,11 @@ export async function* runToolAwareOpenAIChatStream(
|
||||
arguments: call.arguments,
|
||||
},
|
||||
})),
|
||||
});
|
||||
};
|
||||
if (roundText) {
|
||||
assistantToolCallMessage.content = roundText;
|
||||
}
|
||||
conversation.push(assistantToolCallMessage);
|
||||
|
||||
for (const call of normalizedToolCalls) {
|
||||
const { event, toolResult } = await executeToolCallAndBuildEvent(call, params);
|
||||
|
||||
Reference in New Issue
Block a user