diff --git a/.gitignore b/.gitignore index 88b38d2..5b0195d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ dist/ .env.local .DS_Store *.log +.codex/code-mode-candidates/ diff --git a/README.md b/README.md index 3b44bc0..c0d1570 100644 --- a/README.md +++ b/README.md @@ -76,8 +76,15 @@ bun run flow list bun run flow:backend serve --cwd "$(pwd)" ``` -Code Mode flow steps are present on `main` but require -`CODEX_FLOWS_ENABLE_CODE_MODE=1` before execution. +Code Mode flow steps are present on `main` behind one mode flag: + +```bash +CODEX_FLOWS_MODE=code-mode +``` + +That mode enables `runner = "code-mode"` steps and makes stdio app-server +launches default to `bunx @peezy.tech/codex`. `CODEX_APP_SERVER_CODEX_COMMAND` +still wins when a specific local binary should be used. ## Development Flow diff --git a/apps/cli/src/actions.ts b/apps/cli/src/actions.ts index 31a6d8d..b5dcab7 100644 --- a/apps/cli/src/actions.ts +++ b/apps/cli/src/actions.ts @@ -18,6 +18,7 @@ export const APP_SERVER_ACTIONS = [ "thread/unarchive", "thread/compact/start", "thread/shellCommand", + "thread/codeMode/execute", "thread/approveGuardianDeniedAction", "thread/backgroundTerminals/clean", "thread/rollback", diff --git a/apps/cli/src/args.ts b/apps/cli/src/args.ts index 6c76d50..83a7d0e 100644 --- a/apps/cli/src/args.ts +++ b/apps/cli/src/args.ts @@ -3,6 +3,17 @@ import { isAppServerAction, type AppServerAction } from "./actions.ts"; export type ParsedArgs = | { type: "help" } | { type: "actions" } + | { type: "extract-code-mode"; outputDir: string | undefined } + | { type: "extract-code-mode-tool-input"; outputDir: string | undefined } + | { + type: "run-code-mode"; + file: string; + cwd: string | undefined; + codexCommand: string | undefined; + url: string; + timeoutMs: number; + pretty: boolean; + } | { type: "call"; action: AppServerAction; @@ -20,6 +31,9 @@ export function parseArgs(argv: string[], env: NodeJS.ProcessEnv): ParsedArgs { let url = 
env.CODEX_WORKSPACE_APP_SERVER_WS_URL ?? DEFAULT_WS_URL; let timeoutMs = defaultTimeoutMs; let pretty = true; + let cwd: string | undefined; + let outputDir: string | undefined; + let codexCommand = env.CODEX_APP_SERVER_CODEX_COMMAND; for (let index = 0; index < argv.length; index += 1) { const arg = argv[index]; @@ -67,6 +81,45 @@ export function parseArgs(argv: string[], env: NodeJS.ProcessEnv): ParsedArgs { pretty = true; continue; } + if (arg === "--cwd") { + const value = argv[index + 1]; + if (!value) { + throw new Error("--cwd requires a path"); + } + cwd = value; + index += 1; + continue; + } + if (arg.startsWith("--cwd=")) { + cwd = arg.slice("--cwd=".length); + continue; + } + if (arg === "--output-dir") { + const value = argv[index + 1]; + if (!value) { + throw new Error("--output-dir requires a path"); + } + outputDir = value; + index += 1; + continue; + } + if (arg.startsWith("--output-dir=")) { + outputDir = arg.slice("--output-dir=".length); + continue; + } + if (arg === "--codex-command") { + const value = argv[index + 1]; + if (!value) { + throw new Error("--codex-command requires a command path"); + } + codexCommand = value; + index += 1; + continue; + } + if (arg.startsWith("--codex-command=")) { + codexCommand = arg.slice("--codex-command=".length); + continue; + } if (arg === "--") { positionals.push(...argv.slice(index + 1)); break; @@ -87,6 +140,33 @@ export function parseArgs(argv: string[], env: NodeJS.ProcessEnv): ParsedArgs { if (command === "actions") { return { type: "actions" }; } + if (command === "extract-code-mode") { + return { + type: "extract-code-mode", + outputDir, + }; + } + if (command === "extract-code-mode-tool-input") { + return { + type: "extract-code-mode-tool-input", + outputDir, + }; + } + if (command === "run-code-mode") { + const file = firstPositional(positionals.slice(1)); + if (!file) { + throw new Error("run-code-mode requires a candidate file"); + } + return { + type: "run-code-mode", + file, + cwd, + 
codexCommand, + url, + timeoutMs, + pretty, + }; + } const action = command === "call" ? positionals[1] : command; const paramsParts = command === "call" ? positionals.slice(2) : positionals.slice(1); @@ -106,6 +186,10 @@ export function parseArgs(argv: string[], env: NodeJS.ProcessEnv): ParsedArgs { }; } +function firstPositional(values: string[]) { + return values.find((value) => !value.startsWith("--")); +} + function parseTimeout(value: string) { const parsed = Number(value); if (!Number.isInteger(parsed) || parsed <= 0) { diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index 64eb4ac..150f57d 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -3,6 +3,11 @@ import { CodexAppServerClient } from "@peezy.tech/codex-flows"; import { APP_SERVER_ACTIONS } from "./actions.ts"; import { DEFAULT_WS_URL, parseArgs } from "./args.ts"; +import { + extractCodeModeCandidates, + extractCodeModeToolInputCandidates, + runCodeModeCandidate, +} from "./recipes.ts"; async function main() { try { @@ -14,6 +19,15 @@ async function main() { case "actions": write(`${APP_SERVER_ACTIONS.join("\n")}\n`); return; + case "extract-code-mode": + await extractCodeMode(parsed.outputDir); + return; + case "extract-code-mode-tool-input": + await extractCodeModeToolInput(parsed.outputDir); + return; + case "run-code-mode": + write(formatJson(await runCodeModeCandidate(parsed), parsed.pretty)); + return; case "call": await callAction(parsed); return; @@ -24,6 +38,28 @@ async function main() { } } +async function extractCodeMode(outputDir: string | undefined) { + const result = await extractCodeModeCandidates({ + stdin: process.stdin, + outputDir, + }); + for (const saved of result.saved) { + writeError(`saved ${saved.codePath}\n`); + } + write(formatJson({ continue: true }, true)); +} + +async function extractCodeModeToolInput(outputDir: string | undefined) { + const result = await extractCodeModeToolInputCandidates({ + stdin: process.stdin, + outputDir, + }); + for (const 
saved of result.saved) { + writeError(`saved ${saved.codePath}\n`); + } + write(formatJson({ continue: true }, true)); +} + type CallArgs = Extract, { type: "call" }>; async function callAction(args: CallArgs) { @@ -93,11 +129,20 @@ Usage: codex-app [options] call [params-json] echo '' | codex-app [options] codex-app actions + codex-app extract-code-mode [--output-dir ] + codex-app extract-code-mode-tool-input [--output-dir ] + codex-app run-code-mode [--cwd ] Options: - --url, --ws-url App-server WebSocket URL + --url, --ws-url App-server URL; use stdio:// to spawn a Codex app-server Defaults to CODEX_WORKSPACE_APP_SERVER_WS_URL or ${DEFAULT_WS_URL} --timeout-ms Request timeout in milliseconds + --cwd Working directory for run-code-mode + --output-dir Candidate output dir for extract-code-mode + --codex-command Codex binary for run-code-mode with --url stdio:// + Defaults to CODEX_APP_SERVER_CODEX_COMMAND. + With CODEX_FLOWS_MODE=code-mode, falls back to + bunx @peezy.tech/codex. --compact Print compact JSON --pretty Print pretty JSON -h, --help Show this help @@ -105,6 +150,9 @@ Options: Examples: codex-app thread/list '{"limit": 20, "sourceKinds": []}' echo '{"refreshToken": false}' | codex-app account/read + codex-app extract-code-mode < stop-hook-payload.json + codex-app extract-code-mode-tool-input < pre-tool-use-hook-payload.json + CODEX_FLOWS_MODE=code-mode codex-app --url=stdio:// run-code-mode .codex/code-mode-candidates/turn-a1b2c3.mjs `; } diff --git a/apps/cli/src/recipes.ts b/apps/cli/src/recipes.ts new file mode 100644 index 0000000..7f03ecc --- /dev/null +++ b/apps/cli/src/recipes.ts @@ -0,0 +1,452 @@ +import { createHash } from "node:crypto"; +import { mkdir, readFile, writeFile } from "node:fs/promises"; +import path from "node:path"; + +import { CodexAppServerClient } from "@peezy.tech/codex-flows"; + +type StopHookInput = { + session_id?: unknown; + transcript_path?: unknown; + cwd?: unknown; + hook_event_name?: unknown; + turn_id?: unknown; + 
/**
 * Reads a Stop-hook payload from `options.stdin` and saves every unique fenced
 * JavaScript/TypeScript block found in the last assistant message and, when it
 * can be read, the transcript file.
 *
 * Each candidate is written as `<stem>.mjs` next to a `<stem>.json` metadata
 * sidecar. The stem is the slugged turn id (or a timestamp when the payload has
 * no turn id) followed by the first 12 hex characters of the code's SHA-256.
 *
 * @param options.stdin     Stream carrying the hook payload JSON.
 * @param options.outputDir Output directory, resolved against the payload `cwd`
 *                          (or `process.cwd()`); defaults to `.codex/code-mode-candidates`.
 * @param options.now       Clock override used for the stem fallback and `createdAt`.
 * @returns `{ continue: true, saved }`, where `saved` lists the written file pairs.
 */
export async function extractCodeModeCandidates(
  options: ExtractCodeModeOptions,
): Promise<ExtractCodeModeResult> {
  const input = parseHookInput(await readStream(options.stdin));
  const workspaceCwd = stringValue(input.cwd) ?? process.cwd();
  const outputDir = path.resolve(
    workspaceCwd,
    options.outputDir ?? ".codex/code-mode-candidates",
  );
  const rawSources = await readCandidateSources(input);
  const candidates = uniqueCodeBlocks(rawSources.flatMap(extractJavaScriptBlocks));
  const saved: ExtractCodeModeResult["saved"] = [];

  // Nothing to save: do not leave an empty candidates directory behind on
  // every hook invocation (the tool-input extractor returns early the same way).
  if (candidates.length === 0) {
    return { continue: true, saved };
  }

  // Resolve the clock once so the file stem and `createdAt` of every candidate
  // written by this invocation agree with each other.
  const now = options.now ?? new Date();
  const stemPrefix = slug(stringValue(input.turn_id) ?? timestamp(now));
  const createdAt = now.toISOString();

  await mkdir(outputDir, { recursive: true });
  for (const code of candidates) {
    const hash = createHash("sha256").update(code).digest("hex").slice(0, 12);
    const stem = `${stemPrefix}-${hash}`;
    const codePath = path.join(outputDir, `${stem}.mjs`);
    const metadataPath = path.join(outputDir, `${stem}.json`);
    await writeFile(codePath, `${code.trim()}\n`);
    await writeFile(
      metadataPath,
      `${JSON.stringify(
        {
          version: 1,
          sessionId: stringValue(input.session_id),
          turnId: stringValue(input.turn_id),
          transcriptPath: stringValue(input.transcript_path),
          cwd: workspaceCwd,
          codePath,
          createdAt,
          source: "codex-stop-hook",
          status: "candidate",
        },
        null,
        2,
      )}\n`,
    );
    saved.push({ codePath, metadataPath });
  }

  return { continue: true, saved };
}
/**
 * Evaluates a saved Code Mode candidate as the body of an async function.
 *
 * The candidate sees these bindings: `tools`, `text`, `image`, `store`, `load`,
 * `notify`, `setTimeout`, `clearTimeout`, `ALL_TOOLS`, `yield_control` and `exit`.
 * `text` and `notify` append to `options.output`; `store`/`load` use
 * `options.storedValues`; `exit()` ends the run without reporting an error.
 *
 * SECURITY: `source` is compiled with the `AsyncFunction` constructor, which is
 * equivalent to `eval`. The code runs inside this process with full access to
 * its globals, and the replay tools built from `options` request the
 * `dangerFullAccess` sandbox policy. Only run candidates that have been reviewed.
 *
 * @param source  JavaScript source text of the candidate.
 * @param options Client, working directory, output sink, value store and timeout
 *                forwarded to the replay tools.
 * @throws Whatever the candidate throws, except the internal exit signal.
 */
async function evaluateCodeModeSource(
  source: string,
  options: {
    client: CodexAppServerClient;
    cwd: string;
    output: string[];
    storedValues: Map<string, unknown>;
    timeoutMs: number;
  },
) {
  const tools = createReplayTools(options);
  const text = (value: unknown) => {
    options.output.push(outputText(value));
  };
  const image = () => {
    // Image replay is intentionally a no-op until the CLI has a display target.
  };
  const store = (key: string, value: unknown) => {
    options.storedValues.set(key, value);
  };
  const load = (key: string) => options.storedValues.get(key);
  const notify = (value: unknown) => {
    options.output.push(outputText(value));
  };
  const yieldControl = async () => undefined;
  const exit = () => {
    // Unwinds the candidate; recognised and swallowed by the catch below.
    throw new CodeModeExit();
  };
  // There is no global `AsyncFunction`; obtain the constructor from an instance.
  const AsyncFunction = async function () {
    return undefined;
  }.constructor as new (...args: string[]) => (...values: unknown[]) => Promise<unknown>;
  // NOTE(security): dynamic evaluation of externally supplied code — see the doc comment.
  const run = new AsyncFunction(
    "tools",
    "text",
    "image",
    "store",
    "load",
    "notify",
    "setTimeout",
    "clearTimeout",
    "ALL_TOOLS",
    "yield_control",
    "exit",
    source,
  );

  try {
    await run(
      tools,
      text,
      image,
      store,
      load,
      notify,
      setTimeout,
      clearTimeout,
      ALL_REPLAY_TOOLS,
      yieldControl,
      exit,
    );
  } catch (error) {
    if (error instanceof CodeModeExit) {
      return;
    }
    throw error;
  }
}
/**
 * Collects the contents of fenced code blocks tagged `js`, `javascript`, `mjs`,
 * `ts` or `typescript` (case-insensitive) from `raw`.
 *
 * @param raw Text that may contain Markdown code fences.
 * @returns Trimmed block bodies in document order; blocks that are empty after
 *          trimming are dropped. Duplicates are kept.
 */
function extractJavaScriptBlocks(raw: string): string[] {
  // The language tag must be followed only by whitespace up to the line break,
  // so fences such as ```jsx or ```json are not picked up.
  const fenced = /```(?:js|javascript|mjs|ts|typescript)\s*\n([\s\S]*?)```/gi;
  const blocks: string[] = [];
  for (const match of raw.matchAll(fenced)) {
    const code = match[1]?.trim();
    if (code) {
      blocks.push(code);
    }
  }
  return blocks;
}
/**
 * Loads the optional metadata sidecar of a candidate file and returns its `cwd`.
 *
 * The sidecar sits next to the candidate and has the same base name with a
 * `.json` extension (`turn-1.mjs` -> `turn-1.json`). The path is derived with
 * `path.parse`, so only the final path segment is considered: an extensionless
 * candidate inside a dotted directory (for example `.codex/…/turn`) maps to
 * `turn.json` in that same directory.
 *
 * @param file Path of the candidate source file.
 * @returns `{ cwd }` when the sidecar is a JSON object with a string `cwd`,
 *          otherwise `{}`. Read and parse failures are treated as "no metadata".
 */
async function readCandidateMetadata(file: string): Promise<{ cwd?: string }> {
  const { dir, name } = path.parse(file);
  const metadataPath = path.join(dir, `${name}.json`);
  try {
    const parsed: unknown = JSON.parse(await readFile(metadataPath, "utf8"));
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      const cwd: unknown = Reflect.get(parsed, "cwd");
      if (typeof cwd === "string") {
        return { cwd };
      }
    }
  } catch {
    // Metadata is optional; explicit --cwd or process cwd can still run the file.
  }
  return {};
}
/**
 * Type guard for plain keyed objects.
 *
 * @param value Any value, typically the result of `JSON.parse`.
 * @returns `true` for non-null objects that are not arrays; `false` for
 *          primitives, `null`, `undefined` and arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
"/tmp/codex", + url: "stdio://", + timeoutMs: 90_000, + pretty: true, + }); +}); + +test("uses environment Codex command for stdio runner", () => { + expect( + parseArgs(["--url=stdio://", "run-code-mode", "demo.mjs"], { + CODEX_APP_SERVER_CODEX_COMMAND: "/tmp/forked-codex", + }), + ).toMatchObject({ + type: "run-code-mode", + codexCommand: "/tmp/forked-codex", + }); +}); + test("rejects unknown actions before connecting", () => { expect(() => parseArgs(["not-a-method"], {})).toThrow("Unknown action"); }); diff --git a/apps/cli/test/recipes.test.ts b/apps/cli/test/recipes.test.ts new file mode 100644 index 0000000..3cdd62b --- /dev/null +++ b/apps/cli/test/recipes.test.ts @@ -0,0 +1,71 @@ +import { expect, test } from "bun:test"; +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { Readable } from "node:stream"; + +import { extractCodeModeToolInputCandidates } from "../src/recipes.ts"; + +test("extracts Code Mode exec source from PreToolUse tool input", async () => { + const directory = await mkdtemp(path.join(os.tmpdir(), "codex-cli-extract-")); + const transcriptPath = path.join(directory, "transcript.jsonl"); + const source = "const answer = 42;\ntext(answer);"; + + try { + const result = await extractCodeModeToolInputCandidates({ + stdin: Readable.from([ + JSON.stringify({ + hook_event_name: "PreToolUse", + tool_name: "exec", + session_id: "session-1", + turn_id: "turn-1", + tool_use_id: "tool-1", + cwd: directory, + transcript_path: transcriptPath, + tool_input: { source }, + }), + ]), + outputDir: ".candidates", + now: new Date("2026-01-02T03:04:05.000Z"), + }); + + expect(result.saved).toHaveLength(1); + const saved = result.saved[0]; + expect(saved).toBeDefined(); + if (!saved) { + throw new Error("expected a saved candidate"); + } + + expect(path.dirname(saved.codePath)).toBe(path.join(directory, ".candidates")); + expect(await readFile(saved.codePath, "utf8")).toBe(`${source}\n`); + 
+ const metadata = JSON.parse(await readFile(saved.metadataPath, "utf8")) as Record; + expect(metadata.version).toBe(1); + expect(metadata.sessionId).toBe("session-1"); + expect(metadata.turnId).toBe("turn-1"); + expect(metadata.toolUseId).toBe("tool-1"); + expect(metadata.transcriptPath).toBe(transcriptPath); + expect(metadata.cwd).toBe(directory); + expect(metadata.codePath).toBe(saved.codePath); + expect(metadata.createdAt).toBe("2026-01-02T03:04:05.000Z"); + expect(metadata.source).toBe("codex-pre-tool-use-exec"); + expect(metadata.status).toBe("candidate"); + } finally { + await rm(directory, { recursive: true, force: true }); + } +}); + +test("ignores non-exec PreToolUse payloads", async () => { + const result = await extractCodeModeToolInputCandidates({ + stdin: Readable.from([ + JSON.stringify({ + hook_event_name: "PreToolUse", + tool_name: "wait", + tool_input: { source: "text('not exec');" }, + }), + ]), + outputDir: ".candidates", + }); + + expect(result).toEqual({ continue: true, saved: [] }); +}); diff --git a/apps/flow-backend-systemd-local/src/config.ts b/apps/flow-backend-systemd-local/src/config.ts index 9b854de..d494af1 100644 --- a/apps/flow-backend-systemd-local/src/config.ts +++ b/apps/flow-backend-systemd-local/src/config.ts @@ -142,6 +142,7 @@ export function helpText(): string { "Environment:", " CODEX_FLOW_BACKEND_SECRET Optional HMAC secret for HTTP dispatches", " CODEX_FLOW_BACKEND_EXECUTOR direct or systemd-run", + " CODEX_FLOWS_MODE Set to code-mode to enable Code Mode and fork defaults", " CODEX_FLOWS_ENABLE_CODE_MODE Enables runner = \"code-mode\" steps", " CODEX_FLOW_PUSH/PUBLISH Optional release-flow action gates", "", @@ -168,6 +169,7 @@ function executorEnv(value: string | undefined): FlowBackendExecutor { function forwardEnv(value: string | undefined): string[] { const defaults = [ + "CODEX_FLOWS_MODE", "CODEX_FLOWS_ENABLE_CODE_MODE", "CODEX_FLOW_COMMIT", "CODEX_FLOW_PUSH", @@ -175,6 +177,8 @@ function forwardEnv(value: string | 
undefined): string[] { "CODEX_FLOW_FORCE", "CODEX_FLOW_SQUASH_PATCH_STACK", "CODEX_APP_SERVER_CODEX_COMMAND", + "CODEX_APP_SERVER_CODEX_PACKAGE", + "CODEX_APP_SERVER_BUNX_COMMAND", "CODEX_HOME", "PEEZY_CODEX_REPO", "PEEZY_CODEX_TARGET_BRANCH", diff --git a/apps/flow-backend-systemd-local/test/backend.test.ts b/apps/flow-backend-systemd-local/test/backend.test.ts index e9a87e1..0284547 100644 --- a/apps/flow-backend-systemd-local/test/backend.test.ts +++ b/apps/flow-backend-systemd-local/test/backend.test.ts @@ -70,6 +70,7 @@ test("builds systemd-run commands without executing them", () => { flowName: "demo", stepName: "hello", env: { + CODEX_FLOWS_MODE: "code-mode", CODEX_FLOWS_ENABLE_CODE_MODE: "1", CODEX_FLOW_PUSH: "1", PEEZY_CODEX_REPO: "/tmp/codex", @@ -79,6 +80,7 @@ test("builds systemd-run commands without executing them", () => { expect(command.command).toBe("systemd-run"); expect(command.args).toContain("--user"); expect(command.args).toContain("--wait"); + expect(command.args).toContain("--setenv=CODEX_FLOWS_MODE=code-mode"); expect(command.args).toContain("--setenv=CODEX_FLOWS_ENABLE_CODE_MODE=1"); expect(command.args).toContain("--setenv=CODEX_FLOW_PUSH=1"); expect(command.args).toContain("--setenv=PEEZY_CODEX_REPO=/tmp/codex"); diff --git a/bun.lock b/bun.lock index ca6b232..e962170 100644 --- a/bun.lock +++ b/bun.lock @@ -88,7 +88,7 @@ }, "packages/codex-client": { "name": "@peezy.tech/codex-flows", - "version": "0.1.1", + "version": "0.2.0", "devDependencies": { "@types/bun": "^1.3.13", "@types/node": "^22.10.10", diff --git a/docs/flows.md b/docs/flows.md index d953c17..41e8bf4 100644 --- a/docs/flows.md +++ b/docs/flows.md @@ -71,14 +71,19 @@ JSON. `runner = "code-mode"` starts a Codex app-server and calls the fork-only `thread/codeMode/execute` method through a raw JSON-RPC request. 
Code Mode code -is present on `main`, but execution is disabled unless: +is present on `main`, but execution is disabled unless codex-flows is set to +Code Mode: ```bash -CODEX_FLOWS_ENABLE_CODE_MODE=1 +CODEX_FLOWS_MODE=code-mode ``` -Set `CODEX_APP_SERVER_CODEX_COMMAND` when Code Mode should run against the -Peezy fork instead of the default `codex` binary. +That single mode setting also makes stdio app-server launches default to +`bunx @peezy.tech/codex`. Set `CODEX_APP_SERVER_CODEX_COMMAND` when Code Mode +should run against a specific local binary instead, or +`CODEX_APP_SERVER_CODEX_PACKAGE` when it should use a different npm package. +The older `CODEX_FLOWS_ENABLE_CODE_MODE=1` gate is still accepted as a narrow +runner-only escape hatch. ## Commands @@ -156,7 +161,7 @@ The upstream `openai/codex` release event fans out to two flow packages: commits when changed, and can push/trigger trusted publishing when configured. - `peezy-codex-fork`: Code Mode runner. Rebases the Peezy fork patch stack onto the upstream release tag, optionally squashes the patch stack, verifies the - fork, and can push/tag to trigger the fork release workflow when configured. + fork, and can push/tag to trigger the fork release flow when configured. Publishing is controlled by flow config and environment. 
The packaged defaults commit local changes when appropriate but do not push or publish until diff --git a/package.json b/package.json index 9468dcb..2455dd0 100644 --- a/package.json +++ b/package.json @@ -38,10 +38,12 @@ "scripts": { "build": "bun run --workspaces build", "check:types": "bun run --workspaces check:types", + "codex:update": "bun scripts/run-codex-release-update-thread.ts", "dev": "bun run --filter web dev", "dev:web": "bun run --filter web dev", "flow": "bun apps/flow-runner/src/index.ts", "flow:backend": "bun apps/flow-backend-systemd-local/src/index.ts", + "replay:thread": "bun scripts/run-code-mode-in-new-thread.ts", "start": "bun run --filter web preview", "start:discord:debug:commentary": "bun run --filter codex-discord-bridge start:debug:commentary", "test": "bun run --filter @peezy.tech/codex-flows test && bun run --filter @peezy.tech/flow-runtime test && bun run --filter codex-flow-systemd-local test && bun run --filter codex-app-cli test && bun run --filter codex-discord-bridge test" diff --git a/packages/codex-client/README.md b/packages/codex-client/README.md index 7f5e440..00302c6 100644 --- a/packages/codex-client/README.md +++ b/packages/codex-client/README.md @@ -32,7 +32,12 @@ This package owns the low-level JSON-RPC client, transports, framework-agnostic ## Transports -`CodexAppServerClient` defaults to a stdio transport that starts `codex app-server` when no explicit transport is provided. +`CodexAppServerClient` defaults to a stdio transport that starts +`codex app-server` when no explicit transport is provided. When +`CODEX_FLOWS_MODE=code-mode`, the same stdio default becomes +`bunx @peezy.tech/codex app-server`, so Code Mode callers automatically use the +Peezy fork. Set `CODEX_APP_SERVER_CODEX_COMMAND` or pass +`transportOptions.codexCommand` to use a locally built binary instead. 
It can also connect to an existing WebSocket app-server when `CODEX_WORKSPACE_APP_SERVER_WS_URL` is set, or when `webSocketTransportOptions.url` is passed. diff --git a/packages/codex-client/package.json b/packages/codex-client/package.json index 9e5043a..0ca1c99 100644 --- a/packages/codex-client/package.json +++ b/packages/codex-client/package.json @@ -1,6 +1,6 @@ { "name": "@peezy.tech/codex-flows", - "version": "0.1.1", + "version": "0.2.0", "description": "Codex app-server JSON-RPC client, flow helpers, and generated protocol types.", "type": "module", "license": "Apache-2.0", diff --git a/packages/codex-client/src/app-server/stdio-transport.ts b/packages/codex-client/src/app-server/stdio-transport.ts index 8cb5fa0..aa9a2e3 100644 --- a/packages/codex-client/src/app-server/stdio-transport.ts +++ b/packages/codex-client/src/app-server/stdio-transport.ts @@ -10,6 +10,11 @@ import { requireJsonRpcResult, stringifyJsonRpc, } from "./rpc.ts"; +import { + CODEX_FLOWS_CODE_MODE, + DEFAULT_CODE_MODE_CODEX_PACKAGE, + codexFlowsMode, +} from "../mode.ts"; type PendingRequest = { resolve: (value: JsonRpcResponse) => void; @@ -28,6 +33,14 @@ export type CodexStdioTransportOptions = { requestTimeoutMs?: number; }; +export type ResolvedCodexStdioCommand = { + command: string; + args: string[]; +}; + +export const DEFAULT_CODEX_COMMAND = "codex"; +export const DEFAULT_CODEX_NPM_PACKAGE = DEFAULT_CODE_MODE_CODEX_PACKAGE; + export class CodexStdioTransport extends CodexEventEmitter { readonly requestTimeoutMs: number; #codexCommand: string; @@ -40,10 +53,9 @@ export class CodexStdioTransport extends CodexEventEmitter { constructor(options: CodexStdioTransportOptions = {}) { super(); - this.#codexCommand = options.codexCommand ?? "codex"; - const appServerSocket = - options.appServerSocket ?? process.env.CODEX_WORKSPACE_APP_SERVER_SOCK; - this.#args = options.args ?? 
/**
 * Decides which executable and arguments start the stdio app-server.
 *
 * Precedence:
 *  1. `options.codexCommand`, then `CODEX_APP_SERVER_CODEX_COMMAND` — run as-is
 *     with the app-server arguments.
 *  2. `CODEX_APP_SERVER_CODEX_PACKAGE`, or `CODEX_FLOWS_MODE=code-mode` — run
 *     through `bunx` (or `CODEX_APP_SERVER_BUNX_COMMAND`) with the package name
 *     prepended to the arguments.
 *  3. The plain `codex` binary.
 *
 * Blank or whitespace-only values count as "not set" at every step, and the
 * chosen command is returned trimmed.
 *
 * @param options Caller overrides for the command, socket and argument list.
 * @param env     Environment to consult; defaults to `process.env`.
 * @returns The command and argument vector to spawn.
 */
export function resolveCodexStdioCommand(
  options: Pick<CodexStdioTransportOptions, "codexCommand" | "appServerSocket" | "args"> = {},
  env: Record<string, string | undefined> = process.env,
): ResolvedCodexStdioCommand {
  const nonBlank = (value: string | undefined): string | undefined => value?.trim() || undefined;

  const appServerSocket = options.appServerSocket ?? env.CODEX_WORKSPACE_APP_SERVER_SOCK;
  const args = options.args ?? defaultCodexArgs(appServerSocket);

  // A blank override (e.g. `--codex-command=`) must not hide the env setting,
  // so each source is normalised before falling through to the next.
  const explicitCommand =
    nonBlank(options.codexCommand) ?? nonBlank(env.CODEX_APP_SERVER_CODEX_COMMAND);
  if (explicitCommand) {
    return { command: explicitCommand, args };
  }

  const packageName = nonBlank(env.CODEX_APP_SERVER_CODEX_PACKAGE);
  if (packageName || codexFlowsMode(env) === CODEX_FLOWS_CODE_MODE) {
    return {
      command: nonBlank(env.CODEX_APP_SERVER_BUNX_COMMAND) ?? "bunx",
      args: [packageName ?? DEFAULT_CODE_MODE_CODEX_PACKAGE, ...args],
    };
  }

  return { command: DEFAULT_CODEX_COMMAND, args };
}
--git a/packages/codex-client/src/mode.ts b/packages/codex-client/src/mode.ts new file mode 100644 index 0000000..bf0d426 --- /dev/null +++ b/packages/codex-client/src/mode.ts @@ -0,0 +1,20 @@ +export const CODEX_FLOWS_CODE_MODE = "code-mode"; +export const DEFAULT_CODE_MODE_CODEX_PACKAGE = "@peezy.tech/codex"; + +export function codexFlowsMode( + env: Record = process.env, +): string | undefined { + const value = env.CODEX_FLOWS_MODE?.trim().toLowerCase(); + return value || undefined; +} + +export function codexFlowsCodeModeEnabled( + env: Record = process.env, +): boolean { + return booleanEnv(env.CODEX_FLOWS_ENABLE_CODE_MODE) || codexFlowsMode(env) === CODEX_FLOWS_CODE_MODE; +} + +function booleanEnv(value: string | undefined): boolean { + const normalized = value?.trim().toLowerCase(); + return normalized === "1" || normalized === "true" || normalized === "yes" || normalized === "on"; +} diff --git a/packages/codex-client/test/stdio-transport.test.ts b/packages/codex-client/test/stdio-transport.test.ts index 936e53d..0a15021 100644 --- a/packages/codex-client/test/stdio-transport.test.ts +++ b/packages/codex-client/test/stdio-transport.test.ts @@ -2,7 +2,11 @@ import { expect, test } from "bun:test"; import { mkdtemp, rm } from "node:fs/promises"; import os from "node:os"; import path from "node:path"; -import { CodexStdioTransport } from "../src/app-server/stdio-transport.ts"; +import { + DEFAULT_CODEX_NPM_PACKAGE, + CodexStdioTransport, + resolveCodexStdioCommand, +} from "../src/app-server/stdio-transport.ts"; test("round-trips JSON-RPC over Bun stdio transport", async () => { const directory = await mkdtemp(path.join(os.tmpdir(), "codex-stdio-")); @@ -28,6 +32,54 @@ test("round-trips JSON-RPC over Bun stdio transport", async () => { } }); +test("resolves default stdio command from codex-flows mode", () => { + expect(resolveCodexStdioCommand({}, {})).toEqual({ + command: "codex", + args: ["app-server", "--listen", "stdio://", "--enable", "apps", "--enable", 
"hooks"], + }); + expect(resolveCodexStdioCommand({}, { CODEX_FLOWS_MODE: "code-mode" })).toEqual({ + command: "bunx", + args: [ + DEFAULT_CODEX_NPM_PACKAGE, + "app-server", + "--listen", + "stdio://", + "--enable", + "apps", + "--enable", + "hooks", + ], + }); + expect(resolveCodexStdioCommand({}, { CODEX_FLOWS_ENABLE_CODE_MODE: "1" })).toEqual({ + command: "codex", + args: ["app-server", "--listen", "stdio://", "--enable", "apps", "--enable", "hooks"], + }); + expect( + resolveCodexStdioCommand( + { args: ["app-server", "--listen", "stdio://", "--enable", "code_mode"] }, + { + CODEX_FLOWS_MODE: "code-mode", + CODEX_APP_SERVER_CODEX_PACKAGE: "@example/codex", + }, + ), + ).toEqual({ + command: "bunx", + args: ["@example/codex", "app-server", "--listen", "stdio://", "--enable", "code_mode"], + }); +}); + +test("explicit stdio command wins over codex-flows mode", () => { + expect( + resolveCodexStdioCommand( + { codexCommand: "/tmp/codex", args: ["app-server"] }, + { CODEX_FLOWS_MODE: "code-mode" }, + ), + ).toEqual({ + command: "/tmp/codex", + args: ["app-server"], + }); +}); + function fakeAppServerSource(): string { return ` console.error("fake-ready"); diff --git a/packages/flow-runtime/src/run.ts b/packages/flow-runtime/src/run.ts index f8517d1..a6a4cb6 100644 --- a/packages/flow-runtime/src/run.ts +++ b/packages/flow-runtime/src/run.ts @@ -1,3 +1,4 @@ +import { codexFlowsCodeModeEnabled } from "@peezy.tech/codex-flows"; import { runBunStep } from "./runners/bun.ts"; import { runCodeModeStep, type RunCodeModeStepOptions } from "./runners/code-mode.ts"; import type { FlowEvent, FlowResult, FlowStep, LoadedFlow } from "./types.ts"; @@ -28,6 +29,5 @@ export async function runFlowStep(options: RunFlowStepOptions): Promise): boolean { - const value = env.CODEX_FLOWS_ENABLE_CODE_MODE?.trim().toLowerCase(); - return value === "1" || value === "true" || value === "yes" || value === "on"; + return codexFlowsCodeModeEnabled(env); } diff --git 
a/packages/flow-runtime/test/flow-runtime.test.ts b/packages/flow-runtime/test/flow-runtime.test.ts index 43ea6e2..2eae487 100644 --- a/packages/flow-runtime/test/flow-runtime.test.ts +++ b/packages/flow-runtime/test/flow-runtime.test.ts @@ -9,6 +9,7 @@ import { runFlowStep, validateJsonSchema, } from "../src/index.ts"; +import { codeModeEnabled } from "../src/run.ts"; import type { FlowEvent } from "../src/index.ts"; test("discovers installed flows before source flows", async () => { @@ -89,6 +90,12 @@ test("bundled Code Mode flow remains gated by the feature flag", async () => { ).rejects.toThrow("requires CODEX_FLOWS_ENABLE_CODE_MODE=1"); }); +test("CODEX_FLOWS_MODE=code-mode enables Code Mode flow steps", () => { + expect(codeModeEnabled({})).toBe(false); + expect(codeModeEnabled({ CODEX_FLOWS_ENABLE_CODE_MODE: "1" })).toBe(true); + expect(codeModeEnabled({ CODEX_FLOWS_MODE: "code-mode" })).toBe(true); +}); + test("validates simple JSON schema constraints", () => { const schema = { type: "object", diff --git a/scripts/codex-release-update.code-mode.js b/scripts/codex-release-update.code-mode.js new file mode 100644 index 0000000..e605f5f --- /dev/null +++ b/scripts/codex-release-update.code-mode.js @@ -0,0 +1,351 @@ +const release = config.release; +const commands = []; + +function q(value) { + return "'" + String(value).replaceAll("'", "'\\''") + "'"; +} + +function trim(value) { + return String(value || "").trim(); +} + +function truncate(value, max) { + const textValue = String(value || ""); + if (textValue.length <= max) { + return textValue; + } + return textValue.slice(0, max) + "\n...[truncated " + String(textValue.length - max) + " chars]"; +} + +function outputOf(result) { + if (typeof result?.output === "string") { + return result.output; + } + return JSON.stringify(result ?? 
{}); +} + +function exitCodeOf(result) { + if (typeof result?.exit_code === "number") { + return result.exit_code; + } + if (typeof result?.exitCode === "number") { + return result.exitCode; + } + return null; +} + +function ok(result) { + return result.exit_code === 0; +} + +async function run(label, cmd, options = {}) { + const workdir = options.workdir || config.codexRepo; + text("\n### " + label + "\n$ " + cmd + "\n"); + const raw = await tools.exec_command({ + cmd, + workdir, + yield_time_ms: options.yield_time_ms || 1000, + max_output_tokens: options.max_output_tokens || 12000 + }); + const result = { + label, + cmd, + workdir, + exit_code: exitCodeOf(raw), + output: outputOf(raw) + }; + commands.push({ + ...result, + output: truncate(result.output, 4000) + }); + text("exit_code=" + String(result.exit_code) + "\n" + truncate(result.output, options.textLimit || 12000) + "\n"); + return result; +} + +function finish(status, message, extra = {}) { + const summary = { + status, + message, + releaseTag: release.tagName, + releaseUrl: release.url, + targetCommitish: release.targetCommitish, + ...extra, + commands + }; + text("\nCODEX_UPDATE_RESULT " + JSON.stringify(summary) + "\n"); + exit(); +} + +async function collectRebaseContext(rebaseOutput, beforeSha) { + const status = await run("rebase conflict status", "git status --short --branch", { max_output_tokens: 12000 }); + const unmerged = await run("unmerged files", "git diff --name-only --diff-filter=U", { max_output_tokens: 12000 }); + const diffStat = await run("conflict diff stat", "git diff --cc --stat", { max_output_tokens: 12000 }); + const conflictDiff = await run("conflict diff", "git diff --cc", { max_output_tokens: 30000, textLimit: 20000 }); + const currentPatch = await run("current rebase patch", "git rebase --show-current-patch", { max_output_tokens: 20000, textLimit: 12000 }); + return { + beforeSha, + rebaseOutput, + statusOutput: status.output, + unmergedFiles: 
unmerged.output.split(/\r?\n/).map((line) => line.trim()).filter(Boolean), + diffStat: diffStat.output, + conflictDiff: truncate(conflictDiff.output, 20000), + currentPatch: truncate(currentPatch.output, 12000), + interventionPrompt: "Continue this same thread to resolve the paused rebase. Preserve the native Code Mode replay/app-server changes, do not abort or reset unless explicitly instructed, then run the configured verification commands." + }; +} + +text([ + "Codex upstream update job", + "", + "Release: " + release.tagName + (release.url ? " (" + release.url + ")" : ""), + "Target branch: " + config.targetBranch, + "Codex repo: " + config.codexRepo, + "Codex Rust workspace: " + config.codexRustDir, + "Service repo: " + config.serviceRepo, + "Upstream remote: " + config.upstreamRemote + " -> " + config.upstreamRepoUrl, + "Cargo target dir: " + config.cargoTargetDir +].join("\n") + "\n"); + +const repoCheck = await run("verify codex repo", "git rev-parse --show-toplevel"); +if (!ok(repoCheck)) { + finish("failed", "codex repo is not a git checkout", { repoCheck: repoCheck.output }); +} + +const rustWorkspaceCheck = await run( + "verify codex Rust workspace", + "test -f " + q(config.codexRustDir + "/Cargo.toml"), + { max_output_tokens: 4000 } +); +if (!ok(rustWorkspaceCheck)) { + finish("failed", "codex Rust workspace was not found at the expected codex-rs path", { + codexRustDir: config.codexRustDir, + rustWorkspaceCheck: rustWorkspaceCheck.output + }); +} + +const existingRebase = await run( + "check existing rebase state", + "test -d \"$(git rev-parse --git-path rebase-merge)\" -o -d \"$(git rev-parse --git-path rebase-apply)\"", + { max_output_tokens: 4000 } +); +if (existingRebase.exit_code === 0) { + const context = await collectRebaseContext("A rebase was already in progress before this job started.", undefined); + finish("blocked", "A rebase is already in progress in the codex checkout.", context); +} + +await run("codex status before update", "git 
status --short --branch", { max_output_tokens: 12000 }); +const branch = await run("current branch", "git rev-parse --abbrev-ref HEAD", { max_output_tokens: 4000 }); +if (!ok(branch)) { + finish("failed", "could not read current branch", { branchOutput: branch.output }); +} + +if (trim(branch.output) !== config.targetBranch) { + const dirtyBeforeSwitch = await run("dirty check before branch switch", "git status --porcelain=v1", { max_output_tokens: 12000 }); + if (trim(dirtyBeforeSwitch.output)) { + finish("blocked", "codex checkout has local changes before switching branches.", { + dirtyStatus: dirtyBeforeSwitch.output + }); + } + const switched = await run("switch target branch", "git switch " + q(config.targetBranch), { max_output_tokens: 12000 }); + if (!ok(switched)) { + finish("failed", "could not switch to target branch", { switchOutput: switched.output }); + } +} + +const dirty = await run("dirty check on target branch", "git status --porcelain=v1", { max_output_tokens: 12000 }); +if (trim(dirty.output)) { + finish("blocked", "codex target branch has local changes. 
Resolve or stash them before updating.", { + dirtyStatus: dirty.output + }); +} + +const remote = await run( + "ensure upstream openai/codex remote", + "git remote get-url " + q(config.upstreamRemote) + " >/dev/null 2>&1 && git remote set-url " + q(config.upstreamRemote) + " " + q(config.upstreamRepoUrl) + " || git remote add " + q(config.upstreamRemote) + " " + q(config.upstreamRepoUrl), + { max_output_tokens: 12000 } +); +if (!ok(remote)) { + finish("failed", "could not configure upstream remote", { remoteOutput: remote.output }); +} + +const fetch = await run("fetch upstream tags", "git fetch " + q(config.upstreamRemote) + " --tags --prune", { max_output_tokens: 20000 }); +if (!ok(fetch)) { + finish("failed", "could not fetch upstream release tags", { fetchOutput: fetch.output }); +} + +const releaseCommit = await run( + "resolve release tag", + "git rev-parse --verify " + q("refs/tags/" + release.tagName + "^{commit}"), + { max_output_tokens: 4000 } +); +if (!ok(releaseCommit)) { + finish("failed", "could not resolve upstream release tag after fetch", { + releaseTag: release.tagName, + resolveOutput: releaseCommit.output + }); +} + +const beforeHead = await run("codex head before rebase", "git rev-parse HEAD", { max_output_tokens: 4000 }); +const rebase = await run("rebase target branch onto upstream release", "git rebase " + q(release.tagName), { max_output_tokens: 30000, textLimit: 20000 }); +if (!ok(rebase)) { + const context = await collectRebaseContext(rebase.output, trim(beforeHead.output)); + finish("conflict", "Rebase paused with conflicts.", context); +} + +const afterHead = await run("codex head after rebase", "git rev-parse HEAD", { max_output_tokens: 4000 }); +await run("codex status after rebase", "git status --short --branch", { max_output_tokens: 12000 }); + +const build = await run( + "build explicit fork binary", + "CARGO_TARGET_DIR=" + q(config.cargoTargetDir) + " cargo build -p codex-cli --bin codex", + { workdir: config.codexRustDir, 
max_output_tokens: 30000, textLimit: 20000 } +); +if (!ok(build)) { + finish("failed", "fork binary build failed", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + buildOutput: build.output + }); +} + +const version = await run("verify explicit fork binary", q(config.codexBinary) + " --version", { max_output_tokens: 4000 }); +if (!ok(version)) { + finish("failed", "built fork binary did not run", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + versionOutput: version.output + }); +} + +const cargoCheck = await run( + "cargo check replay packages", + "CARGO_TARGET_DIR=" + q(config.cargoTargetDir) + " cargo check -p codex-app-server -p codex-core -p codex-app-server-protocol", + { workdir: config.codexRustDir, max_output_tokens: 30000, textLimit: 20000 } +); +if (!ok(cargoCheck)) { + finish("failed", "cargo check failed after rebase", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + cargoCheckOutput: cargoCheck.output + }); +} + +const protocolTest = await run( + "protocol code mode execute test", + "CARGO_TARGET_DIR=" + q(config.cargoTargetDir) + " cargo test -p codex-app-server-protocol thread_code_mode_execute -- --nocapture", + { workdir: config.codexRustDir, max_output_tokens: 30000, textLimit: 20000 } +); +if (!ok(protocolTest)) { + finish("failed", "protocol replay API test failed after rebase", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + protocolTestOutput: protocolTest.output + }); +} + +const fmt = await run("cargo fmt check", "cargo fmt --check", { + workdir: config.codexRustDir, + max_output_tokens: 20000 +}); +if (!ok(fmt)) { + finish("failed", "cargo fmt --check failed after rebase", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + fmtOutput: fmt.output + }); +} + +const codexDiffCheck = await run("codex diff whitespace check", "git diff --check", { max_output_tokens: 12000 }); +if (!ok(codexDiffCheck)) { 
+ finish("failed", "codex git diff --check failed after rebase", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + diffCheckOutput: codexDiffCheck.output + }); +} + +const generate = await run( + "regenerate codex-flows app-server TypeScript bindings", + q(config.codexBinary) + " app-server generate-ts --experimental --out " + q(config.generatedDir), + { workdir: config.serviceRepo, max_output_tokens: 30000, textLimit: 20000 } +); +if (!ok(generate)) { + finish("failed", "failed to regenerate codex-flows TypeScript bindings from fork binary", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + generateOutput: generate.output + }); +} + +const generatedStatus = await run( + "generated TypeScript binding status", + "git status --short -- packages/codex-client/src/app-server/generated", + { workdir: config.serviceRepo, max_output_tokens: 12000 } +); + +const bunInstall = await run("refresh service dependencies", "bun install --frozen-lockfile", { + workdir: config.serviceRepo, + max_output_tokens: 20000 +}); +if (!ok(bunInstall)) { + finish("failed", "bun install --frozen-lockfile failed in codex-flows", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + bunInstallOutput: bunInstall.output + }); +} + +const serviceTypes = await run("service typecheck", "bun run check:types", { + workdir: config.serviceRepo, + max_output_tokens: 30000, + textLimit: 20000 +}); +if (!ok(serviceTypes)) { + finish("failed", "codex-flows typecheck failed after generated binding update", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + serviceTypesOutput: serviceTypes.output + }); +} + +const serviceTests = await run("service tests", "bun run test", { + workdir: config.serviceRepo, + max_output_tokens: 30000, + textLimit: 20000 +}); +if (!ok(serviceTests)) { + finish("failed", "codex-flows tests failed after generated binding update", { + beforeSha: trim(beforeHead.output), + 
afterSha: trim(afterHead.output), + serviceTestsOutput: serviceTests.output + }); +} + +const serviceDiffCheck = await run("service diff whitespace check", "git diff --check", { + workdir: config.serviceRepo, + max_output_tokens: 12000 +}); +if (!ok(serviceDiffCheck)) { + finish("failed", "codex-flows git diff --check failed", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + serviceDiffCheckOutput: serviceDiffCheck.output + }); +} + +const codexStatus = await run("final codex status", "git status --short --branch", { max_output_tokens: 12000 }); +const serviceStatus = await run("final service status", "git status --short --branch", { + workdir: config.serviceRepo, + max_output_tokens: 12000 +}); + +finish("completed", "Codex fork rebased onto upstream release and verified. Review diffs, push explicitly, and publish @peezy.tech/codex to npm when ready.", { + beforeSha: trim(beforeHead.output), + afterSha: trim(afterHead.output), + codexHead: trim(afterHead.output), + codexBinary: config.codexBinary, + codexVersion: trim(version.output), + generatedStatus: generatedStatus.output, + codexStatus: codexStatus.output, + serviceStatus: serviceStatus.output +}); diff --git a/scripts/run-code-mode-in-new-thread.ts b/scripts/run-code-mode-in-new-thread.ts new file mode 100644 index 0000000..b6f2bcb --- /dev/null +++ b/scripts/run-code-mode-in-new-thread.ts @@ -0,0 +1,634 @@ +#!/usr/bin/env bun +import { readFile } from "node:fs/promises"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +import { CodexAppServerClient } from "../packages/codex-client/src/index.ts"; + +type Args = { + candidate: string; + cwd?: string; + codexCommand?: string; + codexHome?: string; + cliPath: string; + timeoutMs: number; + ephemeral: boolean; + stream: boolean; + injectContext: boolean; + injectResult: boolean; + notes: string[]; + threadName?: string | null; + mode: ReplayMode; +}; + +type ReplayMode = "native" | "shim"; + +type 
CandidateMetadata = Record; + +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const defaultCliPath = path.join(repoRoot, "apps/cli/src/index.ts"); + +async function main() { + const args = await parseArgs(process.argv.slice(2)); + const candidate = path.resolve(args.candidate); + const metadata = await readCandidateMetadata(candidate); + const cwd = path.resolve(args.cwd ?? metadataCwd(metadata) ?? process.cwd()); + const source = await readFile(candidate, "utf8"); + const cliPath = path.resolve(args.cliPath); + const threadName = + args.threadName === undefined ? defaultThreadName(candidate) : args.threadName; + const command = + args.mode === "shim" + ? replayCommand({ + candidate, + cliPath, + codexCommand: args.codexCommand, + cwd, + timeoutMs: args.timeoutMs, + }) + : undefined; + const client = new CodexAppServerClient({ + transportOptions: { + codexCommand: args.codexCommand, + args: appServerArgs(), + env: args.codexHome ? { CODEX_HOME: path.resolve(args.codexHome) } : undefined, + requestTimeoutMs: args.timeoutMs, + }, + clientName: "code-mode-replay-thread", + clientTitle: "Code Mode Replay Thread", + clientVersion: "0.1.0", + }); + const output: string[] = []; + let completedItem: unknown; + let commandExitCode: number | null = null; + let resolveTurnCompleted: (value: unknown) => void = () => undefined; + const turnCompleted = new Promise((resolve) => { + resolveTurnCompleted = resolve; + }); + + client.on("request", (message) => { + client.respondError(message.id, -32603, "replay script does not handle server requests"); + }); + client.on("notification", (message) => { + if (message.method === "item/commandExecution/outputDelta") { + const delta = stringField(message.params, "delta"); + if (delta) { + output.push(delta); + if (args.stream) { + process.stdout.write(delta); + } + } + } + if (message.method === "item/agentMessage/delta") { + const delta = stringField(message.params, "delta"); + if (delta) { + 
output.push(delta); + if (args.stream) { + process.stdout.write(delta); + } + } + } + if (message.method === "item/completed") { + const item = recordField(message.params, "item"); + if (item && stringField(item, "type") === "commandExecution") { + completedItem = item; + commandExitCode = numberField(item, "exitCode") ?? numberField(item, "exit_code"); + } + if (item && stringField(item, "type") === "agentMessage") { + completedItem = item; + } + } + if (message.method === "turn/completed") { + resolveTurnCompleted(message.params); + } + }); + + try { + await client.connect(); + const started = await client.startThread({ + cwd, + approvalPolicy: "never", + sandbox: "danger-full-access", + ephemeral: args.ephemeral, + experimentalRawEvents: false, + persistExtendedHistory: false, + }); + const threadId = started.thread.id; + if (threadName) { + await client.request("thread/name/set", { + threadId, + name: threadName, + }); + } + let injectedContext = false; + if (args.injectContext || args.notes.length > 0) { + await injectAssistantText( + client, + threadId, + replayContextText({ + candidate, + codexHome: args.codexHome ? path.resolve(args.codexHome) : undefined, + cwd, + metadata, + mode: args.mode, + notes: args.notes, + source, + }), + ); + injectedContext = true; + } + + if (args.mode === "shim") { + await client.request("thread/shellCommand", { + threadId, + command, + }); + } else { + await client.request("thread/codeMode/execute", { + threadId, + source, + }); + } + + const completed = await withTimeout( + turnCompleted, + args.timeoutMs, + "timed out waiting for Code Mode replay completion", + ); + let replayOutput = output.join(""); + if (args.mode === "native") { + const read = await client.request("thread/read", { + threadId, + includeTurns: true, + }); + replayOutput = latestAgentMessageText(read) ?? replayOutput; + if (args.stream && replayOutput && output.length === 0) { + process.stdout.write(replayOutput.endsWith("\n") ? 
replayOutput : replayOutput + "\n"); + } + } + let injectedResult = false; + if (args.injectResult) { + await injectAssistantText( + client, + threadId, + replayResultText({ + candidate, + command, + commandExitCode, + cwd, + mode: args.mode, + output: replayOutput, + }), + ); + injectedResult = true; + } + const result = { + threadId, + cwd, + candidate, + mode: args.mode, + command, + commandExitCode: args.mode === "shim" ? commandExitCode : null, + output: replayOutput, + injectedContext, + injectedResult, + threadName, + codexHome: args.codexHome ? path.resolve(args.codexHome) : undefined, + notes: args.notes, + completed, + completedItem, + }; + process.stdout.write(JSON.stringify(result, null, 2) + "\n"); + process.stdout.write("threadId=" + threadId + "\n"); + process.exitCode = args.mode === "shim" ? commandExitCode ?? 0 : 0; + } finally { + client.close(); + } +} + +function replayCommand(options: { + candidate: string; + cliPath: string; + codexCommand?: string; + cwd: string; + timeoutMs: number; +}) { + const command = [ + "bun", + shellQuote(options.cliPath), + "--url", + "stdio://", + "--timeout-ms", + String(options.timeoutMs), + "run-code-mode", + shellQuote(options.candidate), + "--cwd", + shellQuote(options.cwd), + ]; + if (options.codexCommand) { + command.splice(4, 0, "--codex-command", shellQuote(options.codexCommand)); + } + return command.join(" "); +} + +function appServerArgs() { + return [ + "app-server", + "--listen", + "stdio://", + "--enable", + "apps", + "--enable", + "hooks", + "--enable", + "code_mode", + "--enable", + "code_mode_only", + ]; +} + +function defaultThreadName(candidate: string) { + return "Code Mode replay: " + path.basename(candidate); +} + +async function readCandidateMetadata(candidate: string): Promise { + const metadataPath = candidate.replace(/\.[^.]+$/, ".json"); + try { + const parsed = JSON.parse(await readFile(metadataPath, "utf8")) as unknown; + return isRecord(parsed) ? 
parsed : undefined; + } catch { + return undefined; + } +} + +function metadataCwd(metadata: CandidateMetadata | undefined) { + const cwd = metadata?.cwd; + return typeof cwd === "string" && cwd ? cwd : undefined; +} + +function latestAgentMessageText(value: unknown) { + const thread = recordField(value, "thread"); + const turns = Array.isArray(thread?.turns) ? thread.turns : []; + for (const turn of turns.slice().reverse()) { + const turnRecord = isRecord(turn) ? turn : undefined; + const items = Array.isArray(turnRecord?.items) ? turnRecord.items : []; + for (const item of items.slice().reverse()) { + if (!isRecord(item) || stringField(item, "type") !== "agentMessage") { + continue; + } + const text = stringField(item, "text"); + if (text !== undefined) { + return text; + } + } + } + return undefined; +} + +async function injectAssistantText( + client: CodexAppServerClient, + threadId: string, + text: string, +) { + await client.request("thread/inject_items", { + threadId, + items: [ + { + type: "message", + role: "assistant", + content: [ + { + type: "output_text", + text, + }, + ], + }, + ], + }); +} + +function replayContextText(options: { + candidate: string; + codexHome?: string; + cwd: string; + metadata: CandidateMetadata | undefined; + mode: ReplayMode; + notes: string[]; + source: string; +}) { + const parts = [ + "Code Mode replay context", + "", + "Candidate: " + options.candidate, + "Working directory: " + options.cwd, + "Replay mode: " + options.mode, + ]; + if (options.codexHome) { + parts.push("Codex home: " + options.codexHome); + } + if (options.notes.length > 0) { + parts.push("", "Notes:"); + for (const note of options.notes) { + parts.push("- " + note); + } + } + parts.push( + "", + "Candidate metadata:", + options.metadata ? 
formatJson(options.metadata) : "unavailable", + "", + "Saved Code Mode script:", + truncateText(options.source, 50_000), + ); + return parts.join("\n"); +} + +function replayResultText(options: { + candidate: string; + command: string | undefined; + commandExitCode: number | null; + cwd: string; + mode: ReplayMode; + output: string; +}) { + const lines = [ + "Code Mode replay result", + "", + "Candidate: " + options.candidate, + "Working directory: " + options.cwd, + "Replay mode: " + options.mode, + ]; + if (options.command !== undefined) { + lines.push( + "Command exit code: " + String(options.commandExitCode), + "", + "Thread shell command:", + options.command, + ); + } + lines.push( + "", + "Replay output:", + truncateText(options.output, 50_000), + ); + return lines.join("\n"); +} + +function truncateText(value: string, limit: number) { + if (value.length <= limit) { + return value; + } + return ( + value.slice(0, limit) + + "\n...[truncated " + + String(value.length - limit) + + " chars]" + ); +} + +function formatJson(value: unknown) { + return JSON.stringify(value, null, 2); +} + +async function parseArgs(argv: string[]): Promise { + let candidate: string | undefined; + let cwd: string | undefined; + let codexCommand = process.env.CODEX_APP_SERVER_CODEX_COMMAND; + let codexHome: string | undefined; + let cliPath = defaultCliPath; + let timeoutMs = 180_000; + let ephemeral = false; + let stream = true; + let injectContext = true; + let injectResult = true; + const notes: string[] = []; + let threadName: string | null | undefined; + let mode: ReplayMode = "native"; + + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + if (!arg) { + continue; + } + if (arg === "-h" || arg === "--help") { + printHelp(); + process.exit(0); + } + if (arg === "--cwd") { + cwd = requiredValue(argv, ++index, "--cwd"); + continue; + } + if (arg.startsWith("--cwd=")) { + cwd = arg.slice("--cwd=".length); + continue; + } + if (arg === 
"--codex-command") { + codexCommand = requiredValue(argv, ++index, "--codex-command"); + continue; + } + if (arg.startsWith("--codex-command=")) { + codexCommand = arg.slice("--codex-command=".length); + continue; + } + if (arg === "--codex-home") { + codexHome = requiredValue(argv, ++index, "--codex-home"); + continue; + } + if (arg.startsWith("--codex-home=")) { + codexHome = arg.slice("--codex-home=".length); + continue; + } + if (arg === "--native") { + mode = "native"; + continue; + } + if (arg === "--shim") { + mode = "shim"; + continue; + } + if (arg === "--cli") { + cliPath = requiredValue(argv, ++index, "--cli"); + continue; + } + if (arg.startsWith("--cli=")) { + cliPath = arg.slice("--cli=".length); + continue; + } + if (arg === "--timeout-ms") { + timeoutMs = parseTimeout(requiredValue(argv, ++index, "--timeout-ms")); + continue; + } + if (arg.startsWith("--timeout-ms=")) { + timeoutMs = parseTimeout(arg.slice("--timeout-ms=".length)); + continue; + } + if (arg === "--ephemeral") { + ephemeral = true; + continue; + } + if (arg === "--no-stream") { + stream = false; + continue; + } + if (arg === "--no-inject-context") { + injectContext = false; + continue; + } + if (arg === "--no-inject-result") { + injectResult = false; + continue; + } + if (arg === "--name") { + threadName = requiredValue(argv, ++index, "--name"); + continue; + } + if (arg.startsWith("--name=")) { + threadName = arg.slice("--name=".length); + continue; + } + if (arg === "--no-name") { + threadName = null; + continue; + } + if (arg === "--note") { + notes.push(requiredValue(argv, ++index, "--note")); + continue; + } + if (arg.startsWith("--note=")) { + notes.push(arg.slice("--note=".length)); + continue; + } + if (arg.startsWith("-")) { + throw new Error("unknown option: " + arg); + } + if (candidate) { + throw new Error("unexpected positional argument: " + arg); + } + candidate = arg; + } + + if (!candidate) { + printHelp(); + throw new Error("candidate file is required"); + } + + 
return { + candidate, + cwd, + codexCommand, + codexHome, + cliPath, + timeoutMs, + ephemeral, + stream, + injectContext, + injectResult, + notes, + threadName, + mode, + }; +} + +function requiredValue(argv: string[], index: number, flag: string) { + const value = argv[index]; + if (!value) { + throw new Error(flag + " requires a value"); + } + return value; +} + +function parseTimeout(value: string) { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed <= 0) { + throw new Error("invalid --timeout-ms value: " + value); + } + return parsed; +} + +function shellQuote(value: string) { + return "'" + value.replaceAll("'", "'\\''") + "'"; +} + +async function withTimeout(promise: Promise, timeoutMs: number, message: string) { + let timer: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_, reject) => { + timer = setTimeout(() => reject(new Error(message)), timeoutMs); + }), + ]); + } finally { + if (timer) { + clearTimeout(timer); + } + } +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function recordField(value: unknown, field: string) { + if (typeof value !== "object" || value === null || Array.isArray(value)) { + return undefined; + } + const record = value as Record; + const nested = record[field]; + return typeof nested === "object" && nested !== null && !Array.isArray(nested) + ? (nested as Record) + : undefined; +} + +function stringField(value: unknown, field: string) { + if (typeof value !== "object" || value === null || Array.isArray(value)) { + return undefined; + } + const fieldValue = (value as Record)[field]; + return typeof fieldValue === "string" ? 
fieldValue : undefined; +} + +function numberField(value: unknown, field: string) { + if (typeof value !== "object" || value === null || Array.isArray(value)) { + return undefined; + } + const fieldValue = (value as Record)[field]; + return typeof fieldValue === "number" ? fieldValue : undefined; +} + +function printHelp() { + process.stdout.write( + [ + "Run a saved Code Mode candidate in a new Codex thread without starting a model turn.", + "", + "Usage:", + " bun scripts/run-code-mode-in-new-thread.ts [options]", + "", + "Options:", + " --cwd Thread cwd. Defaults to candidate sidecar cwd, then process cwd.", + " --codex-command Codex binary for both app-server and replay.", + " Defaults to CODEX_APP_SERVER_CODEX_COMMAND.", + " With CODEX_FLOWS_MODE=code-mode, falls back to", + " bunx @peezy.tech/codex.", + " --codex-home CODEX_HOME for the spawned app-server, useful for prepared MCP config.", + " --native Use native thread/codeMode/execute replay. This is the default.", + " --shim Use the older TypeScript shell-command shim fallback.", + " --cli codex-app CLI path. Defaults to " + defaultCliPath, + " --timeout-ms Timeout for app-server requests and completion wait.", + " --ephemeral Create an ephemeral thread.", + " --no-stream Do not stream command output while waiting.", + " --note Add a note to the injected replay context. Repeatable.", + " --name Set the thread title. Defaults to the candidate filename.", + " --no-name Leave the thread title unset.", + " --no-inject-context Skip injecting candidate metadata/source before execution.", + " --no-inject-result Skip injecting the replay summary after execution.", + " -h, --help Show this help.", + "", + ].join("\n"), + ); +} + +main().catch((error) => { + console.error(error instanceof Error ? error.stack ?? 
error.message : String(error)); + process.exit(1); +}); diff --git a/scripts/run-codex-release-update-thread.ts b/scripts/run-codex-release-update-thread.ts new file mode 100644 index 0000000..fde21a8 --- /dev/null +++ b/scripts/run-codex-release-update-thread.ts @@ -0,0 +1,749 @@ +#!/usr/bin/env bun +import { $ } from "bun"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +import { CodexAppServerClient } from "../packages/codex-client/src/index.ts"; + +type Args = { + cargoTargetDir: string; + codexCommand?: string; + codexHome?: string; + codexRepo: string; + ephemeral: boolean; + force: boolean; + handledNpmPackage?: string; + npmRegistry: string; + releaseTag?: string; + serviceRepo: string; + stream: boolean; + targetBranch: string; + threadName?: string; + timeoutMs: number; + upstreamRemote: string; + upstreamRepo: string; +}; + +type ReleaseInfo = { + tagName: string; + name?: string; + publishedAt?: string; + url?: string; + body?: string; + targetCommitish?: string; +}; + +type HandledNpmRelease = { + packageName: string; + registry: string; + version: string; +}; + +type CodeModeUpdateResult = { + status: "blocked" | "completed" | "conflict" | "failed"; + message?: string; + releaseTag?: string; + releaseUrl?: string; + beforeSha?: string; + afterSha?: string; + codexHead?: string; + commands?: unknown[]; +}; + +const serviceRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), ".."); +const workspaceRoot = path.resolve(serviceRoot, ".."); +const defaultCodexRepo = path.join(workspaceRoot, "codex"); +const defaultCargoTargetDir = "/tmp/codex-fork-workspace-target"; +const defaultHandledNpmPackage = + process.env.CODEX_UPDATE_HANDLED_NPM_PACKAGE ?? "@peezy.tech/codex"; +const defaultNpmRegistry = + process.env.CODEX_UPDATE_NPM_REGISTRY ?? 
"https://registry.npmjs.org/"; + +async function main() { + const args = parseArgs(Bun.argv.slice(2)); + const release = await latestRelease(args); + const handledRelease = await latestHandledNpmRelease(args, release); + + if (!args.force && handledRelease) { + writeJson({ + status: "skipped", + message: `Release ${release.tagName} is already covered by ${handledRelease.packageName}@${handledRelease.version}.`, + release, + handledRelease, + }); + return; + } + + await ensureCodexCommandExists(args.codexCommand); + const result = await runUpdateThread(args, release, handledRelease); + + writeJson(result); + process.stdout.write(`threadId=${result.threadId}\n`); + + if (result.updateResult?.status === "completed") { + return; + } + if (result.updateResult?.status === "conflict") { + process.exitCode = 2; + return; + } + process.exitCode = 1; +} + +async function latestRelease(args: Args): Promise { + const fields = "tagName,name,publishedAt,url,body,targetCommitish"; + try { + const release = args.releaseTag + ? 
await $`gh release view ${args.releaseTag} --repo ${args.upstreamRepo} --json ${fields}`.json() + : await $`gh release view --repo ${args.upstreamRepo} --json ${fields}`.json(); + return requireReleaseInfo(release); + } catch (error) { + throw new Error(`Failed to read ${args.upstreamRepo} release: ${errorMessage(error)}`); + } +} + +async function latestHandledNpmRelease( + args: Args, + release: ReleaseInfo, +): Promise { + if (!args.handledNpmPackage) { + return undefined; + } + const version = releaseVersion(release.tagName); + if (!version) { + throw new Error(`Could not normalize release tag to an npm version: ${release.tagName}`); + } + const spec = `${args.handledNpmPackage}@${version}`; + const result = await $`npm view ${spec} version --registry ${args.npmRegistry} --json`.nothrow().quiet(); + if (result.exitCode !== 0) { + return undefined; + } + try { + const parsed = JSON.parse(result.stdout.toString()) as unknown; + if (parsed !== version) { + throw new Error(`expected ${version}, got ${String(parsed)}`); + } + return { + packageName: args.handledNpmPackage, + registry: args.npmRegistry, + version, + }; + } catch (error) { + throw new Error( + `Failed to parse npm package version for ${spec}: ${errorMessage(error)}`, + ); + } +} + +async function runUpdateThread( + args: Args, + release: ReleaseInfo, + handledRelease: HandledNpmRelease | undefined, +) { + const threadName = + args.threadName ?? + `Codex upstream update: ${release.tagName} -> ${args.targetBranch}`; + const source = await updateCodeModeSource(args, release); + const client = new CodexAppServerClient({ + transportOptions: { + codexCommand: args.codexCommand, + args: appServerArgs(), + cwd: args.codexRepo, + env: args.codexHome + ? 
{ CODEX_HOME: path.resolve(args.codexHome) } + : undefined, + requestTimeoutMs: args.timeoutMs, + }, + clientName: "codex-update-thread", + clientTitle: "Codex Update Thread", + clientVersion: "0.1.0", + }); + + const output: string[] = []; + let threadId = ""; + let completedItem: unknown; + let resolveTurnCompleted: (value: unknown) => void = () => undefined; + const turnCompleted = new Promise((resolve) => { + resolveTurnCompleted = resolve; + }); + + client.on("request", (message) => { + client.respondError( + message.id, + -32603, + "codex update launcher does not handle server requests", + ); + }); + client.on("notification", (message) => { + if (message.method === "item/commandExecution/outputDelta") { + const delta = stringField(message.params, "delta"); + if (delta) { + output.push(delta); + if (args.stream) { + process.stdout.write(delta); + } + } + } + if (message.method === "item/agentMessage/delta") { + const delta = stringField(message.params, "delta"); + if (delta) { + output.push(delta); + if (args.stream) { + process.stdout.write(delta); + } + } + } + if (message.method === "item/completed") { + completedItem = recordField(message.params, "item") ?? 
completedItem; + } + if ( + message.method === "turn/completed" && + (!threadId || stringField(message.params, "threadId") === threadId) + ) { + resolveTurnCompleted(message.params); + } + }); + + try { + await client.connect(); + const started = await client.startThread({ + cwd: args.codexRepo, + approvalPolicy: "never", + sandbox: "danger-full-access", + ephemeral: args.ephemeral, + experimentalRawEvents: false, + persistExtendedHistory: true, + }); + threadId = started.thread.id; + await client.request("thread/name/set", { + threadId, + name: threadName, + }); + await injectAssistantText( + client, + threadId, + updateContextText(args, release, handledRelease, source), + ); + await client.request("thread/codeMode/execute", { + threadId, + source, + }); + const completed = await withTimeout( + turnCompleted, + args.timeoutMs, + "timed out waiting for Codex update Code Mode completion", + ); + const read = await client.request("thread/read", { + threadId, + includeTurns: true, + }); + const agentText = allAgentMessageText(read).join("\n"); + const replayOutput = agentText || output.join(""); + const updateResult = parseUpdateResult(replayOutput); + return { + status: updateResult?.status ?? 
"unknown", + threadId, + threadName, + release, + handledRelease, + codexRepo: args.codexRepo, + serviceRepo: args.serviceRepo, + updateResult, + output: replayOutput, + completed, + completedItem, + }; + } finally { + client.close(); + } +} + +async function updateCodeModeSource(args: Args, release: ReleaseInfo): Promise { + const config = { + cargoTargetDir: args.cargoTargetDir, + codexBinary: path.join(args.cargoTargetDir, "debug", "codex"), + codexRepo: args.codexRepo, + codexRustDir: path.join(args.codexRepo, "codex-rs"), + generatedDir: path.join( + args.serviceRepo, + "packages", + "codex-client", + "src", + "app-server", + "generated", + ), + release, + serviceRepo: args.serviceRepo, + targetBranch: args.targetBranch, + upstreamRemote: args.upstreamRemote, + upstreamRepoUrl: `https://github.com/${args.upstreamRepo}.git`, + }; + const configSource = `const config = ${JSON.stringify(config, null, 2)};\n`; + const bodySource = await Bun.file( + path.join(serviceRoot, "scripts", "codex-release-update.code-mode.js"), + ).text(); + return configSource + bodySource; +} + +function updateContextText( + args: Args, + release: ReleaseInfo, + handledRelease: HandledNpmRelease | undefined, + source: string, +) { + return [ + "Codex upstream update job context", + "", + "Purpose: update the local codex fork branch from the latest openai/codex GitHub release through native Code Mode.", + "", + "Release:", + formatJson(release), + "", + "Paths:", + "- codex repo: " + args.codexRepo, + "- codex-flows repo: " + args.serviceRepo, + "- cargo target dir: " + args.cargoTargetDir, + "- app-server command for this thread: " + + (args.codexCommand ?? "bunx @peezy.tech/codex"), + args.handledNpmPackage + ? 
"- handled npm package: " + args.handledNpmPackage + : "- handled npm package: disabled", + "- npm registry: " + args.npmRegistry, + "", + "Policy:", + "- Do not run a global Codex install.", + "- Rebase " + args.targetBranch + " onto the upstream release tag from " + args.upstreamRepo + ".", + "- If rebase conflicts occur, preserve the paused rebase state and continue this same thread for intervention.", + "- Treat the published npm package version as the durable handled-release marker; do not write local hidden version state.", + "", + "Handled npm package version:", + handledRelease ? formatJson(handledRelease) : "unavailable or disabled", + "", + "Generated Code Mode source:", + truncateText(source, 50_000), + ].join("\n"); +} + +function appServerArgs() { + return [ + "app-server", + "--listen", + "stdio://", + "--enable", + "apps", + "--enable", + "hooks", + "--enable", + "code_mode", + "--enable", + "code_mode_only", + ]; +} + +async function injectAssistantText( + client: CodexAppServerClient, + threadId: string, + text: string, +) { + await client.request("thread/inject_items", { + threadId, + items: [ + { + type: "message", + role: "assistant", + content: [ + { + type: "output_text", + text, + }, + ], + }, + ], + }); +} + +function parseUpdateResult(output: string): CodeModeUpdateResult | undefined { + for (const line of output.split(/\r?\n/).reverse()) { + const prefix = "CODEX_UPDATE_RESULT "; + const index = line.indexOf(prefix); + if (index === -1) { + continue; + } + const text = line.slice(index + prefix.length).trim(); + try { + const parsed = JSON.parse(text) as unknown; + if ( + isRecord(parsed) && + (parsed.status === "completed" || + parsed.status === "conflict" || + parsed.status === "blocked" || + parsed.status === "failed") + ) { + return parsed as CodeModeUpdateResult; + } + } catch { + return undefined; + } + } + return undefined; +} + +async function ensureCodexCommandExists(codexCommand: string | undefined) { + if (!codexCommand) { + 
return; + } + if (!path.isAbsolute(codexCommand)) { + throw new Error( + `Codex command must be an explicit local fork binary path, not a PATH lookup: ${codexCommand}`, + ); + } + const file = Bun.file(codexCommand); + if (!(await file.exists())) { + throw new Error( + `Codex command does not exist: ${codexCommand}. Build the fork binary first or pass --codex-command.`, + ); + } +} + +function allAgentMessageText(value: unknown) { + const thread = recordField(value, "thread"); + const turns = Array.isArray(thread?.turns) ? thread.turns : []; + const texts: string[] = []; + for (const turn of turns) { + const turnRecord = isRecord(turn) ? turn : undefined; + const items = Array.isArray(turnRecord?.items) ? turnRecord.items : []; + for (const item of items) { + if (!isRecord(item) || stringField(item, "type") !== "agentMessage") { + continue; + } + const text = stringField(item, "text"); + if (text !== undefined) { + texts.push(text); + } + } + } + return texts; +} + +function requireReleaseInfo(value: unknown): ReleaseInfo { + if (!isRecord(value) || typeof value.tagName !== "string" || !value.tagName.trim()) { + throw new Error("GitHub release response did not include tagName"); + } + return { + tagName: value.tagName, + ...(typeof value.name === "string" ? { name: value.name } : {}), + ...(typeof value.publishedAt === "string" + ? { publishedAt: value.publishedAt } + : {}), + ...(typeof value.url === "string" ? { url: value.url } : {}), + ...(typeof value.body === "string" ? { body: value.body } : {}), + ...(typeof value.targetCommitish === "string" + ? { targetCommitish: value.targetCommitish } + : {}), + }; +} + +function releaseVersion(tagName: string) { + return tagName.match(/\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?/)?.[0]; +} + +function parseArgs(argv: string[]): Args { + let cargoTargetDir = process.env.CARGO_TARGET_DIR ?? 
defaultCargoTargetDir; + let codexCommand = process.env.CODEX_APP_SERVER_CODEX_COMMAND; + let codexHome: string | undefined; + let codexRepo = defaultCodexRepo; + let ephemeral = false; + let force = false; + let handledNpmPackage: string | undefined = defaultHandledNpmPackage; + let npmRegistry = defaultNpmRegistry; + let releaseTag: string | undefined; + let serviceRepo = serviceRoot; + let stream = true; + let targetBranch = "code-mode-exec-hooks"; + let threadName: string | undefined; + let timeoutMs = 1_800_000; + let upstreamRemote = "upstream"; + let upstreamRepo = "openai/codex"; + + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + if (!arg) { + continue; + } + if (arg === "-h" || arg === "--help") { + printHelp(); + process.exit(0); + } + if (arg === "--cargo-target-dir") { + cargoTargetDir = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--cargo-target-dir=")) { + cargoTargetDir = arg.slice("--cargo-target-dir=".length); + continue; + } + if (arg === "--codex-command") { + codexCommand = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--codex-command=")) { + codexCommand = arg.slice("--codex-command=".length); + continue; + } + if (arg === "--codex-home") { + codexHome = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--codex-home=")) { + codexHome = arg.slice("--codex-home=".length); + continue; + } + if (arg === "--codex-repo") { + codexRepo = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--codex-repo=")) { + codexRepo = arg.slice("--codex-repo=".length); + continue; + } + if (arg === "--ephemeral") { + ephemeral = true; + continue; + } + if (arg === "--force") { + force = true; + continue; + } + if (arg === "--handled-npm-package") { + handledNpmPackage = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--handled-npm-package=")) { + handledNpmPackage = 
arg.slice("--handled-npm-package=".length); + continue; + } + if (arg === "--no-handled-npm-check" || arg === "--no-handled-release-check") { + handledNpmPackage = undefined; + continue; + } + if (arg === "--npm-registry") { + npmRegistry = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--npm-registry=")) { + npmRegistry = arg.slice("--npm-registry=".length); + continue; + } + if (arg === "--release-tag") { + releaseTag = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--release-tag=")) { + releaseTag = arg.slice("--release-tag=".length); + continue; + } + if (arg === "--service-repo") { + serviceRepo = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--service-repo=")) { + serviceRepo = arg.slice("--service-repo=".length); + continue; + } + if (arg === "--no-stream") { + stream = false; + continue; + } + if (arg === "--target-branch") { + targetBranch = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--target-branch=")) { + targetBranch = arg.slice("--target-branch=".length); + continue; + } + if (arg === "--name") { + threadName = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--name=")) { + threadName = arg.slice("--name=".length); + continue; + } + if (arg === "--timeout-ms") { + timeoutMs = parsePositiveInteger(requiredValue(argv, ++index, arg), arg); + continue; + } + if (arg.startsWith("--timeout-ms=")) { + timeoutMs = parsePositiveInteger(arg.slice("--timeout-ms=".length), "--timeout-ms"); + continue; + } + if (arg === "--upstream-remote") { + upstreamRemote = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--upstream-remote=")) { + upstreamRemote = arg.slice("--upstream-remote=".length); + continue; + } + if (arg === "--upstream-repo") { + upstreamRepo = requiredValue(argv, ++index, arg); + continue; + } + if (arg.startsWith("--upstream-repo=")) { + upstreamRepo = arg.slice("--upstream-repo=".length); + 
continue; + } + throw new Error("unknown argument: " + arg); + } + + cargoTargetDir = path.resolve(cargoTargetDir); + codexCommand = codexCommand ? resolveCommand(codexCommand) : undefined; + codexRepo = path.resolve(codexRepo); + serviceRepo = path.resolve(serviceRepo); + + return { + cargoTargetDir, + codexCommand, + codexHome, + codexRepo, + ephemeral, + force, + handledNpmPackage, + npmRegistry, + releaseTag, + serviceRepo, + stream, + targetBranch, + threadName, + timeoutMs, + upstreamRemote, + upstreamRepo, + }; +} + +function requiredValue(argv: string[], index: number, flag: string) { + const value = argv[index]; + if (!value) { + throw new Error(flag + " requires a value"); + } + return value; +} + +function parsePositiveInteger(value: string, flag: string) { + const parsed = Number(value); + if (!Number.isInteger(parsed) || parsed <= 0) { + throw new Error(`invalid ${flag} value: ${value}`); + } + return parsed; +} + +function resolveCommand(command: string) { + if (path.isAbsolute(command) || command.includes("/") || command.includes("\\")) { + return path.resolve(command); + } + return command; +} + +async function withTimeout(promise: Promise, timeoutMs: number, message: string) { + let timer: ReturnType | undefined; + try { + return await Promise.race([ + promise, + new Promise((_, reject) => { + timer = setTimeout(() => reject(new Error(message)), timeoutMs); + }), + ]); + } finally { + if (timer) { + clearTimeout(timer); + } + } +} + +function writeJson(value: unknown) { + process.stdout.write(`${JSON.stringify(value, null, 2)}\n`); +} + +function formatJson(value: unknown) { + return JSON.stringify(value, null, 2); +} + +function truncateText(value: string, limit: number) { + if (value.length <= limit) { + return value; + } + return `${value.slice(0, limit)}\n...[truncated ${value.length - limit} chars]`; +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + 
/**
 * Reads `field` from `value` and returns it only when it is itself a record.
 *
 * @param value - Candidate container; anything `isRecord` rejects yields `undefined`.
 * @param field - Property name to look up on `value`.
 * @returns The nested record, or `undefined` when either `value` or the
 *   property fails the `isRecord` check.
 */
function recordField(value: unknown, field: string) {
  if (!isRecord(value)) {
    return undefined;
  }
  const nested = value[field];
  return isRecord(nested) ? nested : undefined;
}

/**
 * Reads `field` from `value` and returns it only when it is a string.
 *
 * @param value - Candidate container; anything `isRecord` rejects yields `undefined`.
 * @param field - Property name to look up on `value`.
 * @returns The string stored under `field`, or `undefined` for any other type.
 */
function stringField(value: unknown, field: string) {
  if (!isRecord(value)) {
    return undefined;
  }
  const fieldValue = value[field];
  return typeof fieldValue === "string" ? fieldValue : undefined;
}

/**
 * Converts a thrown value into a printable message.
 *
 * @param error - Whatever was thrown or rejected.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function errorMessage(error: unknown) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Writes the command-line usage text to stdout.
 *
 * Option rows are generated from a table so that every flag that takes a value
 * shows a placeholder, and all descriptions start in the same column.
 * Continuation lines of a multi-line description are indented under that
 * column, which keeps them visually distinct from new option rows.
 */
function printHelp() {
  // Each entry: the flag (plus value placeholder, if it takes one) followed by
  // one or more description lines. Placeholders mirror which flags parseArgs
  // reads with requiredValue.
  const options: ReadonlyArray<readonly [string, ...string[]]> = [
    ["--release-tag <tag>", "Use a specific openai/codex release tag instead of latest."],
    ["--force", "Run even when the handled npm package version exists."],
    [
      "--handled-npm-package <name>",
      "Durable handled npm package. Defaults to " + defaultHandledNpmPackage,
    ],
    ["--npm-registry <url>", "npm registry URL. Defaults to " + defaultNpmRegistry],
    ["--no-handled-npm-check", "Do not compare against a handled npm package."],
    ["--no-handled-release-check", "Alias for --no-handled-npm-check."],
    ["--codex-repo <path>", "Local codex fork checkout. Defaults to " + defaultCodexRepo],
    ["--service-repo <path>", "codex-flows checkout. Defaults to " + serviceRoot],
    ["--target-branch <branch>", "Fork branch to rebase. Defaults to code-mode-exec-hooks."],
    ["--upstream-repo <owner/repo>", "GitHub release source. Defaults to openai/codex."],
    ["--upstream-remote <name>", "Local remote name for upstream. Defaults to upstream."],
    ["--cargo-target-dir <path>", "Cargo target dir. Defaults to " + defaultCargoTargetDir],
    [
      "--codex-command <path>",
      "Explicit fork Codex binary used to start app-server.",
      "Defaults to CODEX_APP_SERVER_CODEX_COMMAND.",
      "With CODEX_FLOWS_MODE=code-mode, falls back to",
      "bunx @peezy.tech/codex.",
    ],
    ["--codex-home <path>", "CODEX_HOME for the spawned app-server."],
    ["--timeout-ms <ms>", "App-server request and flow timeout. Defaults to 1800000."],
    ["--name <name>", "Thread name."],
    ["--ephemeral", "Create an ephemeral thread."],
    ["--no-stream", "Do not stream Code Mode output."],
    ["-h, --help", "Show this help."],
  ];

  // Width of the flag column, derived from the longest entry so the table
  // stays aligned when options are added or renamed.
  const flagWidth = Math.max(...options.map(([flag]) => flag.length));
  const optionLines = options.flatMap(([flag, ...description]) =>
    description.map(
      (text, index) =>
        // Only the first description line carries the flag; the rest are
        // continuations padded to the same description column.
        "  " + (index === 0 ? flag : "").padEnd(flagWidth) + "  " + text,
    ),
  );

  process.stdout.write(
    [
      "Run the openai/codex release update flow inside a native Code Mode thread.",
      "",
      "Usage:",
      "  bun scripts/run-codex-release-update-thread.ts [options]",
      "",
      "Options:",
      ...optionLines,
      "",
      "Exit codes:",
      "  0  completed or skipped",
      "  1  failed or blocked",
      "  2  rebase conflict, with rebase state intentionally left paused",
      "",
    ].join("\n"),
  );
}

// Script entry point. On failure, print only the message to stderr and set the
// exit code instead of calling process.exit, so pending output can still flush.
await main().catch((error) => {
  process.stderr.write(`${errorMessage(error)}\n`);
  process.exitCode = 1;
});