From 85059818895b18c70e1dc0e7052181446a2f98aa Mon Sep 17 00:00:00 2001
From: Eliot M
Date: Wed, 29 Apr 2026 14:44:47 +0000
Subject: [PATCH] feat(self-edit): make POST /api/v1/self/deploy synchronous
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Old behaviour returned 202 with a deployId and expected the agent to poll GET /deploy/:id — but that route lives on the Better-Auth-gated deploy module, so localhost curl from inside the container got 401 on every poll. The agent had no terminal signal and no log visibility, matching the symptoms Eliot observed (stale-looking session, repeated 401s in core-back logs, no build output).

Mirror old-Cial's ergonomics: one synchronous request that blocks until the build (and post-build restart) reach a terminal state, then returns { ok, deployId, status, scope, durationMs, exitCode, errorSummary, logTail } with HTTP 200 when status is 'ok' and HTTP 500 otherwise (build error, cancellation, or the wait timing out). logTail is the last 8KB of the deploy log file so the agent has the failure context inline without needing a second round-trip.

- DeployService.waitForDone(deployId, timeoutMs): EventEmitter-based promise that resolves on the next 'done'/'cancelled' event for the given deployId, or immediately if the row is already terminal; rejects with 'deploy_timeout' after timeoutMs (default 10 min).
- DeployService.getLogPath(deployId) + DeployRepository.getLogPath: surface the persisted log_path for tail reading.
- self/router.ts: await waitForDone, read log tail, respond once.
- cial:build skills (restricted + unrestricted): drop the polling loop, document the synchronous response shape.
Co-Authored-By: Claude Opus 4.6 --- .claude/skills.src/cial:build/restricted.md | 33 +++++------ .claude/skills.src/cial:build/unrestricted.md | 33 +++++------ core/back/src/modules/deploy/repository.ts | 7 +++ core/back/src/modules/deploy/service.ts | 40 +++++++++++++ core/back/src/modules/self/router.ts | 59 ++++++++++++++++--- 5 files changed, 127 insertions(+), 45 deletions(-) diff --git a/.claude/skills.src/cial:build/restricted.md b/.claude/skills.src/cial:build/restricted.md index e69e3d9..f345d08 100644 --- a/.claude/skills.src/cial:build/restricted.md +++ b/.claude/skills.src/cial:build/restricted.md @@ -14,29 +14,25 @@ description: Build platform code inside the dev container (RESTRICTED mode). Alw ## Endpoint `POST http://127.0.0.1:4000/api/v1/self/deploy` — localhost-gated, no auth. Hit core-back directly on port 4000; edge does **not** proxy `/api/v1/*`. +**Synchronous: the request blocks until the build (and post-build restart) reach a terminal state.** No polling. No separate status endpoint. + ## Steps -1. Kick off the build: +1. Run the build (this can take 30s–3min — be patient, do not retry): ```bash - curl -sf -X POST http://127.0.0.1:4000/api/v1/self/deploy \ + curl -sS --max-time 600 -X POST http://127.0.0.1:4000/api/v1/self/deploy \ -H 'content-type: application/json' \ -d "{\"scope\":\"auto\",\"sessionId\":\"$CIAL_SESSION_ID\"}" ``` - Response (202): `{ "ok": true, "deployId": "", "status": "queued"|"building", "scope": "platform", "logPath": "...", "buildFilter": [...] }` -2. Capture `deployId` and `logPath`. -3. Poll until terminal: - ```bash - while :; do - status=$(curl -sf http://127.0.0.1:4000/deploy/$DEPLOY_ID | jq -r .deploy.status) - case "$status" in ok|error|cancelled) echo "$status"; break ;; esac - sleep 2 - done - ``` -4. On `error`: - ```bash - curl -sf http://127.0.0.1:4000/deploy/$DEPLOY_ID | jq .deploy.errorSummary - tail -n 80 "$LOG_PATH" - ``` - Fix the errors and re-run from step 1. +2. Read the response. 
+ - Success (HTTP 200): + ```json + { "ok": true, "deployId": "...", "status": "ok", "scope": "platform", "durationMs": 42103, "exitCode": 0, "errorSummary": null, "logTail": "..." } + ``` + - Failure (HTTP 500): + ```json + { "ok": false, "deployId": "...", "status": "error", "scope": "platform", "durationMs": ..., "exitCode": 1, "errorSummary": "...", "logTail": "..." } + ``` + Read `errorSummary` + `logTail`, fix the code, re-run from step 1. ## Scope - `scope: "auto"` (default) → `platform`. @@ -46,7 +42,6 @@ description: Build platform code inside the dev container (RESTRICTED mode). Alw ## Errors - `403 not_localhost` — should never happen from inside the container. - `403 unrestricted_required` — you passed `scope: "all"`. Stick to `auto` or `platform`. -- `409 build_in_progress` — only with `?wait=false`. Default behavior coalesces. ## See also - `docs/self-edit/api.md` — full contract. diff --git a/.claude/skills.src/cial:build/unrestricted.md b/.claude/skills.src/cial:build/unrestricted.md index 668c6d4..83f35f1 100644 --- a/.claude/skills.src/cial:build/unrestricted.md +++ b/.claude/skills.src/cial:build/unrestricted.md @@ -14,29 +14,25 @@ description: Build any package inside the dev container (UNRESTRICTED mode). Def ## Endpoint `POST http://127.0.0.1:4000/api/v1/self/deploy` — localhost-gated, no auth. Hit core-back directly on port 4000; edge does **not** proxy `/api/v1/*`. +**Synchronous: the request blocks until the build (and post-build restart) reach a terminal state.** No polling. No separate status endpoint. + ## Steps -1. Kick off the build: +1. 
Run the build (a full `scope: "all"` rebuild can take several minutes — be patient, do not retry): ```bash - curl -sf -X POST http://127.0.0.1:4000/api/v1/self/deploy \ + curl -sS --max-time 600 -X POST http://127.0.0.1:4000/api/v1/self/deploy \ -H 'content-type: application/json' \ -d "{\"scope\":\"auto\",\"sessionId\":\"$CIAL_SESSION_ID\"}" ``` - Response (202): `{ "ok": true, "deployId": "", "status": "queued"|"building", "scope": "all", "logPath": "...", "buildFilter": [...] }` -2. Capture `deployId` and `logPath`. -3. Poll until terminal: - ```bash - while :; do - status=$(curl -sf http://127.0.0.1:4000/deploy/$DEPLOY_ID | jq -r .deploy.status) - case "$status" in ok|error|cancelled) echo "$status"; break ;; esac - sleep 2 - done - ``` -4. On `error`: - ```bash - curl -sf http://127.0.0.1:4000/deploy/$DEPLOY_ID | jq .deploy.errorSummary - tail -n 80 "$LOG_PATH" - ``` - Fix the errors and re-run from step 1. +2. Read the response. + - Success (HTTP 200): + ```json + { "ok": true, "deployId": "...", "status": "ok", "scope": "all", "durationMs": 142103, "exitCode": 0, "errorSummary": null, "logTail": "..." } + ``` + - Failure (HTTP 500): + ```json + { "ok": false, "deployId": "...", "status": "error", "scope": "all", "durationMs": ..., "exitCode": 1, "errorSummary": "...", "logTail": "..." } + ``` + Read `errorSummary` + `logTail`, fix the code, re-run from step 1. ## Scope - `scope: "auto"` (default) → `all`. @@ -45,7 +41,6 @@ description: Build any package inside the dev container (UNRESTRICTED mode). Def ## Errors - `403 not_localhost` — should never happen from inside the container. -- `409 build_in_progress` — only with `?wait=false`. Default behavior coalesces. ## See also - `docs/self-edit/api.md` — full contract. 
diff --git a/core/back/src/modules/deploy/repository.ts b/core/back/src/modules/deploy/repository.ts index 724d539..69bbc3b 100644 --- a/core/back/src/modules/deploy/repository.ts +++ b/core/back/src/modules/deploy/repository.ts @@ -86,6 +86,13 @@ export class DeployRepository { return row ? rowToDto(row) : null; } + getLogPath(id: string): string | null { + const row = this.db.prepare('SELECT log_path FROM deploy WHERE id = ?').get(id) as + | { log_path: string } + | undefined; + return row?.log_path ?? null; + } + list(limit = 20): DeployRow[] { const rows = this.db .prepare('SELECT * FROM deploy ORDER BY started_at DESC LIMIT ?') diff --git a/core/back/src/modules/deploy/service.ts b/core/back/src/modules/deploy/service.ts index 63da0a9..451ef55 100644 --- a/core/back/src/modules/deploy/service.ts +++ b/core/back/src/modules/deploy/service.ts @@ -117,10 +117,50 @@ export class DeployService extends EventEmitter { return this.opts.repo.get(id); } + getLogPath(id: string): string | null { + return this.opts.repo.getLogPath(id); + } + list(limit?: number): DeployRow[] { return this.opts.repo.list(limit); } + /** + * Resolve once the given deploy reaches a terminal state (`ok`, `error`, + * `cancelled`). Used by the synchronous self-edit endpoint so the agent + * gets a single blocking response instead of having to poll. + * + * If the deploy is already terminal, returns the row immediately. Rejects + * with `deploy_timeout` after `timeoutMs` (default 10 min). 
+ */ + waitForDone(deployId: string, timeoutMs = 10 * 60 * 1000): Promise { + const existing = this.opts.repo.get(deployId); + if (existing && this.isTerminal(existing.status)) { + return Promise.resolve(existing); + } + return new Promise((resolve, reject) => { + const finish = () => { + clearTimeout(timer); + this.off('done', onDone); + this.off('cancelled', onCancelled); + resolve(this.opts.repo.get(deployId)); + }; + const onDone = (e: DeployDoneEvt) => { if (e.deployId === deployId) finish(); }; + const onCancelled = (e: { deployId: string }) => { if (e.deployId === deployId) finish(); }; + const timer = setTimeout(() => { + this.off('done', onDone); + this.off('cancelled', onCancelled); + reject(new Error('deploy_timeout')); + }, timeoutMs); + this.on('done', onDone); + this.on('cancelled', onCancelled); + }); + } + + private isTerminal(status: string): boolean { + return status === 'ok' || status === 'error' || status === 'cancelled'; + } + getMode(): DeployMode { return this.opts.repo.getMode(); } diff --git a/core/back/src/modules/self/router.ts b/core/back/src/modules/self/router.ts index 7e89151..d5a3ef7 100644 --- a/core/back/src/modules/self/router.ts +++ b/core/back/src/modules/self/router.ts @@ -19,6 +19,7 @@ */ import { Router } from 'express'; +import { open } from 'node:fs/promises'; import type { Logger } from 'pino'; import { ValidationError } from '../../infrastructure/errors.js'; import { localhostOnly } from './localhost-only.js'; @@ -26,6 +27,28 @@ import type { DeployService } from '../deploy/service.js'; import type { BuildScope } from '../deploy/runner.js'; import type { RestartScope } from '../deploy/supervisor-client.js'; +/** + * Read the last `maxBytes` of a log file. Used to inline a tail of the + * build log into the synchronous deploy response so the agent has the + * failure context without having to fetch a separate endpoint. 
+ */ +async function readLogTail(logPath: string, maxBytes = 8 * 1024): Promise { + let fh: Awaited> | null = null; + try { + fh = await open(logPath, 'r'); + const stat = await fh.stat(); + const size = stat.size; + const start = size > maxBytes ? size - maxBytes : 0; + const buf = Buffer.alloc(size - start); + await fh.read(buf, 0, buf.length, start); + return buf.toString('utf8'); + } catch { + return null; + } finally { + await fh?.close().catch(() => {}); + } +} + export interface SelfRouterOpts { logger: Logger; deployService: DeployService; @@ -59,7 +82,10 @@ export function createSelfRouter(opts: SelfRouterOpts): Router { router.use(localhostOnly()); // ── POST /api/v1/self/deploy ───────────────────────────────────────── - router.post('/deploy', (req, res, next) => { + // Synchronous: blocks until the build (and post-build restart) reach a + // terminal state. The agent gets ONE answer it can act on — no polling, + // no separate auth-gated GET endpoint. Old-Cial-style ergonomics. + router.post('/deploy', async (req, res, next) => { try { const body = (req.body ?? {}) as Record; const apiScope = parseScope(body.scope); @@ -76,18 +102,37 @@ export function createSelfRouter(opts: SelfRouterOpts): Router { } const scope = resolveBuildScope(apiScope, opts.unrestricted); - const result = opts.deployService.start({ + const started = opts.deployService.start({ requestedByUserId: SELF_USER_ID, sessionId, modeOverride: mode, scope, }); - res.status(202).json({ - ok: true, - deployId: result.deployId, - status: result.status, - scope: result.scope, + let finalRow = null; + try { + finalRow = await opts.deployService.waitForDone(started.deployId); + } catch (waitErr) { + opts.logger.warn( + { err: waitErr, deployId: started.deployId }, + 'self/deploy wait failed', + ); + } + + const logPath = opts.deployService.getLogPath(started.deployId); + const logTail = logPath ? await readLogTail(logPath) : null; + const status = finalRow?.status ?? 
'unknown'; + const ok = status === 'ok'; + + res.status(ok ? 200 : 500).json({ + ok, + deployId: started.deployId, + status, + scope: started.scope, + durationMs: finalRow?.durationMs ?? null, + exitCode: finalRow?.exitCode ?? null, + errorSummary: finalRow?.errorSummary ?? (ok ? null : 'deploy did not reach terminal state'), + logTail, }); } catch (err) { if (err instanceof Error && err.message === 'unrestricted_required') {