experimental.chat.system.transform fires for task-spawned subagent sessions after the task prompt (a user message) is already in the conversation. Pushing to output.system at that point places a system message at a non-zero position, which Qwen3.6's strict Jinja chat template rejects with 'System message must be at the beginning.' This change moves the session-start.sh injection to chat.message, guarded so that it runs only on the first turn of each session (tracked via initializedSessions). The context is injected as a synthetic text part via unshift(), which has no position constraints.
316 lines
14 KiB
TypeScript
316 lines
14 KiB
TypeScript
import type { Plugin, TextPart } from "@opencode-ai/plugin";
|
||
import { resolve, dirname } from "node:path";
|
||
import { fileURLToPath } from "node:url";
|
||
|
||
/**
|
||
* Agent support plugin for Remnant.
|
||
*
|
||
* Responsibilities:
|
||
* 1. chat.message (first turn) — session-start.sh (once per session)
|
||
* 2. chat.message — user-prompt-submit.sh (each turn)
|
||
* 3. tool.execute.before — pre-tool-use.sh (project policy)
|
||
* 4. tool.execute.after — post-tool-use.sh + context pressure warning
|
||
* 5. experimental.session.compacting — pre-compact.sh
|
||
*
|
||
* Note: stop.sh has no equivalent OpenCode plugin event; it only fires in Copilot.
|
||
*/
|
||
|
||
// Rough token estimate: about 4 characters per token (conservative for code).
const CHARS_PER_TOKEN = 4;
// Context budget, in tokens, that cumulative tool output is measured against.
const CONTEXT_LIMIT_TOKENS = 32 * 1024;
// Fraction of the budget at which a context-pressure warning is injected (70%).
const PRESSURE_THRESHOLD = 0.7;

// Tool-response caps for local agents:
//   - build agent (local profile): 1500 tokens, to respect OmniCoder's 32K context window
//   - orchestrator: 2500 tokens, since it only reads and does not edit
// All other agents receive full tool responses.
const LOCAL_WORKER_MAX_TOKENS = 1500;
const LOCAL_ORCHESTRATOR_MAX_TOKENS = 2500;
|
||
|
||
/**
 * Cap `text` at roughly `maxTokens` tokens and append a notice when content
 * was dropped.
 *
 * The token budget is converted to a UTF-16 code-unit budget by multiplying
 * with `charsPerToken`. Negative or fractional budgets are clamped to a
 * non-negative whole number of code units.
 *
 * @param text - The text to cap.
 * @param maxTokens - Approximate token budget; also echoed in the notice.
 * @param charsPerToken - Characters assumed per token. Defaults to the
 *   module-level CHARS_PER_TOKEN.
 * @returns The (possibly shortened) text and whether anything was cut.
 */
function truncate(
  text: string,
  maxTokens: number,
  charsPerToken: number = CHARS_PER_TOKEN,
): { text: string; truncated: boolean } {
  const maxChars = Math.max(0, Math.floor(maxTokens * charsPerToken));
  if (text.length <= maxChars) return { text, truncated: false };

  // Never cut between the two halves of a surrogate pair: that would leave a
  // lone high surrogate (an invalid character) right before the notice.
  let end = maxChars;
  const before = text.charCodeAt(end - 1);
  const after = text.charCodeAt(end);
  const splitsPair =
    before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
  if (splitsPair) end -= 1;

  return {
    text:
      text.slice(0, end) +
      `\n\n[Response truncated at ~${maxTokens} tokens. Use a more targeted query to retrieve the relevant section.]`,
    truncated: true,
  };
}
|
||
|
||
export const AgentSupportPlugin: Plugin = async ({ $, directory }) => {
|
||
// Resolve hooks relative to this plugin file's real path (resolves symlinks).
|
||
// This makes the plugin work both as a project-local plugin and as a global
|
||
// plugin installed via install.sh — in either case, hooks live in ../../hooks/
|
||
// relative to this file in the .agents/frameworks/opencode/ directory.
|
||
const hooksDir = resolve(
|
||
dirname(fileURLToPath(import.meta.url)),
|
||
"../../hooks",
|
||
);
|
||
|
||
// Running cumulative context size estimate (characters)
|
||
let contextCharsUsed = 0;
|
||
|
||
// Track sessions that have had session-start injected (fires once per session)
|
||
const initializedSessions = new Set<string>();
|
||
/**
 * Extract `hookSpecificOutput.additionalContext` from a hook's JSON output.
 *
 * The output is trimmed and parsed as JSON. The value is returned only when
 * it is actually a string; anything else (invalid JSON, a non-object payload,
 * a missing field, or a non-string value) yields `undefined`, so callers never
 * inject a non-string as message text.
 *
 * @param hookOutput - Raw text produced by a hook script.
 * @returns The additional context string, or `undefined` when unavailable.
 */
function parseAdditionalContext(hookOutput: string): string | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(hookOutput.trim());
  } catch (_error) {
    // Empty or non-JSON output simply means "no context".
    return undefined;
  }

  if (typeof parsed !== "object" || parsed === null) return undefined;
  const specific = (parsed as { hookSpecificOutput?: unknown })
    .hookSpecificOutput;
  if (typeof specific !== "object" || specific === null) return undefined;

  const context = (specific as { additionalContext?: unknown })
    .additionalContext;
  return typeof context === "string" ? context : undefined;
}
|
||
|
||
/**
 * Run a hook script from `hooksDir` under bash and return the text it produced.
 *
 * When `stdinJson` is a non-empty string it is fed to the script on stdin.
 * Hooks are advisory: if running the script throws, the failure is appended to
 * a debug log and an empty string is returned instead of propagating the error.
 *
 * @param scriptName - File name of the hook inside `hooksDir`.
 * @param stdinJson - Optional JSON payload to pipe to the script's stdin.
 * @returns The `.text()` result of the shell command (presumably its stdout —
 *   confirm against the shell helper's docs), or `""` on failure.
 */
async function runHook(
  scriptName: string,
  stdinJson?: string,
): Promise<string> {
  const script = `${hooksDir}/${scriptName}`;

  try {
    if (stdinJson) {
      return await $`bash ${script} < ${Buffer.from(stdinJson)}`.text();
    }
    return await $`bash ${script}`.text();
  } catch (hookError) {
    // DEBUG: record hook failures so the silent fallback below doesn't hide
    // enforcement bugs. Logging itself is best-effort.
    try {
      const { appendFileSync } = await import("node:fs");
      const entry = {
        ts: new Date().toISOString(),
        script,
        error: String(hookError),
      };
      appendFileSync(
        "/tmp/plugin-hook-errors.log",
        JSON.stringify(entry) + "\n",
      );
    } catch (_logError) {
      // ignore
    }

    // Hooks are advisory — never block on hook failure
    return "";
  }
}
|
||
|
||
return {
|
||
// ── 1 & 2. Session start + user prompt ──────────────────────────────────
// Session-start was previously injected via experimental.chat.system.transform
// (pushing to output.system). That caused a Jinja "System message must be at
// the beginning" error on Qwen-family local models when the orchestrator spawns
// a subagent via `task`: system.transform fires after the task prompt (a user
// message) is already in the conversation, so the system push lands at a
// non-zero position. Injecting as a synthetic text part on the first
// chat.message turn avoids the position constraint entirely.
"chat.message": async (input, output) => {
  const sessionID = input.sessionID ?? "unknown";

  // Build a synthetic text part attached to the current message.
  const syntheticPart = (text: string): TextPart => ({
    id: `prt_${crypto.randomUUID()}`,
    sessionID: input.sessionID,
    messageID: input.messageID ?? crypto.randomUUID(),
    type: "text",
    text,
    synthetic: true,
  });

  // Capture the prompt BEFORE injecting anything, so user-prompt-submit.sh
  // sees only what arrived with the message and not the session-start
  // context this handler is about to prepend on the first turn.
  const promptText = output.parts
    .filter((p): p is TextPart => p.type === "text")
    .map((p) => p.text)
    .join("\n");

  // Session-start injection — attempted exactly once per session. The session
  // is marked before the hook is awaited, so a second message arriving while
  // the hook is still running does not trigger it again. The part is
  // prepended so it reads before the user-prompt-submit nudges.
  if (!initializedSessions.has(sessionID)) {
    initializedSessions.add(sessionID);
    const startContext = parseAdditionalContext(
      await runHook("session-start.sh"),
    );
    if (startContext) {
      output.parts.unshift(syntheticPart(startContext));
    }
  }

  // Per-turn hook: receives the prompt as JSON on stdin; any context it
  // returns is appended after the existing parts.
  const context = parseAdditionalContext(
    await runHook(
      "user-prompt-submit.sh",
      JSON.stringify({ prompt: promptText }),
    ),
  );
  if (context) {
    output.parts.push(syntheticPart(context));
  }
},
|
||
// ── 3. Pre-tool-use ─────────────────────────────────────────────────────
// Runs before every tool call. Throwing an Error from here blocks the call;
// the message is what the agent sees. Order: built-in read guards, task
// prompt size guard, then the project's pre-tool-use.sh policy hook.
"tool.execute.before": async (input, output) => {
  const toolName = input.tool as string;

  // ── read guards ───────────────────────────────────────────────────
  if (toolName === "read") {
    const args = (output.args ?? {}) as {
      filePath?: string;
      offset?: number;
      limit?: number;
    };
    const filePath = args.filePath ?? "";

    // package.json read guard:
    // Reading workspace package.json files auto-loads nested AGENTS.md files
    // via OpenCode's context injection, burning through the 32K context budget.
    // Block package.json reads under apps/ and packages/ only.
    if (/(^|\/)(apps|packages)\/[^/]+\/package\.json$/.test(filePath)) {
      throw new Error(
        "BLOCKED: Reading workspace package.json files auto-loads nested AGENTS.md files and exhausts the 32K context. Use `grep_search` to find the specific field you need (e.g. a dependency version or script name) instead of reading the whole file.",
      );
    }

    // Pagination guard:
    // Large sequential reads exhaust the 32K context window quickly.
    // The OpenCode `read` tool uses `offset` (1-indexed start) and `limit` (max lines).
    // Unbounded reads (no limit) default to 2000 lines — always blocked.
    // docs/ files may read up to 500 lines; all other files are capped at 50.
    // Directory reads (e.g. `Read .`) never carry a limit — skip the guard.
    let isDirectory = false;
    try {
      const { statSync } = await import("node:fs");
      isDirectory = statSync(filePath).isDirectory();
    } catch (_error) {
      // path doesn't exist or inaccessible — treat as file
    }

    if (!isDirectory) {
      const isDocsFile = /(^|\/)docs\//.test(filePath);
      const readLimit = args.limit;

      if (readLimit === undefined) {
        throw new Error(
          isDocsFile
            ? `BLOCKED: Unbounded read (no limit) is prohibited. Specify offset and limit to read in ≤500-line chunks for docs/ files.`
            : `BLOCKED: Unbounded read (no limit) is prohibited. Use grep_search first to find the relevant section, then read with offset and limit in ≤50-line chunks.`,
        );
      }

      const lineLimit = isDocsFile ? 500 : 50;
      if (readLimit > lineLimit) {
        throw new Error(
          isDocsFile
            ? `BLOCKED: Read more than 500 lines at once is prohibited for docs/ files. Use offset and limit to paginate in ≤500-line chunks.`
            : `BLOCKED: Read more than 50 lines at once is prohibited. Use offset and limit to paginate in ≤50-line chunks. For docs/ files the limit is 500 lines. Use grep_search first to find the right offset.`,
        );
      }
    }
  }

  // ── Task prompt size guard ─────────────────────────────────────────────
  // The `task` tool has a JSON serialization limit. Embedding file contents
  // or long inventories inline in a task prompt causes "Unterminated string"
  // parse errors. Cap task prompts at 1200 chars — workers should be told
  // WHICH files to read, not given the contents inline.
  if (toolName === "task") {
    const args = (output.args ?? {}) as { prompt?: string };
    const prompt = args.prompt ?? "";
    if (prompt.length > 1200) {
      throw new Error(
        `BLOCKED (task prompt too long: ${prompt.length} chars, max 1200): Task prompts must not embed file contents, dependency lists, or long context inline — this causes JSON parse failures. Instead, tell the worker WHICH files to read and WHAT to do. Example: "Read the root package.json and all workspace package.json files, then update the Technology Stack section in README.md to match."`,
      );
    }
  }

  // Shell out to pre-tool-use hook (project policy enforcement).
  // Policies 1–12: command/file guards. Policy 13: read_file range limit
  // (≤50 lines for source files, ≤500 for docs/). Deny = throws Error.
  const hookInput = JSON.stringify({
    tool_name: toolName,
    tool_input: output.args ?? {},
  });
  const hookResult = await runHook("pre-tool-use.sh", hookInput);

  // If the hook emitted a deny decision, surface it as an error.
  // The match tolerates any whitespace around the colon, so compact JSON
  // ("permissionDecision":"deny") is denied just like pretty-printed output
  // instead of slipping past the policy.
  if (/"permissionDecision"\s*:\s*"deny"/.test(hookResult)) {
    // Capture the full JSON string body, including escaped quotes.
    const rawReason = hookResult.match(
      /"permissionDecisionReason"\s*:\s*"((?:[^"\\]|\\.)*)"/,
    )?.[1];

    let reason = "Blocked by project policy (pre-tool-use hook).";
    if (rawReason) {
      try {
        // Decode JSON escapes (\" \n \uXXXX) so the agent sees the real text.
        const decoded: unknown = JSON.parse(`"${rawReason}"`);
        reason = typeof decoded === "string" && decoded ? decoded : rawReason;
      } catch (_error) {
        // Not valid as a JSON string (e.g. raw newline) — use it verbatim.
        reason = rawReason;
      }
    }
    throw new Error(reason);
  }
},
|
||
|
||
// ── 4. Post-tool-use ────────────────────────────────────────────────────
// Runs after every tool call. Only string responses are processed:
//   a) truncate responses for local agents,
//   b) track cumulative context use and prepend a pressure warning at ≥70%,
//   c) notify the post-tool-use.sh hook.
// NOTE(review): this reads `output.response`, `input.agent`, `input.model`
// and `input.args`. Confirm these fields exist on the hook payload for the
// installed @opencode-ai/plugin version — if `output.response` is not a
// string, the handler does nothing.
"tool.execute.after": async (input, output) => {
  const response = output.response as string | undefined;
  if (typeof response !== "string") return;

  // a) Response truncation — local agents (build/orchestrator) and any ollama/ model;
  //    orchestrator gets a higher limit since it only reads, not edits.
  const agentName = typeof input.agent === "string" ? input.agent : "";
  const isLocalAgent =
    agentName === "build" ||
    agentName === "orchestrator" ||
    (typeof input.model === "string" && input.model.startsWith("ollama/"));

  // `delivered` is the text that will actually be handed back to the agent.
  let delivered = response;
  if (isLocalAgent) {
    const maxTokens =
      agentName === "orchestrator"
        ? LOCAL_ORCHESTRATOR_MAX_TOKENS
        : LOCAL_WORKER_MAX_TOKENS;
    delivered = truncate(response, maxTokens).text;
  }

  // b) Context pressure tracking — accumulate and inject warning when ≥70%.
  //    Count the delivered (possibly truncated) text, since that is what is
  //    returned to the agent; counting the untruncated response would
  //    over-estimate usage and fire the warning early.
  contextCharsUsed += delivered.length;
  const charLimit = CONTEXT_LIMIT_TOKENS * CHARS_PER_TOKEN;
  const pct = contextCharsUsed / charLimit;

  if (pct >= PRESSURE_THRESHOLD) {
    const pctDisplay = Math.round(pct * 100);
    const pressure = `[CONTEXT PRESSURE: ~${pctDisplay}% used. Be concise. Prefer targeted tool calls. Write progress to NOTES.md before continuing.]`;
    delivered = `${pressure}\n\n${delivered}`;
    // Reset after injection so we don't spam every subsequent turn
    contextCharsUsed = 0;
  }

  // Only write back when something changed (truncation and/or warning).
  if (delivered !== response) {
    output.response = delivered;
  }

  // c) Shell out to post-tool-use hook (metacognitive reminders, methodology)
  const hookInput = JSON.stringify({
    tool_name: input.tool,
    tool_input: input.args ?? {},
    tool_response: delivered.slice(0, 500), // truncated for hook
  });
  await runHook("post-tool-use.sh", hookInput);
},
|
||
|
||
// ── 5. Pre-compact: export state before context summarization ─────────────
// Runs pre-compact.sh, then replaces the compaction prompt with a fixed
// Markdown summary template.
"experimental.session.compacting": async (input, output) => {
  await runHook("pre-compact.sh");

  // One entry per line of the prompt; an empty entry is a blank line. The
  // leading empty entry reproduces the newline that opens the prompt.
  const promptLines = [
    "",
    "You are a context summarizer for coding sessions. Summarize only the conversation history given — do not answer it.",
    "",
    "If a <previous-summary> block is present, update it: preserve still-true facts, remove stale ones, merge new facts.",
    "",
    "Output exactly this Markdown structure. Keep every section even when empty. Use terse bullets, not prose. Preserve exact file paths, commands, error strings, and identifiers.",
    "",
    "---",
    "## Original Prompt",
    "## Clarifications",
    "## Constraints & Preferences",
    "## Progress",
    "### Done",
    "### In Progress",
    "### Blocked",
    "## Key Decisions",
    "## Next Steps",
    "## Critical Context",
    "## Relevant Files",
    "---",
    "",
    "For Clarifications: include only follow-ups that changed scope, added constraints, or redirected work. Do not mention that you are summarizing. Respond in the conversation's language.",
  ];

  output.prompt = promptLines.join("\n");
},
|
||
};
|
||
};
|