fix: move more into install.sh

model config, downloads, llama-server service and a README.md to explain --host
2026-06-08 20:38:24 -04:00 · 2026-06-08 20:38:24 -04:00 · 2a00366204
commit 2a00366204
parent d237fcfcd3
8 changed files with 493 additions and 25 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,39 @@
+# dotfiles
+
+Personal dotfiles and AI-agent infrastructure for VS Code Copilot and OpenCode.
+
+## Quick Start
+
+For host machines, install dotfiles plus llama-server config and systemd services via the `--host` flag:
+
+```bash
+git clone https://github.com/username/dotfiles ~/dotfiles
+~/dotfiles/install.sh --host
+```
+
+In devcontainers, run `install.sh` without the `--host` flag: call it from the Dockerfile, rely on VS Code settings, or (possibly better) use a devcontainer "features" config such as:
+
+```json
+"features": {
+    "ghcr.io/willfantom/features/dotfiles:1": {
+      "repository": "git@git.bcdewitt.ddns.net:bcdewitt/dotfiles.git",
+      "targetPath": "~/dotfiles",
+      "installCommand": "install.sh"
+    }
+}
+```
+
+## What Gets Installed
+
+**Basic install** (`install.sh`):
+- Agent hooks wired into VS Code Copilot and OpenCode (the `.agents/` infrastructure)
+- OpenCode config symlinked to `~/.config/opencode/opencode.json`
+
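+**Host install** (`install.sh --host`):
+- Everything in basic install, plus:
+- Model downloads into `~/models` via `huggingface-cli` (skipped with a warning if the CLI is not installed)
+- llama-server presets, startup script, and systemd units from `config/llama-server/`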
+
+## Idempotent
+
+The install script is idempotent — safe to re-run at any time. It skips steps that
+are already in place and only changes what needs updating.
--- a/config/llama-server/llama-server-presets.path
+++ b/config/llama-server/llama-server-presets.path
@ -0,0 +1,8 @@
+[Unit]
+Description=Restart llama-server when presets.ini changes
+
+[Path]
+PathModified=/home/dev/models/presets.ini
+
+[Install]
+WantedBy=default.target
--- a/config/llama-server/llama-server-presets.service
+++ b/config/llama-server/llama-server-presets.service
@ -0,0 +1,6 @@
+[Unit]
+Description=Restart llama-server (triggered by presets.ini change)
+
+[Service]
+Type=oneshot
+ExecStart=/bin/systemctl restart llama-server
--- a/config/llama-server/llama-server.service
+++ b/config/llama-server/llama-server.service
@ -0,0 +1,15 @@
+[Unit]
+Description=llama-server
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+ExecStart=/opt/llama-server/start.sh
+User=ollama
+Group=ollama
+Restart=always
+RestartSec=3
+Environment="PATH=/home/dev/.nvm/versions/node/v24.15.0/bin:/home/dev/.opencode/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/usr/lib/wsl/lib:/mnt/c/Program Files (x86)/NVIDIA Corporation/PhysX/Common:/mnt/c/Python312/Scripts/:/mnt/c/Python312/:/mnt/c/Program Files/Microsoft/jdk-17.0.8.7-hotspot/bin:/mnt/c/Program Files/Oculus/Support/oculus-runtime:/mnt/c/Windows/system32:/mnt/c/Windows:/mnt/c/Windows/System32/Wbem:/mnt/c/Windows/System32/WindowsPowerShell/v1.0/:/mnt/c/Windows/System32/OpenSSH/:/mnt/c/Program Files/dotnet/:/mnt/c/Program Files/Microsoft VS Code/bin:/mnt/c/WINDOWS/system32:/mnt/c/WINDOWS:/mnt/c/WINDOWS/System32/Wbem:/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/:/mnt/c/WINDOWS/System32/OpenSSH/:/mnt/c/Program Files/nodejs/:/mnt/c/ProgramData/chocolatey/bin:/mnt/c/Users/Dev/AppData/Local/Programs/cursor/resources/app/bin:/mnt/c/WINDOWS/system32:/mnt/c/WINDOWS:/mnt/c/WINDOWS/System32/Wbem:/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/:/mnt/c/WINDOWS/System32/OpenSSH/:/mnt/c/Program Files/Docker/Docker/resources/bin:/mnt/c/Users/Dev/AppData/Local/Microsoft/WindowsApps:/mnt/c/Users/Dev/AppData/Roaming/npm:/mnt/c/Users/Dev/.dotnet/tools:/mnt/c/Users/Dev/AppData/Local/Microsoft/WinGet/Packages/albertony.npiperelay_Microsoft.Winget.Source_8wekyb3d8bbwe:/mnt/c/Users/Dev/.lmstudio/bin:/snap/bin"
+
+[Install]
+WantedBy=default.target
--- a/config/llama-server/presets.ini
+++ b/config/llama-server/presets.ini
@ -0,0 +1,147 @@
+version = 1
+
+; ─── Global ──────────────────────────────────────────────────────────────────
+; Settings in [*] are inherited by every model loaded by the router.
+; Per-model sections below override individual keys.
+[*]
+
+; Number of model layers to offload to GPU.
+; 99 means "offload everything" — llama.cpp loads as many as fit and falls back
+; to CPU automatically for any overflow. Using an explicit value avoids the
+; occasional conservative auto-estimate.
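+; Default: auto
+; Commented out: the default (auto) is currently in effect; uncomment the line
+; below to force full offload.
+; n-gpu-layers = 99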
+
+; Flash Attention: reduces KV-cache VRAM usage and speeds up long-context
+; inference by computing attention without materializing the full NxN matrix.
+; "on" forces it; "auto" (default) enables it when CUDA is detected — same
+; effect in practice, but explicit is clearer here.
+; Default: auto
+flash-attn = on
+
+; Number of CPU threads used for non-GPU work: tokenization, sampling, and any
+; layers that overflow VRAM during hybrid inference. ~2/3 of the available cores
+; is the rule of thumb; going higher causes contention on the same cores the GPU
+; DMA uses. (Machine has 12 logical cores → 8 threads.)
+; Default: -1 (use all cores)
+threads = 8
+
+; Number of inference slots (parallel sequences). 1 = single-user server with
+; no batching overhead. Increase only if you need concurrent requests; each
+; extra slot consumes a proportional share of KV-cache VRAM.
+; Default: -1 (auto, usually 1)
+parallel = 1
+
+; Jinja2 chat templating — required for models with complex chat templates
+; (e.g. Qwen3, which uses raise_exception() guards). Without this, llama.cpp
+; falls back to a static PEG auto-parser that can't handle those templates.
+jinja = on
+
+; Token budget for chain-of-thought reasoning.
+;   -1 = unrestricted (model decides when to stop thinking)
+;    0 = disable thinking entirely
+;   N  = hard cap at N tokens, then force the model to answer
+; Commented out: matches the default (-1 = unrestricted).
+; reasoning-budget = -1
+
+ctx-size = 32768
+n-predict = 4096
+
+; ─── Qwen3-14B ───────────────────────────────────────────────────────────────
+; ~8.5 GB GGUF — fits fully in 12 GB VRAM. Fast (~12–18 tok/s). Good daily
+; driver for interactive coding and Q&A.
+[Qwen_Qwen3-14B-Q4_K_M]
+
+; Full 32 K context is safe: 14B fits in VRAM with plenty of headroom for the
+; KV cache. At 32 K tokens × 40 layers × 2 (K+V) × 1024 KV dims × 2 bytes
+; ≈ 5 GB worst-case KV.
+; Default: 0 (read from model metadata, typically the training context limit)
+ctx-size = 32768
+
+; Cap generation at 4096 tokens. Prevents runaway responses; raise if you need
+; longer output (documentation, large refactors). Default: -1 (unlimited)
+n-predict = 4096
+
+
+; ─── OmniCoder-2-9B ──────────────────────────────────────────────────────────
+; ~9.4 GB GGUF — fits fully in 12 GB VRAM. Fast generation. Vision-capable
+; (multimodal projector at OmniCoder-2-9B.Q8_0/mmproj-Q8_0.gguf — auto-detected
+; from subdirectory layout by the router).
+[OmniCoder-2-9B.Q8_0]
+
+; Full 32 K context fits comfortably alongside 9B weights.
+; Default: 0 (read from model metadata)
+ctx-size = 32768
+
+; Cap generation at 4096 tokens. Default: -1 (unlimited)
+n-predict = 4096
+
+
+; ─── Qwen3.6-35B-A3B (MoE + MTP) ────────────────────────────────────────────
+; 13.6 GB GGUF — ~12 GB on GPU, ~1.6 GB CPU offload on a 12 GB card.
+; MoE model: only ~3B parameters active per forward pass despite 35B total.
+; MTP (multi-token prediction) heads baked in — uses draft-mtp speculative
+; decoding to roughly double throughput vs non-speculative. Requires b9279+.
+[Qwen3.6-35B-A3B-IQ3_S-3.06bpw]
+
+; KV cache is small (~31 MiB/1K tokens) due to GQA — 32K context only needs
+; ~1 GB KV cache, which pages to CPU gracefully without major throughput loss.
+ctx-size = 32768
+
+; Cap generation at 4096 tokens. Default: -1 (unlimited)
+n-predict = 4096
+
+; Multi-token prediction speculative decoding.
+; spec-type = draft-mtp uses MTP heads built into the model weights.
+spec-type = draft-mtp
+
+; Minimum acceptance probability for a speculated draft token (0–1).
+; 0.75 = accept tokens the model is 75%+ confident in. Lower = more aggressive
+; speculation (faster but slightly more divergence risk).
+spec-draft-p-min = 0.75
+
+; Max tokens to speculate per step. 3 is the sweet spot for Qwen3.6 MTP.
+spec-draft-n-max = 3
+
+
+; ─── Qwen3.6-27B ─────────────────────────────────────────────────────────────
+; 17 GB GGUF — ~12 GB on GPU, ~5 GB CPU offload on a 12 GB card.
+; Slower (~4–8 tok/s) due to CPU↔GPU transfers; best for deep analysis tasks.
+[Qwen_Qwen3.6-27B-Q4_K_M]
+
+; Smaller context than 14B to keep the KV cache on-GPU. At 16 K the KV cache
+; is roughly half the size, which reduces how much spills to CPU on each
+; forward pass — meaningful when every byte of VRAM is already spoken for.
+; Default: 0 (read from model metadata)
+ctx-size = 16384
+
+; Cap generation at 4096 tokens. Default: -1 (unlimited)
+n-predict = 4096
+
+[Qwopus3.6-27B-v2-MTP-Q4_K_M]
+
+ctx-size = 32768
+n-predict = 4096
+spec-type = draft-mtp
+spec-draft-p-min = 0.75
+spec-draft-n-max = 3
+
+[Qwopus3.6-35B-A3B-v1-MTP-Q4_K_M]
+
+ctx-size = 32768
+n-predict = 4096
+spec-type = draft-mtp
+spec-draft-p-min = 0.75
+spec-draft-n-max = 3
+
+[Qwopus3.5-9B-Coder-MTP-Q8_0]
+
+ctx-size = 65536
+n-predict = 4096
+spec-type = draft-mtp
+spec-draft-p-min = 0.75
+spec-draft-n-max = 3
+
+[agentica-org_DeepCoder-14B-Preview-Q5_K_M]
+
+ctx-size = 32768
+n-predict = 4096
--- a/config/llama-server/start.sh
+++ b/config/llama-server/start.sh
@ -0,0 +1,9 @@
+#!/bin/bash
+# Launcher for llama-server; this is the ExecStart target of
+# llama-server.service. Starts the server on 127.0.0.1:8080 with
+# /home/dev/models as its models directory and presets.ini from that same
+# directory as its preset file.
+
+# Prepend /opt/llama-server to the library search path, keeping any entries
+# that were already set (presumably for shared libraries shipped next to the
+# binary — confirm against the install layout).
+export LD_LIBRARY_PATH=/opt/llama-server${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+
+# Stop here if the install directory is missing instead of launching the
+# server from whatever working directory the caller happened to have.
+cd /opt/llama-server || exit 1
+
+# exec replaces this shell, so the service's main process is llama-server
+# itself rather than a wrapper shell.
+exec /opt/llama-server/llama-server \
+  --models-dir /home/dev/models \
+  --models-max 1 \
+  --models-preset /home/dev/models/presets.ini \
+  --host 127.0.0.1 \
+  --port 8080
--- a/config/opencode/opencode.json
+++ b/config/opencode/opencode.json
@ -0,0 +1,172 @@
+{
+  "$schema": "https://opencode.ai/config.json",
+  "default_agent": "orchestrator",
+  "compaction": {
+    "reserved": 3000
+  },
+  "agent": {
+    "orchestrator": {
+      "mode": "all",
+      "model": "llama-server/Qwopus3.6-27B-v2-MTP-Q4_K_M",
+      "permission": {
+        "edit": "deny",
+        "bash": {
+          "*": "deny",
+          "* /tmp/.last-user-prompt.txt": "allow",
+          "* /tmp/.last-user-prompt.txt << *": "allow"
+        }
+      }
+    },
+    "build": {
+      "mode": "subagent",
+      "permission": {
+        "webfetch": "deny",
+        "websearch": "deny",
+        "question": "deny",
+        "todowrite": "deny",
+        "skill": "deny"
+      }
+    },
+    "research": {
+      "mode": "all",
+      "permission": {
+        "*": "allow"
+      }
+    }
+  },
+  "permission": {
+    "external_directory": {
+      "/tmp/**": "allow",
+      "~/dotfiles/**": "allow",
+      "~/.config/opencode/**": "allow",
+      "~/.local/share/opencode/log/**": "allow",
+      "~/.copilot/**": "allow",
+      "~/code/**": "allow"
+    },
+    "websearch": "allow"
+  },
+  "share": "disabled",
+  "lsp": true,
+  "provider": {
+    "llama-server": {
+      "npm": "@ai-sdk/openai-compatible",
+      "name": "llama-server",
+      "options": {
+        "baseURL": "http://127.0.0.1:8080/v1"
+      },
+      "models": {
+        "OmniCoder-2-9B.Q8_0": {
+          "name": "OmniCoder 2 9B Q8 (llama-server)",
+          "tools": true,
+          "agent": {
+            "plan": {
+              "temperature": 0.1
+            },
+            "build": {
+              "temperature": 0.3
+            },
+            "brainstorm": {
+              "temperature": 0.7
+            }
+          },
+          "limit": {
+            "context": 32768,
+            "output": 4096
+          }
+        },
+        "Qwopus3.5-9B-Coder-MTP-Q8_0": {
+          "name": "Qwopus3.5 9B Coder MTP Q8 (llama-server)",
+          "tools": true,
+          "agent": {
+            "plan": {
+              "temperature": 0.1
+            },
+            "build": {
+              "temperature": 0.3
+            },
+            "brainstorm": {
+              "temperature": 0.7
+            }
+          },
+          "limit": {
+            "context": 32768,
+            "output": 4096
+          }
+        },
+        "Qwopus3.6-27B-v2-MTP-Q4_K_M": {
+          "name": "Qwopus3.6 27B MTP Q4 (llama-server)",
+          "tools": true,
+          "agent": {
+            "plan": {
+              "temperature": 0.1
+            },
+            "orchestrator": {
+              "temperature": 0.2
+            },
+            "build": {
+              "temperature": 0.3
+            },
+            "brainstorm": {
+              "temperature": 0.7
+            }
+          },
+          "limit": {
+            "context": 32768,
+            "output": 4096
+          }
+        },
+        "Qwopus3.6-35B-A3B-v1-MTP-Q4_K_M": {
+          "name": "Qwopus3.6 35B A3B MTP Q4 (llama-server)",
+          "tools": true,
+          "agent": {
+            "plan": {
+              "temperature": 0.1
+            },
+            "orchestrator": {
+              "temperature": 0.2
+            },
+            "build": {
+              "temperature": 0.3
+            },
+            "brainstorm": {
+              "temperature": 0.7
+            }
+          },
+          "limit": {
+            "context": 32768,
+            "output": 4096
+          }
+        },
+        "agentica-org_DeepCoder-14B-Preview-Q5_K_M": {
+          "name": "DeepCoder 14B Q5 (llama-server)",
+          "tools": true,
+          "agent": {
+            "plan": {
+              "temperature": 0.1
+            },
+            "build": {
+              "temperature": 0.3
+            },
+            "brainstorm": {
+              "temperature": 0.7
+            }
+          },
+          "limit": {
+            "context": 32768,
+            "output": 4096
+          }
+        }
+      }
+    }
+  },
+  "mcp": {
+    "all-agents": {
+      "type": "local",
+      "command": [
+        "node",
+        "--experimental-strip-types",
+        "/home/dev/dotfiles/.agents/mcp/index.ts"
+      ]
+    }
+  }
+}
--- a/install.sh
+++ b/install.sh
@ -1,10 +1,15 @@
 #!/usr/bin/env bash
-# install.sh — Wire ~/dotfiles/.agents/ into global tool configs. (Script lives at repo root.)
+# install.sh — Wire .agents/ into global tool configs.
+# Run with --host to also install llama-server config and systemd services.
 # Idempotent: safe to re-run. Creates dirs, symlinks, and config entries.
 # Run once per machine after cloning dotfiles.
 set -euo pipefail

+INSTALL_HOST=false
+for arg in "$@"; do case "$arg" in --host) INSTALL_HOST=true ;; esac; done
+
 DOTFILES_AGENTS="$(cd "$(dirname "$0")" && pwd)/.agents"
+DOTFILES_CONFIG="$(cd "$(dirname "$0")" && pwd)/config"

 log() { printf '\033[0;32m✓\033[0m %s\n' "$1"; }
 warn() { printf '\033[0;33m⚠\033[0m %s\n' "$1"; }
@ -86,32 +91,98 @@ else
  log "OpenCode AGENTS.md symlink: $OC_AGENTS_LINK → $OC_AGENTS_TARGET"
 fi

-# ── 4. OpenCode global MCP entry ────────────────────────────────────────────
-OC_CONFIG="$HOME/.config/opencode/opencode.json"
-MCP_KEY="all-agents"
-MCP_CMD="[\"node\", \"--experimental-strip-types\", \"$DOTFILES_AGENTS/mcp/index.ts\"]"
+# ── 4. OpenCode global config (opencode.json) ────────────────────────────────
+OC_CONFIG_SOURCE="$DOTFILES_CONFIG/opencode/opencode.json"
+OC_CONFIG_LINK="$HOME/.config/opencode/opencode.json"

-if [[ ! -f "$OC_CONFIG" ]]; then
-  warn "No OpenCode config at $OC_CONFIG — creating minimal config with MCP entry."
-  printf '{\n  "$schema": "https://opencode.ai/config.json",\n  "mcp": {\n    "%s": {\n      "type": "local",\n      "command": %s\n    }\n  }\n}\n' "$MCP_KEY" "$MCP_CMD" > "$OC_CONFIG"
-  log "Created $OC_CONFIG with all-agents MCP entry"
-elif node -e "const c=JSON.parse(require('fs').readFileSync('$OC_CONFIG','utf8')); process.exit(c.mcp && c.mcp['$MCP_KEY'] ? 0 : 1)" 2>/dev/null; then
-  skip "OpenCode MCP entry '$MCP_KEY' already present in $OC_CONFIG"
+mkdir -p "$(dirname "$OC_CONFIG_LINK")"
+if [[ -L "$OC_CONFIG_LINK" && "$(readlink "$OC_CONFIG_LINK")" == "$OC_CONFIG_SOURCE" ]]; then
+  skip "OpenCode config symlink already set: $OC_CONFIG_LINK"
 else
-  # Merge the MCP entry using node — jq may not be available everywhere
-  node -e "
-const fs = require('fs');
-const path = '$OC_CONFIG';
-const config = JSON.parse(fs.readFileSync(path, 'utf8'));
-config.mcp = config.mcp || {};
-config.mcp['$MCP_KEY'] = { type: 'local', command: $MCP_CMD };
-fs.writeFileSync(path, JSON.stringify(config, null, 2) + '\n');
-console.log('Merged all-agents MCP entry into ' + path);
-"
-  log "OpenCode MCP entry merged: $OC_CONFIG"
+  # A regular file (or directory) at the link path would be silently destroyed
+  # by `ln -sf` — and earlier versions of this script wrote a real
+  # opencode.json here — so move it aside before linking.
+  if [[ -e "$OC_CONFIG_LINK" && ! -L "$OC_CONFIG_LINK" ]]; then
+    OC_CONFIG_BACKUP="$OC_CONFIG_LINK.bak.$(date +%Y%m%d%H%M%S)"
+    mv -- "$OC_CONFIG_LINK" "$OC_CONFIG_BACKUP"
+    warn "Existing OpenCode config moved to $OC_CONFIG_BACKUP"
+  fi
+  # -n: if the path is currently a symlink to a directory, replace the symlink
+  # itself rather than creating the new link inside that directory.
+  ln -sfn "$OC_CONFIG_SOURCE" "$OC_CONFIG_LINK"
+  log "OpenCode config symlink: $OC_CONFIG_LINK → $OC_CONFIG_SOURCE"
 fi

-# ── 5. VS Code global MCP ────────────────────────────────────────────────────
+# ── 5. Llama-server host config (requires --host) ───────────────────────────
+# Host-only steps, in order: model downloads (5a), presets.ini (5b), systemd
+# units (5c) and the launcher script (5d). The unit and launcher destinations
+# are under /etc and /opt, so this branch typically needs root; a failed copy
+# aborts the script through `set -e`.
+if [[ "$INSTALL_HOST" != "true" ]]; then
+  skip "Llama-server host config skipped (use --host to install)"
+else
+
+  # The unit files and start.sh hard-code /home/dev/models, while everything
+  # below writes to $HOME/models. Flag the mismatch up front rather than
+  # leaving a service that cannot find its models.
+  if [[ "$HOME" != "/home/dev" ]]; then
+    warn "Models are installed to $HOME/models, but the llama-server units and start.sh expect /home/dev/models"
+  fi
+
+  # ── 5a. Model downloads (requires --host) ──────────────────────────────────
+  if ! command -v huggingface-cli >/dev/null 2>&1; then
+    warn "huggingface-cli not found — skipping model downloads (install via 'pip install huggingface_hub')"
+  else
+    # Download file $2 from Hugging Face repo $1 into directory $3, unless a
+    # file of that name is already there. A failed download aborts the script.
+    _hf_download() {
+      local repo="$1" file="$2" dir="$3"
+      local dest="$dir/$file"
+      if [[ -f "$dest" ]]; then
+        skip "Model already present: $dest"
+      else
+        mkdir -p "$dir"
+        huggingface-cli download "$repo" "$file" --local-dir "$dir" >/dev/null
+        log "Downloaded model: $repo/$file → $dest"
+      fi
+    }
+    _hf_download "Jackrong/Qwopus3.6-27B-v2-MTP-GGUF" "Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf" "$HOME/models"
+    _hf_download "Jackrong/Qwopus3.5-9B-Coder-MTP-GGUF" "Qwopus3.5-9B-Coder-MTP-Q8_0.gguf" "$HOME/models"
+    _hf_download "bartowski/agentica-org_DeepCoder-14B-Preview-GGUF" "agentica-org_DeepCoder-14B-Preview-Q5_K_M.gguf" "$HOME/models"
+    _hf_download "byteshape/Qwen3.6-35B-A3B-MTP-GGUF" "Qwen3.6-35B-A3B-IQ3_S-3.06bpw.gguf" "$HOME/models"
+    _hf_download "Jackrong/Qwopus3.6-35B-A3B-v1-MTP-GGUF" "Qwopus3.6-35B-A3B-v1-MTP-Q4_K_M.gguf" "$HOME/models"
+    _hf_download "mradermacher/OmniCoder-2-9B-GGUF" "OmniCoder-2-9B.Q8_0.gguf" "$HOME/models/OmniCoder-2-9B.Q8_0"
+    _hf_download "mradermacher/OmniCoder-2-9B-GGUF" "mmproj-Q8_0.gguf" "$HOME/models/OmniCoder-2-9B.Q8_0"
+    _hf_download "bartowski/Qwen_Qwen3-14B-GGUF" "Qwen_Qwen3-14B-Q4_K_M.gguf" "$HOME/models"
+    _hf_download "bartowski/Qwen_Qwen3.6-27B-GGUF" "Qwen_Qwen3.6-27B-Q4_K_M.gguf" "$HOME/models"
+  fi
+
+  # Install file $1 at path $2 with permission bits $3, unless the contents
+  # already match. Sets INSTALL_FILE_CHANGED to "true" when a copy was made and
+  # "false" otherwise. The result is reported through a variable rather than
+  # the exit status so callers can invoke this as a plain command, which keeps
+  # `set -e` effective for the commands inside.
+  _install_file() {
+    local src="$1" dst="$2" mode="$3"
+    local label="${dst##*/}"
+    INSTALL_FILE_CHANGED=false
+    # diff also exits non-zero when $dst does not exist yet, which routes a
+    # first-time install into the copy branch.
+    if diff -q "$src" "$dst" >/dev/null 2>&1; then
+      skip "$label already up-to-date: $dst"
+    else
+      mkdir -p "$(dirname "$dst")"
+      install -m "$mode" "$src" "$dst"
+      INSTALL_FILE_CHANGED=true
+      log "Installed $label → $dst"
+    fi
+  }
+
+  # ── 5b. Model presets ──────────────────────────────────────────────────────
+  _install_file "$DOTFILES_CONFIG/llama-server/presets.ini" "$HOME/models/presets.ini" 0644
+
+  # ── 5c. systemd units ──────────────────────────────────────────────────────
+  UNITS_CHANGED=false
+  for _unit in llama-server.service llama-server-presets.path llama-server-presets.service; do
+    _install_file "$DOTFILES_CONFIG/llama-server/$_unit" "/etc/systemd/system/$_unit" 0644
+    if [[ "$INSTALL_FILE_CHANGED" == "true" ]]; then
+      UNITS_CHANGED=true
+    fi
+  done
+
+  # systemd keeps using its loaded copy of a unit until told to re-read the
+  # files, so reload whenever one was added or changed. Best-effort: hosts
+  # without a running systemd only get a reminder.
+  if [[ "$UNITS_CHANGED" == "true" ]]; then
+    if command -v systemctl >/dev/null 2>&1 && systemctl daemon-reload; then
+      log "Reloaded systemd unit files (systemctl daemon-reload)"
+    else
+      warn "systemd unit files changed — run 'systemctl daemon-reload' to pick them up"
+    fi
+  fi
+
+  # ── 5d. Launcher script ────────────────────────────────────────────────────
+  START_DST="/opt/llama-server/start.sh"
+  _install_file "$DOTFILES_CONFIG/llama-server/start.sh" "$START_DST" 0755
+  # A copy placed by an earlier run may have identical contents but lack the
+  # executable bit, which llama-server.service needs for its ExecStart.
+  if [[ ! -x "$START_DST" ]]; then
+    chmod 0755 "$START_DST"
+    log "Made start.sh executable: $START_DST"
+  fi
+fi
+
+# ── 6. VS Code global MCP ────────────────────────────────────────────────────
 # Primary remote/server path; falls back to local if running VS Code locally.
 VSCODE_MCP_PATHS=(
  "$HOME/.vscode-server/data/User/mcp.json"
@ -121,6 +192,7 @@ VSCODE_MCP_PATHS=(

 for VSCODE_MCP in "${VSCODE_MCP_PATHS[@]}"; do
  if [[ -d "$(dirname "$VSCODE_MCP")" ]]; then
+    MCP_KEY="all-agents"
    MCP_SERVER_CMD="node"
    MCP_SERVER_ARGS="[\"--experimental-strip-types\", \"$DOTFILES_AGENTS/mcp/index.ts\"]"

@ -150,7 +222,7 @@ if (changed) {
  fi
 done

-# ── 6. VS Code global prompts dir ───────────────────────────────────────────
+# ── 7. VS Code global prompts dir ───────────────────────────────────────────
 for VSCODE_PROMPTS_DIR in \
  "$HOME/.vscode-server/data/User/prompts" \
  "$HOME/.vscode/data/User/prompts"; do
@ -161,7 +233,7 @@ for VSCODE_PROMPTS_DIR in \
  fi
 done

-# ── 7. MCP server dependencies ───────────────────────────────────────────────
+# ── 8. MCP server dependencies ───────────────────────────────────────────────
 MCP_DIR="$DOTFILES_AGENTS/mcp"
 if [[ ! -d "$MCP_DIR/node_modules/@modelcontextprotocol" ]]; then
  log "Installing MCP server dependencies (npm install in $MCP_DIR)..."