fix: move more into install.sh
model config, downloads, llama-server service and a README.md to explain --host
This commit is contained in:
parent
d237fcfcd3
commit
2a00366204
39
README.md
Normal file
39
README.md
Normal file
@ -0,0 +1,39 @@
|
||||
# dotfiles
|
||||
|
||||
Personal dotfiles and AI-agent infrastructure for VS Code Copilot and OpenCode.
|
||||
|
||||
## Quick Start
|
||||
|
||||
For host machines, install dotfiles plus llama-server config and systemd services via the `--host` flag:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/username/dotfiles ~/dotfiles
|
||||
~/dotfiles/install.sh --host
|
||||
```
|
||||
|
||||
In devcontainers, omit the `--host` flag: run `install.sh` from the Dockerfile, rely on VS Code settings, or — possibly better — use a devcontainer "features" config such as:
|
||||
|
||||
```json
|
||||
"features": {
|
||||
"ghcr.io/willfantom/features/dotfiles:1": {
|
||||
"repository": "git@git.bcdewitt.ddns.net:bcdewitt/dotfiles.git",
|
||||
"targetPath": "~/dotfiles",
|
||||
"installCommand": "install.sh"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## What Gets Installed
|
||||
|
||||
**Basic install** (`install.sh`):
|
||||
- Agent hooks wired into VS Code Copilot and OpenCode (the `.agents/` infrastructure)
|
||||
- OpenCode config symlinked to `~/.config/opencode/opencode.json`
|
||||
|
||||
**Host install** (`install.sh --host`):
|
||||
- Everything in basic install, plus:
|
||||
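- llama-server presets, startup script, and systemd units from `config/llama-server/`
- Model downloads into `~/models` via `huggingface-cli` (skipped with a warning if it is not installed)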
|
||||
|
||||
## Idempotent
|
||||
|
||||
The install script is idempotent — safe to re-run at any time. It skips steps that
|
||||
are already in place and only changes what needs updating.
|
||||
8
config/llama-server/llama-server-presets.path
Normal file
8
config/llama-server/llama-server-presets.path
Normal file
@ -0,0 +1,8 @@
|
||||
[Unit]
|
||||
Description=Restart llama-server when presets.ini changes
|
||||
|
||||
[Path]
|
||||
PathModified=/home/dev/models/presets.ini
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
6
config/llama-server/llama-server-presets.service
Normal file
6
config/llama-server/llama-server-presets.service
Normal file
@ -0,0 +1,6 @@
|
||||
[Unit]
|
||||
Description=Restart llama-server (triggered by presets.ini change)
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/bin/systemctl restart llama-server
|
||||
15
config/llama-server/llama-server.service
Normal file
15
config/llama-server/llama-server.service
Normal file
@ -0,0 +1,15 @@
|
||||
[Unit]
|
||||
Description=llama-server
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
ExecStart=/opt/llama-server/start.sh
|
||||
User=ollama
|
||||
Group=ollama
|
||||
Restart=always
|
||||
RestartSec=3
|
||||
Environment="PATH=/home/dev/.nvm/versions/node/v24.15.0/bin:/home/dev/.opencode/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/usr/lib/wsl/lib:/mnt/c/Program Files (x86)/NVIDIA Corporation/PhysX/Common:/mnt/c/Python312/Scripts/:/mnt/c/Python312/:/mnt/c/Program Files/Microsoft/jdk-17.0.8.7-hotspot/bin:/mnt/c/Program Files/Oculus/Support/oculus-runtime:/mnt/c/Windows/system32:/mnt/c/Windows:/mnt/c/Windows/System32/Wbem:/mnt/c/Windows/System32/WindowsPowerShell/v1.0/:/mnt/c/Windows/System32/OpenSSH/:/mnt/c/Program Files/dotnet/:/mnt/c/Program Files/Microsoft VS Code/bin:/mnt/c/WINDOWS/system32:/mnt/c/WINDOWS:/mnt/c/WINDOWS/System32/Wbem:/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/:/mnt/c/WINDOWS/System32/OpenSSH/:/mnt/c/Program Files/nodejs/:/mnt/c/ProgramData/chocolatey/bin:/mnt/c/Users/Dev/AppData/Local/Programs/cursor/resources/app/bin:/mnt/c/WINDOWS/system32:/mnt/c/WINDOWS:/mnt/c/WINDOWS/System32/Wbem:/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/:/mnt/c/WINDOWS/System32/OpenSSH/:/mnt/c/Program Files/Docker/Docker/resources/bin:/mnt/c/Users/Dev/AppData/Local/Microsoft/WindowsApps:/mnt/c/Users/Dev/AppData/Roaming/npm:/mnt/c/Users/Dev/.dotnet/tools:/mnt/c/Users/Dev/AppData/Local/Microsoft/WinGet/Packages/albertony.npiperelay_Microsoft.Winget.Source_8wekyb3d8bbwe:/mnt/c/Users/Dev/.lmstudio/bin:/snap/bin"
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
147
config/llama-server/presets.ini
Normal file
147
config/llama-server/presets.ini
Normal file
@ -0,0 +1,147 @@
|
||||
version = 1
|
||||
|
||||
; ─── Global ──────────────────────────────────────────────────────────────────
|
||||
; Settings in [*] are inherited by every model loaded by the router.
|
||||
; Per-model sections below override individual keys.
|
||||
[*]
|
||||
|
||||
; Number of model layers to offload to GPU.
|
||||
; 99 means "offload everything" — llama.cpp loads as many as fit and falls back
|
||||
; to CPU automatically for any overflow. Using an explicit value avoids the
|
||||
; occasional conservative auto-estimate.
|
||||
; Default: auto
|
||||
; n-gpu-layers = 99
|
||||
|
||||
; Flash Attention: reduces KV-cache VRAM usage and speeds up long-context
|
||||
; inference by computing attention without materializing the full NxN matrix.
|
||||
; "on" forces it; "auto" (default) enables it when CUDA is detected — same
|
||||
; effect in practice, but explicit is clearer here.
|
||||
; Default: auto
|
||||
flash-attn = on
|
||||
|
||||
; Number of CPU threads used for non-GPU work: tokenization, sampling, and any
|
||||
; layers that overflow VRAM during hybrid inference. ~2/3 of logical cores is
|
||||
; the rule of thumb; going higher causes contention on the same cores the GPU
|
||||
; DMA uses. (Machine has 12 logical cores → 8 threads.)
|
||||
; Default: -1 (use all cores)
|
||||
threads = 8
|
||||
|
||||
; Number of inference slots (parallel sequences). 1 = single-user server with
|
||||
; no batching overhead. Increase only if you need concurrent requests; each
|
||||
; extra slot consumes a proportional share of KV-cache VRAM.
|
||||
; Default: -1 (auto, usually 1)
|
||||
parallel = 1
|
||||
|
||||
; Jinja2 chat templating — required for models with complex chat templates
|
||||
; (e.g. Qwen3, which uses raise_exception() guards). Without this, llama.cpp
|
||||
; falls back to a static PEG auto-parser that can't handle those templates.
|
||||
jinja = on
|
||||
|
||||
; Token budget for chain-of-thought reasoning.
|
||||
; -1 = unrestricted (model decides when to stop thinking)
|
||||
; 0 = disable thinking entirely
|
||||
; N = hard cap at N tokens, then force the model to answer
|
||||
; Commented out: matches the default (-1 = unrestricted).
|
||||
; reasoning-budget = -1
|
||||
|
||||
ctx-size = 32768
|
||||
n-predict = 4096
|
||||
|
||||
; ─── Qwen3-14B ───────────────────────────────────────────────────────────────
|
||||
; ~8.5 GB GGUF — fits fully in 12 GB VRAM. Fast (~12–18 tok/s). Good daily
|
||||
; driver for interactive coding and Q&A.
|
||||
[Qwen_Qwen3-14B-Q4_K_M]
|
||||
|
||||
; Full 32 K context is safe: 14B fits in VRAM with plenty of headroom for the
|
||||
; KV cache. At 32 K tokens × 1024 KV dims (8 KV heads × 128) × 2 bytes × 2 (K+V) × 40 layers ≈ 5 GB worst-case KV.
|
||||
; Default: 0 (read from model metadata, typically the training context limit)
|
||||
ctx-size = 32768
|
||||
|
||||
; Cap generation at 4096 tokens. Prevents runaway responses; raise if you need
|
||||
; longer output (documentation, large refactors). Default: -1 (unlimited)
|
||||
n-predict = 4096
|
||||
|
||||
|
||||
; ─── OmniCoder-2-9B ──────────────────────────────────────────────────────────
|
||||
; ~9.4 GB GGUF — fits fully in 12 GB VRAM. Fast generation. Vision-capable
|
||||
; (multimodal projector at OmniCoder-2-9B.Q8_0/mmproj-Q8_0.gguf — auto-detected
|
||||
; from subdirectory layout by the router).
|
||||
[OmniCoder-2-9B.Q8_0]
|
||||
|
||||
; Full 32 K context fits comfortably alongside 9B weights.
|
||||
; Default: 0 (read from model metadata)
|
||||
ctx-size = 32768
|
||||
|
||||
; Cap generation at 4096 tokens. Default: -1 (unlimited)
|
||||
n-predict = 4096
|
||||
|
||||
|
||||
; ─── Qwen3.6-35B-A3B (MoE + MTP) ────────────────────────────────────────────
|
||||
; 13.6 GB GGUF — ~12 GB on GPU, ~1.6 GB CPU offload on a 12 GB card.
|
||||
; MoE model: only ~3B parameters active per forward pass despite 35B total.
|
||||
; MTP (multi-token prediction) heads baked in — uses draft-mtp speculative
|
||||
; decoding to roughly double throughput vs non-speculative. Requires b9279+.
|
||||
[Qwen3.6-35B-A3B-IQ3_S-3.06bpw]
|
||||
|
||||
; KV cache is small (~31 MiB/1K tokens) due to GQA — 32K context only needs
|
||||
; ~1 GB KV cache, which pages to CPU gracefully without major throughput loss.
|
||||
ctx-size = 32768
|
||||
|
||||
; Cap generation at 4096 tokens. Default: -1 (unlimited)
|
||||
n-predict = 4096
|
||||
|
||||
; Multi-token prediction speculative decoding.
|
||||
; spec-type = draft-mtp uses MTP heads built into the model weights.
|
||||
spec-type = draft-mtp
|
||||
|
||||
; Minimum acceptance probability for a speculated draft token (0–1).
|
||||
; 0.75 = accept tokens the model is 75%+ confident in. Lower = more aggressive
|
||||
; speculation (faster but slightly more divergence risk).
|
||||
spec-draft-p-min = 0.75
|
||||
|
||||
; Max tokens to speculate per step. 3 is the sweet spot for Qwen3.6 MTP.
|
||||
spec-draft-n-max = 3
|
||||
|
||||
|
||||
; ─── Qwen3.6-27B ─────────────────────────────────────────────────────────────
|
||||
; 17 GB GGUF — ~12 GB on GPU, ~5 GB CPU offload on a 12 GB card.
|
||||
; Slower (~4–8 tok/s) due to CPU↔GPU transfers; best for deep analysis tasks.
|
||||
[Qwen_Qwen3.6-27B-Q4_K_M]
|
||||
|
||||
; Smaller context than 14B to keep the KV cache on-GPU. At 16 K the KV cache
|
||||
; is roughly half the size, which reduces how much spills to CPU on each
|
||||
; forward pass — meaningful when every byte of VRAM is already spoken for.
|
||||
; Default: 0 (read from model metadata)
|
||||
ctx-size = 16384
|
||||
|
||||
; Cap generation at 4096 tokens. Default: -1 (unlimited)
|
||||
n-predict = 4096
|
||||
|
||||
[Qwopus3.6-27B-v2-MTP-Q4_K_M]
|
||||
|
||||
ctx-size = 32768
|
||||
n-predict = 4096
|
||||
spec-type = draft-mtp
|
||||
spec-draft-p-min = 0.75
|
||||
spec-draft-n-max = 3
|
||||
|
||||
[Qwopus3.6-35B-A3B-v1-MTP-Q4_K_M]
|
||||
|
||||
ctx-size = 32768
|
||||
n-predict = 4096
|
||||
spec-type = draft-mtp
|
||||
spec-draft-p-min = 0.75
|
||||
spec-draft-n-max = 3
|
||||
|
||||
[Qwopus3.5-9B-Coder-MTP-Q8_0]
|
||||
|
||||
ctx-size = 65536
|
||||
n-predict = 4096
|
||||
spec-type = draft-mtp
|
||||
spec-draft-p-min = 0.75
|
||||
spec-draft-n-max = 3
|
||||
|
||||
[agentica-org_DeepCoder-14B-Preview-Q5_K_M]
|
||||
|
||||
ctx-size = 32768
|
||||
n-predict = 4096
|
||||
9
config/llama-server/start.sh
Executable file
9
config/llama-server/start.sh
Executable file
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# start.sh — launch llama-server; this is the ExecStart of llama-server.service.
#
# Environment:
#   LLAMA_MODELS_DIR  directory holding the models and presets.ini
#                     (default: /home/dev/models). llama-server-presets.path
#                     watches the default location, so update that unit as
#                     well if you override this.
models_dir="${LLAMA_MODELS_DIR:-/home/dev/models}"

# Prepend the install dir to the library search path, keeping any existing value.
export LD_LIBRARY_PATH=/opt/llama-server${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}

# Stop here if the install dir is missing instead of exec'ing from the wrong cwd.
cd /opt/llama-server || exit 1

exec /opt/llama-server/llama-server \
  --models-dir "$models_dir" \
  --models-max 1 \
  --models-preset "$models_dir/presets.ini" \
  --host 127.0.0.1 \
  --port 8080
|
||||
172
config/opencode/opencode.json
Normal file
172
config/opencode/opencode.json
Normal file
@ -0,0 +1,172 @@
|
||||
{
|
||||
"$schema": "https://opencode.ai/config.json",
|
||||
"default_agent": "orchestrator",
|
||||
"compaction": {
|
||||
"reserved": 3000
|
||||
},
|
||||
"agent": {
|
||||
"orchestrator": {
|
||||
"mode": "all",
|
||||
"model": "llama-server/Qwopus3.6-27B-v2-MTP-Q4_K_M",
|
||||
"permission": {
|
||||
"edit": "deny",
|
||||
"bash": {
|
||||
"*": "deny",
|
||||
"* /tmp/.last-user-prompt.txt": "allow",
|
||||
"* /tmp/.last-user-prompt.txt << *": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"build": {
|
||||
"mode": "subagent",
|
||||
"permission": {
|
||||
"webfetch": "deny",
|
||||
"websearch": "deny",
|
||||
"question": "deny",
|
||||
"todowrite": "deny",
|
||||
"skill": "deny"
|
||||
}
|
||||
},
|
||||
"research": {
|
||||
"mode": "all",
|
||||
"permission": {
|
||||
"*": "allow"
|
||||
}
|
||||
}
|
||||
},
|
||||
"permission": {
|
||||
"external_directory": {
|
||||
"/tmp/**": "allow",
|
||||
"~/dotfiles/**": "allow",
|
||||
"~/.config/opencode/**": "allow",
|
||||
"~/.local/share/opencode/log/**": "allow",
|
||||
"~/.copilot/**": "allow",
|
||||
"~/code/**": "allow"
|
||||
},
|
||||
"websearch": "allow"
|
||||
},
|
||||
"share": "disabled",
|
||||
"lsp": true,
|
||||
"provider": {
|
||||
"llama-server": {
|
||||
"npm": "@ai-sdk/openai-compatible",
|
||||
"name": "llama-server",
|
||||
"options": {
|
||||
"baseURL": "http://127.0.0.1:8080/v1"
|
||||
},
|
||||
"models": {
|
||||
"OmniCoder-2-9B.Q8_0": {
|
||||
"name": "OmniCoder 2 9B Q8 (llama-server)",
|
||||
"tools": true,
|
||||
"agent": {
|
||||
"plan": {
|
||||
"temperature": 0.1
|
||||
},
|
||||
"build": {
|
||||
"temperature": 0.3
|
||||
},
|
||||
"brainstorm": {
|
||||
"temperature": 0.7
|
||||
}
|
||||
},
|
||||
"limit": {
|
||||
"context": 32768,
|
||||
"output": 4096
|
||||
}
|
||||
},
|
||||
"Qwopus3.5-9B-Coder-MTP-Q8_0": {
|
||||
"name": "Qwopus3.5 9B Coder MTP Q8 (llama-server)",
|
||||
"tools": true,
|
||||
"agent": {
|
||||
"plan": {
|
||||
"temperature": 0.1
|
||||
},
|
||||
"build": {
|
||||
"temperature": 0.3
|
||||
},
|
||||
"brainstorm": {
|
||||
"temperature": 0.7
|
||||
}
|
||||
},
|
||||
"limit": {
|
||||
"context": 32768,
|
||||
"output": 4096
|
||||
}
|
||||
},
|
||||
"Qwopus3.6-27B-v2-MTP-Q4_K_M": {
|
||||
"name": "Qwopus3.6 27B MTP Q4 (llama-server)",
|
||||
"tools": true,
|
||||
"agent": {
|
||||
"plan": {
|
||||
"temperature": 0.1
|
||||
},
|
||||
"orchestrator": {
|
||||
"temperature": 0.2
|
||||
},
|
||||
"build": {
|
||||
"temperature": 0.3
|
||||
},
|
||||
"brainstorm": {
|
||||
"temperature": 0.7
|
||||
}
|
||||
},
|
||||
"limit": {
|
||||
"context": 32768,
|
||||
"output": 4096
|
||||
}
|
||||
},
|
||||
"Qwopus3.6-35B-A3B-v1-MTP-Q4_K_M": {
|
||||
"name": "Qwopus3.6 35B A3B MTP Q4 (llama-server)",
|
||||
"tools": true,
|
||||
"agent": {
|
||||
"plan": {
|
||||
"temperature": 0.1
|
||||
},
|
||||
"orchestrator": {
|
||||
"temperature": 0.2
|
||||
},
|
||||
"build": {
|
||||
"temperature": 0.3
|
||||
},
|
||||
"brainstorm": {
|
||||
"temperature": 0.7
|
||||
}
|
||||
},
|
||||
"limit": {
|
||||
"context": 32768,
|
||||
"output": 4096
|
||||
}
|
||||
},
|
||||
"agentica-org_DeepCoder-14B-Preview-Q5_K_M": {
|
||||
"name": "DeepCoder 14B Q5 (llama-server)",
|
||||
"tools": true,
|
||||
"agent": {
|
||||
"plan": {
|
||||
"temperature": 0.1
|
||||
},
|
||||
"build": {
|
||||
"temperature": 0.3
|
||||
},
|
||||
"brainstorm": {
|
||||
"temperature": 0.7
|
||||
}
|
||||
},
|
||||
"limit": {
|
||||
"context": 32768,
|
||||
"output": 4096
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mcp": {
|
||||
"all-agents": {
|
||||
"type": "local",
|
||||
"command": [
|
||||
"node",
|
||||
"--experimental-strip-types",
|
||||
"/home/dev/dotfiles/.agents/mcp/index.ts"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
122
install.sh
122
install.sh
@ -1,10 +1,15 @@
|
||||
#!/usr/bin/env bash
|
||||
# install.sh — Wire ~/dotfiles/.agents/ into global tool configs. (Script lives at repo root.)
|
||||
# install.sh — Wire .agents/ into global tool configs.
|
||||
# Run with --host to also install llama-server config and systemd services.
|
||||
# Idempotent: safe to re-run. Creates dirs, symlinks, and config entries.
|
||||
# Run once per machine after cloning dotfiles.
|
||||
set -euo pipefail
|
||||
|
||||
INSTALL_HOST=false
|
||||
# Scan the command line: --host switches on the host-only steps; every other
# argument is ignored.
for arg in "$@"; do
  if [[ "$arg" == "--host" ]]; then
    INSTALL_HOST=true
  fi
done
|
||||
|
||||
DOTFILES_AGENTS="$(cd "$(dirname "$0")" && pwd)/.agents"
|
||||
DOTFILES_CONFIG="$(cd "$(dirname "$0")" && pwd)/config"
|
||||
|
||||
log() { printf '\033[0;32m✓\033[0m %s\n' "$1"; }
|
||||
warn() { printf '\033[0;33m⚠\033[0m %s\n' "$1"; }
|
||||
@ -86,32 +91,98 @@ else
|
||||
log "OpenCode AGENTS.md symlink: $OC_AGENTS_LINK → $OC_AGENTS_TARGET"
|
||||
fi
|
||||
|
||||
# ── 4. OpenCode global MCP entry ────────────────────────────────────────────
|
||||
OC_CONFIG="$HOME/.config/opencode/opencode.json"
|
||||
MCP_KEY="all-agents"
|
||||
MCP_CMD="[\"node\", \"--experimental-strip-types\", \"$DOTFILES_AGENTS/mcp/index.ts\"]"
|
||||
# ── 4. OpenCode global config (opencode.json) ────────────────────────────────
|
||||
OC_CONFIG_SOURCE="$DOTFILES_CONFIG/opencode/opencode.json"
|
||||
OC_CONFIG_LINK="$HOME/.config/opencode/opencode.json"
|
||||
|
||||
# Point $OC_CONFIG_LINK at the repo-managed opencode.json.
# Reads:  OC_CONFIG_SOURCE (file inside the dotfiles repo), OC_CONFIG_LINK.
# Uses:   skip / warn / log helpers defined earlier in this script.
mkdir -p "$(dirname "$OC_CONFIG_LINK")"
if [[ -L "$OC_CONFIG_LINK" && "$(readlink "$OC_CONFIG_LINK")" == "$OC_CONFIG_SOURCE" ]]; then
  skip "OpenCode config symlink already set: $OC_CONFIG_LINK"
else
  # Earlier versions of this installer created or merged a real file at this
  # path. `ln -sf` would delete it without a trace, so move it aside first.
  if [[ -e "$OC_CONFIG_LINK" && ! -L "$OC_CONFIG_LINK" ]]; then
    OC_CONFIG_BACKUP="$OC_CONFIG_LINK.bak.$(date +%Y%m%d%H%M%S)"
    mv "$OC_CONFIG_LINK" "$OC_CONFIG_BACKUP"
    warn "Existing OpenCode config moved to $OC_CONFIG_BACKUP"
  fi
  # -n: replace a stale symlink itself rather than following it.
  ln -sfn "$OC_CONFIG_SOURCE" "$OC_CONFIG_LINK"
  log "OpenCode config symlink: $OC_CONFIG_LINK → $OC_CONFIG_SOURCE"
fi
|
||||
|
||||
# ── 5. VS Code global MCP ────────────────────────────────────────────────────
|
||||
# ── 5. Llama-server host config (requires --host) ───────────────────────────
|
||||
if [[ "$INSTALL_HOST" != "true" ]]; then
|
||||
skip "Llama-server host config skipped (use --host to install)"
|
||||
else
|
||||
|
||||
# ── 5a. Model downloads (requires --host) ──────────────────────────────────
|
||||
if ! command -v huggingface-cli >/dev/null 2>&1; then
|
||||
warn "huggingface-cli not found — skipping model downloads (install via 'pip install huggingface_hub')"
|
||||
else
|
||||
# _hf_download REPO FILE DIR
# Fetch FILE from the Hugging Face repository REPO into DIR, unless DIR/FILE
# already exists.
#   $1  repo  repository id, e.g. "owner/name-GGUF"
#   $2  file  file name inside the repository
#   $3  dir   local directory that receives the file (created if missing)
# Uses the skip / log / warn helpers and the huggingface-cli command.
# A failed download is reported with warn and does not fail the function, so
# one unavailable model cannot abort the rest of the install under `set -e`.
_hf_download() {
  local repo="$1" file="$2" dir="$3"
  local dest="$dir/$file"

  if [[ -f "$dest" ]]; then
    skip "Model already present: $dest"
    return 0
  fi

  mkdir -p "$dir"
  # Only stdout is discarded; anything the CLI writes to stderr stays visible.
  if huggingface-cli download "$repo" "$file" --local-dir "$dir" >/dev/null; then
    log "Downloaded model: $repo/$file → $dest"
  else
    warn "Download failed, continuing without it: $repo/$file"
  fi
}
|
||||
_hf_download "Jackrong/Qwopus3.6-27B-v2-MTP-GGUF" "Qwopus3.6-27B-v2-MTP-Q4_K_M.gguf" "$HOME/models"
|
||||
_hf_download "Jackrong/Qwopus3.5-9B-Coder-MTP-GGUF" "Qwopus3.5-9B-Coder-MTP-Q8_0.gguf" "$HOME/models"
|
||||
_hf_download "bartowski/agentica-org_DeepCoder-14B-Preview-GGUF" "agentica-org_DeepCoder-14B-Preview-Q5_K_M.gguf" "$HOME/models"
|
||||
_hf_download "byteshape/Qwen3.6-35B-A3B-MTP-GGUF" "Qwen3.6-35B-A3B-IQ3_S-3.06bpw.gguf" "$HOME/models"
|
||||
_hf_download "Jackrong/Qwopus3.6-35B-A3B-v1-MTP-GGUF" "Qwopus3.6-35B-A3B-v1-MTP-Q4_K_M.gguf" "$HOME/models"
|
||||
_hf_download "mradermacher/OmniCoder-2-9B-GGUF" "OmniCoder-2-9B.Q8_0.gguf" "$HOME/models/OmniCoder-2-9B.Q8_0"
|
||||
_hf_download "mradermacher/OmniCoder-2-9B-GGUF" "mmproj-Q8_0.gguf" "$HOME/models/OmniCoder-2-9B.Q8_0"
|
||||
_hf_download "bartowski/Qwen_Qwen3-14B-GGUF" "Qwen_Qwen3-14B-Q4_K_M.gguf" "$HOME/models"
|
||||
_hf_download "bartowski/Qwen_Qwen3.6-27B-GGUF" "Qwen_Qwen3.6-27B-Q4_K_M.gguf" "$HOME/models"
|
||||
fi
|
||||
|
||||
PRESETS_SRC="$DOTFILES_CONFIG/llama-server/presets.ini"
|
||||
PRESETS_DST="$HOME/models/presets.ini"
|
||||
mkdir -p "$HOME/models"
|
||||
if diff -q "$PRESETS_SRC" "$PRESETS_DST" >/dev/null 2>&1; then
|
||||
skip "presets.ini already up-to-date: $PRESETS_DST"
|
||||
else
|
||||
cp "$PRESETS_SRC" "$PRESETS_DST"
|
||||
log "Installed presets.ini → $PRESETS_DST"
|
||||
fi
|
||||
|
||||
SVC_SRC="$DOTFILES_CONFIG/llama-server/llama-server.service"
|
||||
SVC_DST="/etc/systemd/system/llama-server.service"
|
||||
if diff -q "$SVC_SRC" "$SVC_DST" >/dev/null 2>&1; then
|
||||
skip "llama-server.service already up-to-date: $SVC_DST"
|
||||
else
|
||||
cp "$SVC_SRC" "$SVC_DST"
|
||||
log "Installed llama-server.service → $SVC_DST"
|
||||
fi
|
||||
|
||||
PATH_SRC="$DOTFILES_CONFIG/llama-server/llama-server-presets.path"
|
||||
PATH_DST="/etc/systemd/system/llama-server-presets.path"
|
||||
if diff -q "$PATH_SRC" "$PATH_DST" >/dev/null 2>&1; then
|
||||
skip "llama-server-presets.path already up-to-date: $PATH_DST"
|
||||
else
|
||||
cp "$PATH_SRC" "$PATH_DST"
|
||||
log "Installed llama-server-presets.path → $PATH_DST"
|
||||
fi
|
||||
|
||||
PSVC_SRC="$DOTFILES_CONFIG/llama-server/llama-server-presets.service"
|
||||
PSVC_DST="/etc/systemd/system/llama-server-presets.service"
|
||||
if diff -q "$PSVC_SRC" "$PSVC_DST" >/dev/null 2>&1; then
|
||||
skip "llama-server-presets.service already up-to-date: $PSVC_DST"
|
||||
else
|
||||
cp "$PSVC_SRC" "$PSVC_DST"
|
||||
log "Installed llama-server-presets.service → $PSVC_DST"
|
||||
fi
|
||||
|
||||
START_SRC="$DOTFILES_CONFIG/llama-server/start.sh"
|
||||
START_DST="/opt/llama-server/start.sh"
|
||||
mkdir -p "$(dirname "$START_DST")"
|
||||
if diff -q "$START_SRC" "$START_DST" >/dev/null 2>&1; then
|
||||
skip "start.sh already up-to-date: $START_DST"
|
||||
else
|
||||
cp "$START_SRC" "$START_DST"
|
||||
log "Installed start.sh → $START_DST"
|
||||
fi
|
||||
fi
|
||||
|
||||
# ── 6. VS Code global MCP ────────────────────────────────────────────────────
|
||||
# Primary remote/server path; falls back to local if running VS Code locally.
|
||||
VSCODE_MCP_PATHS=(
|
||||
"$HOME/.vscode-server/data/User/mcp.json"
|
||||
@ -121,6 +192,7 @@ VSCODE_MCP_PATHS=(
|
||||
|
||||
for VSCODE_MCP in "${VSCODE_MCP_PATHS[@]}"; do
|
||||
if [[ -d "$(dirname "$VSCODE_MCP")" ]]; then
|
||||
MCP_KEY="all-agents"
|
||||
MCP_SERVER_CMD="node"
|
||||
MCP_SERVER_ARGS="[\"--experimental-strip-types\", \"$DOTFILES_AGENTS/mcp/index.ts\"]"
|
||||
|
||||
@ -150,7 +222,7 @@ if (changed) {
|
||||
fi
|
||||
done
|
||||
|
||||
# ── 6. VS Code global prompts dir ───────────────────────────────────────────
|
||||
# ── 7. VS Code global prompts dir ───────────────────────────────────────────
|
||||
for VSCODE_PROMPTS_DIR in \
|
||||
"$HOME/.vscode-server/data/User/prompts" \
|
||||
"$HOME/.vscode/data/User/prompts"; do
|
||||
@ -161,7 +233,7 @@ for VSCODE_PROMPTS_DIR in \
|
||||
fi
|
||||
done
|
||||
|
||||
# ── 7. MCP server dependencies ───────────────────────────────────────────────
|
||||
# ── 8. MCP server dependencies ───────────────────────────────────────────────
|
||||
MCP_DIR="$DOTFILES_AGENTS/mcp"
|
||||
if [[ ! -d "$MCP_DIR/node_modules/@modelcontextprotocol" ]]; then
|
||||
log "Installing MCP server dependencies (npm install in $MCP_DIR)..."
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user