dcc0bf550a
- P0: unified soul binary with engram_node_full fix, read-back-verify, search fix - P0: move API keys from plaintext plists to macOS Keychain - P0: fix MCP backend URL (port 8742 → 7770) - P1.6: memory-export/import scripts (AES-256-CBC, versioned .neuronmem format) - P1.7: nightly cultivation digest with sharpness metric (launchd at 23:55) - P2.10: Ollama provider in agentic loop (SOUL_LLM_PROVIDER=ollama) - P3.12: refugee importer for ChatGPT/Screenpipe/generic formats - P3.13: GLM-OCR spike — SHIP IT (mlx-vlm, 1.59GB, photo-to-memory.sh)
428 lines
15 KiB
Bash
Executable File
428 lines
15 KiB
Bash
Executable File
#!/usr/bin/env bash
# memory-import-refugee.sh — Import conversation/memory history from external apps into Neuron
#
# Usage:
#   ./tools/memory-import-refugee.sh --format chatgpt conversations.json
#   ./tools/memory-import-refugee.sh --format screenpipe screenpipe-export.json
#   ./tools/memory-import-refugee.sh --format generic data.json[l]
#
# Supported formats:
#   chatgpt    — ChatGPT conversation export (conversations.json)
#   screenpipe — Screenpipe OCR export (frames array)
#   generic    — Any JSON array or JSONL with content/text fields
#
# The script writes Memory nodes to the Neuron soul via its HTTP API.
# The soul must be running on localhost:7770.

# Strict mode: stop on unhandled command failures, on use of unset variables,
# and when any stage of a pipeline fails.
set -euo pipefail

# ── Config ─────────────────────────────────────────────────────────────────────
# Base URL of the soul's HTTP API. Defaults to the local soul; export SOUL_HOST
# before running to import into a different instance. A trailing slash is
# removed so the endpoint below never contains "//".
SOUL_HOST="${SOUL_HOST:-http://localhost:7770}"
SOUL_HOST="${SOUL_HOST%/}"
# Note: POST /api/neuron/memory ignores the label field (soul hardcodes "memory:remembered").
# We embed the label in the content prefix so it is searchable.
MEMORY_API="${SOUL_HOST}/api/neuron/memory"
SLEEP_MS=100 # ms between API calls (rate limiting)

# ── Dependency check ───────────────────────────────────────────────────────────
# jq builds and parses every JSON document this script touches.
if ! command -v jq &>/dev/null; then
  echo "ERROR: jq is required but not installed." >&2
  echo "" >&2
  echo "Install it with:" >&2
  echo " macOS: brew install jq" >&2
  echo " Ubuntu: sudo apt-get install jq" >&2
  echo " Alpine: apk add jq" >&2
  exit 1
fi

# curl performs every HTTP call. Checking for it here gives a clear message
# instead of a misleading "soul is not responding" error later on.
if ! command -v curl &>/dev/null; then
  echo "ERROR: curl is required but not installed." >&2
  exit 1
fi

# ── Parse args ─────────────────────────────────────────────────────────────────
|
|
FORMAT=""
|
|
INPUT_FILE=""
|
|
|
|
while [[ $# -gt 0 ]]; do
|
|
case "$1" in
|
|
--format|-f)
|
|
FORMAT="$2"
|
|
shift 2
|
|
;;
|
|
--format=*|-f=*)
|
|
FORMAT="${1#*=}"
|
|
shift
|
|
;;
|
|
-*)
|
|
echo "Unknown option: $1" >&2
|
|
echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
|
|
exit 1
|
|
;;
|
|
*)
|
|
if [[ -z "$INPUT_FILE" ]]; then
|
|
INPUT_FILE="$1"
|
|
else
|
|
echo "Unexpected argument: $1" >&2
|
|
exit 1
|
|
fi
|
|
shift
|
|
;;
|
|
esac
|
|
done
|
|
|
|
# ── Validate args ──────────────────────────────────────────────────────────────
# usage_error MESSAGE
#   Print MESSAGE followed by the usage line on stderr, then exit 1.
usage_error() {
  echo "$1" >&2
  echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
  exit 1
}

# Both the format and the input file are mandatory.
[[ -n "$FORMAT" ]] || usage_error "ERROR: --format is required."
[[ -n "$INPUT_FILE" ]] || usage_error "ERROR: No input file specified."

# The input must be an existing regular file.
if ! [[ -f "$INPUT_FILE" ]]; then
  echo "ERROR: Input file not found: $INPUT_FILE" >&2
  exit 1
fi

# Only the three importers defined in this script are accepted.
if [[ "$FORMAT" != "chatgpt" && "$FORMAT" != "screenpipe" && "$FORMAT" != "generic" ]]; then
  echo "ERROR: Unknown format: $FORMAT" >&2
  echo "Supported formats: chatgpt, screenpipe, generic" >&2
  exit 1
fi

# ── Soul health check ──────────────────────────────────────────────────────────
# Probe the memory endpoint once before importing anything.
#
# When no HTTP response is received, curl itself writes "000" for %{http_code}
# and exits non-zero. Appending a fallback "000" on failure would therefore
# yield "000000" and the check below would never fire, so the exit status is
# simply ignored and the captured text is inspected. An empty capture (for
# example, curl could not be executed at all) is treated as "not responding".
HTTP_CODE="$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "$MEMORY_API" 2>/dev/null || true)"
if [[ -z "$HTTP_CODE" || "$HTTP_CODE" == "000" ]]; then
  echo "ERROR: Neuron soul is not responding at ${SOUL_HOST}." >&2
  echo " Start the soul service and retry." >&2
  exit 1
fi

# ── Counters ───────────────────────────────────────────────────────────────────
# Running totals updated by post_memory (and by importers that skip records
# themselves); printed in the final report.
IMPORTED=0
SKIPPED=0
ERRORS=0

# ── Helper: post one memory node ───────────────────────────────────────────────
# post_memory CONTENT LABEL TAGS_JSON
#
# Note: the soul's POST /api/neuron/memory API ignores the label field (hardcodes
# it to "memory:remembered"). We embed the label as a prefix in the content so
# the title remains searchable via recall/search.
#
# Arguments:
#   CONTENT    text of the memory; empty or the literal "null" is skipped
#   LABEL      title, sent as "label" and prefixed to the content as "[LABEL] "
#   TAGS_JSON  JSON array literal passed through as the "tags" field
# Globals:
#   MEMORY_API, SLEEP_MS (read); IMPORTED, SKIPPED, ERRORS (incremented)
# Returns:
#   Always 0. A failed request is counted in ERRORS and reported on stderr so
#   one bad record or a transient transport error never aborts the import.
post_memory() {
  local content="$1"
  local label="$2"
  local tags_json="$3"

  # Skip empty content
  if [[ -z "$content" || "$content" == "null" ]]; then
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  # Embed label in content so it's searchable (the API ignores the label field)
  local full_content="[${label}] ${content}"

  local payload
  if ! payload="$(jq -n \
    --arg content "$full_content" \
    --arg label "$label" \
    --argjson tags "$tags_json" \
    '{content: $content, label: $label, tags: $tags}' 2>/dev/null)"; then
    ERRORS=$((ERRORS + 1))
    echo " [ERROR] Could not build request payload for label \"${label:0:60}\"" >&2
    return 0
  fi

  # The body is fed on stdin rather than as an argument: large memories would
  # otherwise hit the argv size limit, and argv is visible to other users in ps.
  # "|| response=..." keeps a curl failure from tripping `set -e`.
  local response
  response="$(curl -s -X POST "$MEMORY_API" \
    -H "Content-Type: application/json" \
    --data-binary @- <<<"$payload" 2>/dev/null)" || response=""

  # A non-JSON reply (HTML error page, empty body) makes jq fail; treat that as
  # "not ok" instead of letting the failure abort the script.
  local ok
  ok="$(jq -r '.ok // "false"' <<<"$response" 2>/dev/null)" || ok="false"

  if [[ "$ok" == "true" ]]; then
    IMPORTED=$((IMPORTED + 1))
  else
    ERRORS=$((ERRORS + 1))
    echo " [ERROR] API error for label \"${label:0:60}\": $response" >&2
  fi

  # Rate limit. Milliseconds are converted to a zero-padded "S.mmm" value:
  # naively writing "0.<ms>" is only right for three-digit values (50 would
  # become 0.50 s, 1000 would become 0.1 s). The 10# prefix forces base 10 so a
  # value such as 050 is not read as octal.
  local delay_ms="${SLEEP_MS:-100}"
  [[ "$delay_ms" =~ ^[0-9]+$ ]] || delay_ms=100
  if ((10#$delay_ms > 0)); then
    local delay
    printf -v delay '%d.%03d' "$((10#$delay_ms / 1000))" "$((10#$delay_ms % 1000))"
    sleep "$delay"
  fi
}

# ── Format: ChatGPT ────────────────────────────────────────────────────────────
# import_chatgpt
#
# Imports every non-empty text part of every user-authored message found in a
# ChatGPT conversations export. Each part becomes one memory labelled with the
# conversation title (first 100 characters) and tagged
# ["chatgpt-import","conversation"].
#
# Globals:  INPUT_FILE (read); counters are updated through post_memory
# Exits 1 when INPUT_FILE is not a JSON array.
import_chatgpt() {
  echo "Format: ChatGPT conversation export"

  # Validate: must be JSON array at top level. The "|| top_type=" keeps an
  # unparseable file from aborting silently under `set -e` before the message
  # below can be printed.
  local top_type
  top_type="$(jq -r 'type' "$INPUT_FILE" 2>/dev/null)" || top_type=""
  if [[ "$top_type" != "array" ]]; then
    echo "ERROR: ChatGPT export must be a JSON array of conversations." >&2
    exit 1
  fi

  local conv_count
  conv_count="$(jq 'length' "$INPUT_FILE")"
  echo "Found ${conv_count} conversation(s) to process."
  echo ""

  # ChatGPT export uses a mapping dict structure:
  #   { uuid: { id, message: { author: { role }, content: { parts: [...] } }, ... } }
  # user_parts walks the mapping values of ONE conversation, keeps role=user
  # messages and yields their non-empty string parts. It is defined once and
  # shared by the progress count and the extraction so the two cannot drift.
  local jq_defs='
    def user_parts:
      (.mapping // {})
      | to_entries[]
      | .value.message
      | select(. != null)
      | select(.author.role == "user")
      | (.content.parts // [])
      | .[]
      | select(type == "string" and length > 0);
  '

  # Count total user messages for progress display
  local total_msgs
  total_msgs="$(jq "${jq_defs} [.[] | user_parts] | length" "$INPUT_FILE" 2>/dev/null)" || total_msgs="?"
  echo "Total user messages: ${total_msgs}"
  echo ""

  local msg_idx=0
  local conv_json title label msg_json msg_text

  # Process each conversation (one compact JSON object per line)
  while IFS= read -r conv_json; do
    title="$(jq -r '.title // "Untitled"' <<<"$conv_json" 2>/dev/null)" || title="Untitled"

    # Truncate label to 100 chars
    label="${title:0:100}"

    # Parts are emitted JSON-encoded (-c), one per line, and decoded one at a
    # time. Emitting raw text would split a multi-line message into one memory
    # per line.
    while IFS= read -r msg_json; do
      msg_text="$(jq -r '.' <<<"$msg_json" 2>/dev/null)" || continue
      msg_idx=$((msg_idx + 1))
      echo " Importing ${msg_idx}/${total_msgs}..."
      post_memory "$msg_text" "$label" '["chatgpt-import","conversation"]'
    done < <(jq -c "${jq_defs} user_parts" <<<"$conv_json" 2>/dev/null)

  done < <(jq -c '.[]' "$INPUT_FILE")
}

# ── Format: Screenpipe ─────────────────────────────────────────────────────────
# import_screenpipe
#
# Imports a Screenpipe OCR export: an object whose "frames" array holds entries
# with app_name, timestamp and text. Frames with non-empty text are grouped per
# app and per 5-minute window; each group becomes one memory whose content is
# the group's texts joined by spaces (first 2000 characters), labelled
# "Screenpipe: <app> at <timestamp>" and tagged
# ["screenpipe-import","screen-capture",<app>].
#
# Globals:  INPUT_FILE (read); counters are updated through post_memory
# Exits 1 when INPUT_FILE is not a JSON object or the frames cannot be grouped.
import_screenpipe() {
  echo "Format: Screenpipe OCR export"

  # Validate: must have frames array. The "|| top_type=" keeps an unparseable
  # file from aborting silently under `set -e`.
  local top_type
  top_type="$(jq -r 'type' "$INPUT_FILE" 2>/dev/null)" || top_type=""
  if [[ "$top_type" != "object" ]]; then
    echo "ERROR: Screenpipe export must be a JSON object with a 'frames' array." >&2
    exit 1
  fi

  local frame_count
  frame_count="$(jq '.frames | length' "$INPUT_FILE" 2>/dev/null)" || frame_count="0"
  echo "Found ${frame_count} frame(s) to process."

  if [[ "$frame_count" == "0" ]]; then
    echo "No frames found. Nothing to import."
    return 0
  fi

  # Grouping is done in a single jq program that emits one compact JSON object
  # per group: {app, ts, content}.
  #   * Timestamps are stringified first, so epoch values stored as JSON numbers
  #     and as strings are both accepted.
  #   * Epoch timestamps bucket as floor(seconds / 300).
  #   * Other (ISO-8601 "YYYY-MM-DDTHH:MM...") timestamps bucket on the first 15
  #     characters (date, hour, tens digit of the minute) plus a half marker
  #     taken from the ones digit of the minute, which yields 5-minute windows
  #     without any date parsing.
  #   * Truncation happens inside jq so it applies to the whole content rather
  #     than to each line of multi-line OCR text.
  local group_filter='
    def bucket_key:
      if test("^[0-9]+([.][0-9]+)?$") then (tonumber / 300 | floor | tostring)
      else .[0:15] + (if .[15:16] >= "5" then "b" else "a" end)
      end;

    .frames
    | map(select(.text != null and (.text | length) > 0))
    | group_by(.app_name)
    | .[]
    | (.[0].app_name) as $app
    | sort_by(.timestamp)
    | reduce .[] as $f (
        {bucket: null, texts: [], ts: null, groups: []};
        (($f.timestamp // "") | tostring) as $ts
        | ($ts | bucket_key) as $bucket
        | if .bucket == null then
            {bucket: $bucket, texts: [$f.text], ts: $ts, groups: .groups}
          elif .bucket == $bucket then
            {bucket: $bucket, texts: (.texts + [$f.text]), ts: $ts, groups: .groups}
          else
            {bucket: $bucket, texts: [$f.text], ts: $ts,
             groups: (.groups + [{app: $app, ts: .ts, texts: .texts}])}
          end
      )
    | (.groups + [{app: $app, ts: .ts, texts: .texts}])
    | .[]
    | select(.texts | length > 0)
    | {app, ts, content: (.texts | map(tostring) | join(" ") | .[0:2000])}
  '

  local groups
  if ! groups="$(jq -c "$group_filter" "$INPUT_FILE" 2>/dev/null)"; then
    echo "ERROR: Could not group Screenpipe frames (unexpected frame structure)." >&2
    exit 1
  fi

  # Count groups up front so progress can be shown as i/total.
  local total_groups=0 group_json
  while IFS= read -r group_json; do
    if [[ -n "$group_json" ]]; then
      total_groups=$((total_groups + 1))
    fi
  done <<<"$groups"

  local group_idx=0 app_name ts_str content label tags_json
  while IFS= read -r group_json; do
    [[ -z "$group_json" ]] && continue

    group_idx=$((group_idx + 1))
    echo " Importing ${group_idx}/${total_groups}..."

    app_name="$(jq -r '.app // "unknown"' <<<"$group_json")"
    ts_str="$(jq -r '.ts // ""' <<<"$group_json")"
    content="$(jq -r '.content' <<<"$group_json")"
    label="Screenpipe: ${app_name} at ${ts_str:0:16}"

    tags_json="$(jq -n --arg app "$app_name" '["screenpipe-import","screen-capture",$app]')"

    post_memory "$content" "$label" "$tags_json"
  done <<<"$groups"
}

# ── Format: Generic ────────────────────────────────────────────────────────────
# import_generic
#
# Imports records from a JSON array, a single JSON object, or JSONL. For each
# object record the content is the first string among .content, .text, .body,
# .message, and the label is the first string among .title, .label, .name
# (falling back to the first 80 characters of the content); labels are capped at
# 100 characters. Records without usable content are counted as skipped.
#
# Globals:  INPUT_FILE (read); SKIPPED (incremented); other counters are updated
#           through post_memory
import_generic() {
  echo "Format: Generic JSON/JSONL"

  # jq reads a file as a stream of JSON values, so one filter normalizes every
  # supported layout to one compact record per line: arrays are exploded, and a
  # lone object (compact or pretty-printed) or each JSONL value passes through
  # as-is. No first-character or line-count guessing is needed.
  local records parse_ok=1
  records="$(jq -c 'if type == "array" then .[] else . end' "$INPUT_FILE" 2>/dev/null)" || parse_ok=0
  if [[ "$parse_ok" -eq 0 ]]; then
    # jq stops at the first malformed value; whatever it emitted before that is
    # still in $records and is imported.
    echo "WARNING: $INPUT_FILE is not entirely valid JSON; importing only the records parsed before the first error." >&2
  fi

  local total_records=0 record_json
  while IFS= read -r record_json; do
    if [[ -n "$record_json" ]]; then
      total_records=$((total_records + 1))
    fi
  done <<<"$records"
  echo "Found ${total_records} record(s) to process."
  echo ""

  # first_string(a, b, ...) yields the first candidate that is a string, or "".
  local jq_defs='def first_string(candidates): [candidates | select(type == "string")] | .[0] // "";'

  local idx=0 content label
  while IFS= read -r record_json; do
    [[ -z "$record_json" ]] && continue

    idx=$((idx + 1))
    echo " Importing ${idx}/${total_records}..."

    # Extract content: prefer 'content', fall back to 'text', then 'body', then
    # 'message'. Non-object records have no fields and yield "". The "|| ..."
    # keeps a jq failure on one record from aborting the run under `set -e`.
    content="$(jq -r "${jq_defs}"'
      if type == "object" then first_string(.content, .text, .body, .message) else "" end
    ' <<<"$record_json" 2>/dev/null)" || content=""

    if [[ -z "$content" || "$content" == "null" ]]; then
      SKIPPED=$((SKIPPED + 1))
      continue
    fi

    # Extract label: prefer 'title', then 'label', then 'name', then first 80
    # chars of content
    label="$(jq -r "${jq_defs}"'
      if type == "object" then first_string(.title, .label, .name) else "" end
    ' <<<"$record_json" 2>/dev/null)" || label=""

    if [[ -z "$label" || "$label" == "null" ]]; then
      label="${content:0:80}"
    fi
    label="${label:0:100}"

    post_memory "$content" "$label" '["imported","generic"]'
  done <<<"$records"
}

# ── Main ───────────────────────────────────────────────────────────────────────
# Banner: what is being imported, in which format, into which soul.
printf '%s\n' \
  "Neuron Refugee Importer" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Source: $INPUT_FILE" \
  "Format: $FORMAT" \
  "Soul: $SOUL_HOST" \
  ""

# Hand over to the importer that matches the (already validated) format.
case "$FORMAT" in
  chatgpt)
    import_chatgpt
    ;;
  screenpipe)
    import_screenpipe
    ;;
  generic)
    import_generic
    ;;
esac

# ── Final report ───────────────────────────────────────────────────────────────
# Summary of the counters accumulated during the import.
printf '%s\n' \
  "" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Import complete." \
  " Imported: ${IMPORTED}" \
  " Skipped: ${SKIPPED}" \
  " Errors: ${ERRORS}" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Signal failure to the caller when at least one record could not be stored.
if ((ERRORS > 0)); then
  exit 1
fi