Files
neuron/tools/memory-import-refugee.sh
T
will.anderson dcc0bf550a Add Ollama provider, portable memory, cultivation digest, refugee importer, GLM-OCR spike
- P0: unified soul binary with engram_node_full fix, read-back-verify, search fix
- P0: move API keys from plaintext plists to macOS Keychain
- P0: fix MCP backend URL (port 8742 → 7770)
- P1.6: memory-export/import scripts (AES-256-CBC, versioned .neuronmem format)
- P1.7: nightly cultivation digest with sharpness metric (launchd at 23:55)
- P2.10: Ollama provider in agentic loop (SOUL_LLM_PROVIDER=ollama)
- P3.12: refugee importer for ChatGPT/Screenpipe/generic formats
- P3.13: GLM-OCR spike — SHIP IT (mlx-vlm, 1.59GB, photo-to-memory.sh)
2026-06-27 11:46:30 -05:00

428 lines
15 KiB
Bash
Executable File

#!/usr/bin/env bash
# memory-import-refugee.sh — Import conversation/memory history from external apps into Neuron
#
# Usage:
# ./tools/memory-import-refugee.sh --format chatgpt conversations.json
# ./tools/memory-import-refugee.sh --format screenpipe screenpipe-export.json
# ./tools/memory-import-refugee.sh --format generic data.json[l]
#
# Supported formats:
# chatgpt — ChatGPT conversation export (conversations.json)
# screenpipe — Screenpipe OCR export (frames array)
# generic — Any JSON array or JSONL with content/text fields
#
# The script writes Memory nodes to the Neuron soul via its HTTP API.
# The soul must be running on localhost:7770.
set -euo pipefail
# ── Config ─────────────────────────────────────────────────────────────────────
# Pause between consecutive API calls, in milliseconds (rate limiting).
SLEEP_MS=100
# Base URL of the locally running Neuron soul.
SOUL_HOST='http://localhost:7770'
# Endpoint that creates Memory nodes.
# Note: POST /api/neuron/memory ignores the label field (soul hardcodes
# "memory:remembered"), so the label is embedded in the content prefix to keep
# it searchable.
MEMORY_API="$SOUL_HOST/api/neuron/memory"
# ── Dependency check ───────────────────────────────────────────────────────────
# Both tools are hard requirements: jq builds and parses every JSON document,
# curl performs every request to the soul. Checking curl here keeps a missing
# binary from being misreported later as "soul is not responding".
if ! command -v jq &>/dev/null; then
  echo "ERROR: jq is required but not installed." >&2
  echo "" >&2
  echo "Install it with:" >&2
  echo " macOS: brew install jq" >&2
  echo " Ubuntu: sudo apt-get install jq" >&2
  echo " Alpine: apk add jq" >&2
  exit 1
fi
if ! command -v curl &>/dev/null; then
  echo "ERROR: curl is required but not installed." >&2
  exit 1
fi
# ── Parse args ─────────────────────────────────────────────────────────────────
# Accepted forms:
#   --format NAME | -f NAME | --format=NAME | -f=NAME
#   <input-file>   exactly one positional argument
# Sets FORMAT and INPUT_FILE; exits 1 with a message on any invalid invocation.
FORMAT=""
INPUT_FILE=""
while [[ $# -gt 0 ]]; do
  case "$1" in
    --format|-f)
      # Guard the value: under `set -u`, reading a missing "$2" would abort
      # with a bare "unbound variable" error instead of a usage hint.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: $1 requires a value." >&2
        echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
        exit 1
      fi
      FORMAT="$2"
      shift 2
      ;;
    --format=*|-f=*)
      FORMAT="${1#*=}"
      shift
      ;;
    -*)
      echo "Unknown option: $1" >&2
      echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
      exit 1
      ;;
    *)
      if [[ -z "$INPUT_FILE" ]]; then
        INPUT_FILE="$1"
      else
        echo "Unexpected argument: $1" >&2
        exit 1
      fi
      shift
      ;;
  esac
done

if [[ -z "$FORMAT" ]]; then
  echo "ERROR: --format is required." >&2
  echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
  exit 1
fi
if [[ -z "$INPUT_FILE" ]]; then
  echo "ERROR: No input file specified." >&2
  echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
  exit 1
fi
if [[ ! -f "$INPUT_FILE" ]]; then
  echo "ERROR: Input file not found: $INPUT_FILE" >&2
  exit 1
fi

# Reject unknown formats before any network traffic happens.
case "$FORMAT" in
  chatgpt|screenpipe|generic) ;;
  *)
    echo "ERROR: Unknown format: $FORMAT" >&2
    echo "Supported formats: chatgpt, screenpipe, generic" >&2
    exit 1
    ;;
esac
# ── Soul health check ──────────────────────────────────────────────────────────
# Any HTTP status proves the service is listening; only a transport failure
# counts as "down". When no response arrives curl's --write-out prints "000"
# itself, so the fallback must REPLACE the captured value: appending a second
# "000" inside the substitution would yield "000000" and let the check pass.
HTTP_CODE="$(curl -s -o /dev/null -w "%{http_code}" "${SOUL_HOST}/api/neuron/memory" 2>/dev/null)" || HTTP_CODE="000"
if [[ -z "$HTTP_CODE" || "$HTTP_CODE" == "000" ]]; then
  echo "ERROR: Neuron soul is not responding at ${SOUL_HOST}." >&2
  echo " Start the soul service and retry." >&2
  exit 1
fi
# ── Counters ───────────────────────────────────────────────────────────────────
# Running totals shown in the final report: records stored, records without
# usable content, and records the API rejected.
IMPORTED=0 SKIPPED=0 ERRORS=0
# ── Helper: post one memory node ───────────────────────────────────────────────
# post_memory CONTENT LABEL TAGS_JSON
#
# Note: the soul's POST /api/neuron/memory API ignores the label field (hardcodes
# it to "memory:remembered"). We embed the label as a prefix in the content so
# the title remains searchable via recall/search.
post_memory() {
local content="$1"
local label="$2"
local tags_json="$3"
# Skip empty content
if [[ -z "$content" || "$content" == "null" ]]; then
SKIPPED=$((SKIPPED + 1))
return 0
fi
# Embed label in content so it's searchable (the API ignores the label field)
local full_content="[${label}] ${content}"
local payload
payload="$(jq -n \
--arg content "$full_content" \
--arg label "$label" \
--argjson tags "$tags_json" \
'{content: $content, label: $label, tags: $tags}')"
local response
response="$(curl -s -X POST "$MEMORY_API" \
-H "Content-Type: application/json" \
-d "$payload" 2>/dev/null)"
local ok
ok="$(echo "$response" | jq -r '.ok // "false"' 2>/dev/null)"
if [[ "$ok" == "true" ]]; then
IMPORTED=$((IMPORTED + 1))
else
ERRORS=$((ERRORS + 1))
echo " [ERROR] API error for label \"${label:0:60}\": $response" >&2
fi
# Rate limit: sleep 100ms
sleep "0.${SLEEP_MS}"
}
# ── Format: ChatGPT ────────────────────────────────────────────────────────────
# import_chatgpt
#
# Imports every user-authored text part of a ChatGPT conversations.json export
# as one Memory node, labelled with the conversation title (max 100 chars).
#
# Globals:   INPUT_FILE (read)
# Calls:     post_memory CONTENT LABEL TAGS_JSON
# Exits 1 when the file is not a top-level JSON array.
import_chatgpt() {
  echo "Format: ChatGPT conversation export"

  # Validate: must be JSON array at top level. A jq failure (invalid JSON) is
  # mapped to the same error message instead of a silent `set -e` abort.
  local top_type=""
  top_type="$(jq -r 'type' "$INPUT_FILE" 2>/dev/null)" || top_type=""
  if [[ "$top_type" != "array" ]]; then
    echo "ERROR: ChatGPT export must be a JSON array of conversations." >&2
    exit 1
  fi

  local conv_count
  conv_count="$(jq 'length' "$INPUT_FILE")"
  echo "Found ${conv_count} conversation(s) to process."
  echo ""

  # One shared definition of "user text parts of a conversation", used for
  # both the progress total and the import itself so the two cannot drift.
  # ChatGPT export uses a mapping dict structure:
  #   { uuid: { id, message: { author: { role }, content: { parts: [...] } }, ... } }
  local jq_defs='
    def user_parts:
      (.mapping? // {})
      | to_entries[]
      | .value.message?
      | select(type == "object")
      | select(.author.role? == "user")
      | (.content.parts? // [])
      | .[]?
      | select(type == "string" and length > 0);
  '

  # Count total user messages for progress display
  local total_msgs=""
  total_msgs="$(jq "${jq_defs} [.[] | user_parts] | length" "$INPUT_FILE" 2>/dev/null)" || total_msgs="?"
  echo "Total user messages: ${total_msgs}"
  echo ""

  local msg_idx=0
  local conv_json title label msg_json msg_text

  # Process each conversation
  while IFS= read -r conv_json; do
    title="$(jq -r '.title // "Untitled"' <<< "$conv_json" 2>/dev/null)" || title="Untitled"
    # Truncate label to 100 chars
    label="${title:0:100}"

    # Messages are emitted JSON-encoded (one per line) and decoded here.
    # Emitting them raw would make `read` split every multi-line message into
    # several separate memories.
    while IFS= read -r msg_json; do
      msg_text="$(jq -r '.' <<< "$msg_json" 2>/dev/null)" || continue
      msg_idx=$((msg_idx + 1))
      echo " Importing ${msg_idx}/${total_msgs}..."
      post_memory "$msg_text" "$label" '["chatgpt-import","conversation"]'
    done < <(jq -c "${jq_defs} user_parts" <<< "$conv_json" 2>/dev/null)
  done < <(jq -c '.[]' "$INPUT_FILE")
}
# ── Format: Screenpipe ─────────────────────────────────────────────────────────
# import_screenpipe
#
# Imports a Screenpipe OCR export ({"frames": [...]}) as Memory nodes. Frames
# with text are grouped per app_name and per 5-minute window; each group
# becomes one memory whose content is the frame texts joined by spaces and
# truncated to 2000 characters.
#
# Globals:   INPUT_FILE (read)
# Calls:     post_memory CONTENT LABEL TAGS_JSON
# Exits 1 when the file is not a JSON object or its frames cannot be grouped.
import_screenpipe() {
  echo "Format: Screenpipe OCR export"

  # Validate: must have frames array
  local top_type=""
  top_type="$(jq -r 'type' "$INPUT_FILE" 2>/dev/null)" || top_type=""
  if [[ "$top_type" != "object" ]]; then
    echo "ERROR: Screenpipe export must be a JSON object with a 'frames' array." >&2
    exit 1
  fi

  local frame_count=""
  frame_count="$(jq '.frames | length' "$INPUT_FILE" 2>/dev/null)" || frame_count="0"
  echo "Found ${frame_count} frame(s) to process."
  if [[ "$frame_count" == "0" ]]; then
    echo "No frames found. Nothing to import."
    return 0
  fi

  # Build all groups in a single jq pass; one compact JSON object per line:
  #   { app, ts, content }
  # - timestamps are stringified first, so numeric epochs work as well as
  #   strings (test() rejects non-string input)
  # - epoch timestamps bucket as floor(seconds / 300)
  # - other timestamps (ISO8601) bucket on "YYYY-MM-DDTHH:M" plus "0" or "5"
  #   depending on the last minute digit, i.e. 5-minute wall-clock windows
  # - ts is the timestamp of the last frame in the group
  # - truncation happens in jq so it applies to the whole content, not to each
  #   line of multi-line OCR text
  local groups=""
  if ! groups="$(jq -c '
    .frames
    | map(select(.text != null and (.text | length) > 0))
    | group_by(.app_name)
    | .[]
    | (.[0].app_name) as $app
    | sort_by(.timestamp)
    | reduce .[] as $f (
        {bucket: null, ts: null, texts: [], groups: []};
        (($f.timestamp // "") | tostring) as $ts
        | (if ($ts | test("^[0-9]+(\\.[0-9]+)?$"))
           then ($ts | tonumber / 300 | floor | tostring)
           else ($ts[0:15] + (if $ts[15:16] >= "5" then "5" else "0" end))
           end) as $bucket
        | if .bucket == null or .bucket == $bucket then
            {bucket: $bucket, ts: $ts, texts: (.texts + [$f.text]), groups: .groups}
          else
            {bucket: $bucket, ts: $ts, texts: [$f.text],
             groups: (.groups + [{app: $app, ts: .ts, texts: .texts}])}
          end
      )
    | (.groups + [{app: $app, ts: .ts, texts: .texts}])
    | .[]
    | select(.texts | length > 0)
    | {app: .app, ts: .ts, content: (.texts | join(" ") | .[0:2000])}
  ' "$INPUT_FILE" 2>/dev/null)"; then
    echo "ERROR: Could not group Screenpipe frames (unexpected frame structure)." >&2
    exit 1
  fi

  local total_groups=0
  if [[ -n "$groups" ]]; then
    total_groups="$(wc -l <<< "$groups" | tr -d ' ')"
  fi
  if [[ "$total_groups" -eq 0 ]]; then
    echo "No frames with text found. Nothing to import."
    return 0
  fi

  local group_idx=0
  local group_json app_name ts_str content label tags_json
  while IFS= read -r group_json; do
    [[ -n "$group_json" ]] || continue
    group_idx=$((group_idx + 1))
    echo " Importing ${group_idx}/${total_groups}..."

    app_name="$(jq -r '.app // "unknown"' <<< "$group_json")"
    ts_str="$(jq -r '.ts // ""' <<< "$group_json")"
    content="$(jq -r '.content // ""' <<< "$group_json")"
    label="Screenpipe: ${app_name} at ${ts_str:0:16}"
    tags_json="$(jq -cn --arg app "$app_name" '["screenpipe-import","screen-capture",$app]')"

    post_memory "$content" "$label" "$tags_json"
  done <<< "$groups"
}
# ── Format: Generic ────────────────────────────────────────────────────────────
# import_generic
#
# Imports records from a JSON array, a single JSON object, or JSONL.
# For each object record:
#   content = first string among .content, .text, .body, .message
#   label   = first string among .title, .label, .name, else the first 80
#             characters of the content (always capped at 100 characters)
# Records without usable content (including non-object values) are counted in
# SKIPPED.
#
# Globals:   INPUT_FILE (read), SKIPPED (written)
# Calls:     post_memory CONTENT LABEL TAGS_JSON
# Exits 1 when the input cannot be parsed as JSON/JSONL.
import_generic() {
  echo "Format: Generic JSON/JSONL"

  # jq reads a file as a stream of JSON values, so one filter covers every
  # supported layout regardless of whitespace: a top-level array is exploded
  # into its elements, anything else (a lone object, or each JSONL line) is
  # passed through. Guessing from the first byte and the line count would
  # misread a pretty-printed single object as JSONL.
  #
  # First pass: count the records. This also validates the whole file up
  # front, so the streaming pass below cannot fail half-way through.
  local total_records=""
  if ! total_records="$(jq -n '
    reduce (inputs | if type == "array" then .[] else . end) as $r (0; . + 1)
  ' "$INPUT_FILE" 2>/dev/null)"; then
    echo "ERROR: Input is not valid JSON or JSONL: $INPUT_FILE" >&2
    exit 1
  fi
  echo "Found ${total_records} record(s) to process."
  echo ""

  local idx=0
  local record_json content label
  while IFS= read -r record_json; do
    idx=$((idx + 1))
    echo " Importing ${idx}/${total_records}..."

    # Extract content: prefer 'content', fall back to 'text', then 'body', then 'message'.
    # Non-object records yield "" instead of a jq error.
    content="$(jq -r '
      (if type == "object" then [.content, .text, .body, .message] else [] end)
      | (map(select(type == "string")) | .[0]) // ""
    ' <<< "$record_json" 2>/dev/null)" || content=""
    if [[ -z "$content" || "$content" == "null" ]]; then
      SKIPPED=$((SKIPPED + 1))
      continue
    fi

    # Extract label: prefer 'title', then 'label', then 'name', then first 80 chars of content
    label="$(jq -r '
      [.title, .label, .name]
      | (map(select(type == "string")) | .[0]) // ""
    ' <<< "$record_json" 2>/dev/null)" || label=""
    if [[ -z "$label" || "$label" == "null" ]]; then
      label="${content:0:80}"
    fi
    label="${label:0:100}"

    post_memory "$content" "$label" '["imported","generic"]'
  done < <(jq -c 'if type == "array" then .[] else . end' "$INPUT_FILE" 2>/dev/null)
}
# ── Main ───────────────────────────────────────────────────────────────────────
# Print the banner, run the importer selected by --format, then summarize.
printf '%s\n' \
  "Neuron Refugee Importer" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Source: $INPUT_FILE" \
  "Format: $FORMAT" \
  "Soul: $SOUL_HOST" \
  ""

if [[ "$FORMAT" == "chatgpt" ]]; then
  import_chatgpt
elif [[ "$FORMAT" == "screenpipe" ]]; then
  import_screenpipe
elif [[ "$FORMAT" == "generic" ]]; then
  import_generic
fi

# ── Final report ───────────────────────────────────────────────────────────────
printf '%s\n' \
  "" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Import complete." \
  " Imported: ${IMPORTED}" \
  " Skipped: ${SKIPPED}" \
  " Errors: ${ERRORS}" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# A non-zero exit status tells callers that at least one record failed.
if (( ERRORS > 0 )); then
  exit 1
fi