#!/usr/bin/env bash
# memory-import-refugee.sh — Import conversation/memory history from external apps into Neuron
#
# Usage:
#   ./tools/memory-import-refugee.sh --format chatgpt    conversations.json
#   ./tools/memory-import-refugee.sh --format screenpipe screenpipe-export.json
#   ./tools/memory-import-refugee.sh --format generic    data.json[l]
#
# Supported formats:
#   chatgpt    — ChatGPT conversation export (conversations.json)
#   screenpipe — Screenpipe OCR export (frames array)
#   generic    — Any JSON array or JSONL with content/text fields
#
# The script writes Memory nodes to the Neuron soul via its HTTP API.
# The soul must be running on localhost:7770.

set -euo pipefail

# ── Config ─────────────────────────────────────────────────────────────────────
SOUL_HOST="http://localhost:7770"
# Note: POST /api/neuron/memory ignores the label field (soul hardcodes "memory:remembered").
# We embed the label in the content prefix so it is searchable.
MEMORY_API="${SOUL_HOST}/api/neuron/memory"
SLEEP_MS=100  # ms between API calls (rate limiting)

# Print the one-line usage synopsis to stderr.
print_usage() {
  echo "Usage: $0 --format <chatgpt|screenpipe|generic> <input-file>" >&2
}

# ── Dependency check ───────────────────────────────────────────────────────────
if ! command -v jq &>/dev/null; then
  echo "ERROR: jq is required but not installed." >&2
  echo "" >&2
  echo "Install it with:" >&2
  echo " macOS: brew install jq" >&2
  echo " Ubuntu: sudo apt-get install jq" >&2
  echo " Alpine: apk add jq" >&2
  exit 1
fi

# curl performs every HTTP call; without this check a missing curl would be
# misreported below as "soul is not responding".
if ! command -v curl &>/dev/null; then
  echo "ERROR: curl is required but not installed." >&2
  exit 1
fi

# ── Parse args ─────────────────────────────────────────────────────────────────
FORMAT=""
INPUT_FILE=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --format|-f)
      # Guard the value explicitly: under `set -u` a trailing --format would
      # otherwise die with a raw "unbound variable" message.
      if [[ $# -lt 2 ]]; then
        echo "ERROR: $1 requires a value." >&2
        print_usage
        exit 1
      fi
      FORMAT="$2"
      shift 2
      ;;
    --format=*|-f=*)
      FORMAT="${1#*=}"
      shift
      ;;
    -*)
      echo "Unknown option: $1" >&2
      print_usage
      exit 1
      ;;
    *)
      if [[ -z "$INPUT_FILE" ]]; then
        INPUT_FILE="$1"
      else
        echo "Unexpected argument: $1" >&2
        exit 1
      fi
      shift
      ;;
  esac
done

if [[ -z "$FORMAT" ]]; then
  echo "ERROR: --format is required." >&2
  print_usage
  exit 1
fi

if [[ -z "$INPUT_FILE" ]]; then
  echo "ERROR: No input file specified." >&2
  print_usage
  exit 1
fi

if [[ ! -f "$INPUT_FILE" ]]; then
  echo "ERROR: Input file not found: $INPUT_FILE" >&2
  exit 1
fi

case "$FORMAT" in
  chatgpt|screenpipe|generic) ;;
  *)
    echo "ERROR: Unknown format: $FORMAT" >&2
    echo "Supported formats: chatgpt, screenpipe, generic" >&2
    exit 1
    ;;
esac

# ── Soul health check ──────────────────────────────────────────────────────────
# On a connection failure curl itself prints "000" for %{http_code} AND exits
# non-zero. The fallback therefore has to replace the captured value; echoing a
# second "000" inside the substitution would yield "000000" and the comparison
# below would never match.
HTTP_CODE="$(curl -s --connect-timeout 5 -o /dev/null -w '%{http_code}' \
  "${SOUL_HOST}/api/neuron/memory" 2>/dev/null)" || HTTP_CODE="000"

if [[ "$HTTP_CODE" == "000" ]]; then
  echo "ERROR: Neuron soul is not responding at ${SOUL_HOST}." >&2
  echo " Start the soul service and retry." >&2
  exit 1
fi

# ── Counters ───────────────────────────────────────────────────────────────────
IMPORTED=0
SKIPPED=0
ERRORS=0

# ── Helper: post one memory node ───────────────────────────────────────────────
# post_memory CONTENT LABEL TAGS_JSON
#
# Note: the soul's POST /api/neuron/memory API ignores the label field (hardcodes
# it to "memory:remembered"). We embed the label as a prefix in the content so
# the title remains searchable via recall/search.
#######################################
# Post one memory node to the soul and update the run counters.
# The label is sent as a field and also embedded as a "[label] " prefix of the
# content.
# Globals:
#   MEMORY_API (read), SLEEP_MS (read; 100 when unset)
#   IMPORTED, SKIPPED, ERRORS (incremented)
# Arguments:
#   $1 - memory content (empty or the literal "null" is skipped)
#   $2 - label
#   $3 - tags, as JSON array text
# Returns:
#   0 always; failures are counted in ERRORS and logged to stderr so that a
#   single bad request does not abort the whole import under `set -e`.
#######################################
post_memory() {
  local content="$1"
  local label="$2"
  local tags_json="$3"

  # Skip empty content
  if [[ -z "$content" || "$content" == "null" ]]; then
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  local full_content="[${label}] ${content}"

  local payload
  if ! payload="$(jq -n \
    --arg content "$full_content" \
    --arg label "$label" \
    --argjson tags "$tags_json" \
    '{content: $content, label: $label, tags: $tags}' 2>/dev/null)"; then
    ERRORS=$((ERRORS + 1))
    echo " [ERROR] Could not build request payload for label \"${label:0:60}\"" >&2
    return 0
  fi

  # Capture curl's status instead of letting `set -e` kill the script.
  local response=""
  local curl_rc=0
  response="$(curl -s -X POST "$MEMORY_API" \
    -H "Content-Type: application/json" \
    -d "$payload" 2>/dev/null)" || curl_rc=$?

  # A non-JSON response (e.g. an HTML error page) makes jq fail; treat that as
  # "not ok" rather than a fatal error.
  local ok="false"
  if [[ "$curl_rc" -eq 0 ]]; then
    ok="$(jq -r '.ok // "false"' <<<"$response" 2>/dev/null)" || ok="false"
  fi

  if [[ "$ok" == "true" ]]; then
    IMPORTED=$((IMPORTED + 1))
  else
    ERRORS=$((ERRORS + 1))
    if [[ "$curl_rc" -ne 0 ]]; then
      echo " [ERROR] Request failed (curl exit ${curl_rc}) for label \"${label:0:60}\"" >&2
    else
      echo " [ERROR] API error for label \"${label:0:60}\": $response" >&2
    fi
  fi

  # Rate limit. Convert milliseconds to seconds arithmetically: gluing "0." in
  # front of the number is only correct for exactly three-digit values.
  local delay_ms="${SLEEP_MS:-100}"
  local delay_s
  printf -v delay_s '%d.%03d' "$((delay_ms / 1000))" "$((delay_ms % 1000))"
  sleep "$delay_s"
}

# ── Format: ChatGPT ────────────────────────────────────────────────────────────
#######################################
# Import the user-authored messages of a ChatGPT conversations.json export.
# Each non-empty string part of a user message becomes one memory, labelled
# with the conversation title (truncated to 100 characters).
# Globals: INPUT_FILE (read); counters via post_memory
# Exits 1 if the file is not a top-level JSON array.
#######################################
import_chatgpt() {
  echo "Format: ChatGPT conversation export"

  # Validate: must be JSON array at top level. The fallback keeps invalid JSON
  # from exiting silently before the error message below is printed.
  local top_type
  top_type="$(jq -r 'type' "$INPUT_FILE" 2>/dev/null)" || top_type=""
  if [[ "$top_type" != "array" ]]; then
    echo "ERROR: ChatGPT export must be a JSON array of conversations." >&2
    exit 1
  fi

  local conv_count
  conv_count="$(jq 'length' "$INPUT_FILE")"
  echo "Found ${conv_count} conversation(s) to process."
  echo ""

  # Count total user messages for progress display
  local total_msgs
  total_msgs="$(jq '[.[].mapping // {} | to_entries[] | .value.message | select(. != null and .author.role == "user") | .content.parts // [] | .[] | select(type == "string" and length > 0)] | length' "$INPUT_FILE" 2>/dev/null)" \
    || total_msgs="?"
  echo "Total user messages: ${total_msgs}"
  echo ""

  local msg_idx=0
  local conv_json title label msg_json msg_text

  while IFS= read -r conv_json; do
    title="$(jq -r '.title // "Untitled"' <<<"$conv_json")"
    # Truncate label to 100 chars
    label="${title:0:100}"

    # ChatGPT export uses a mapping dict structure:
    #   { uuid: { id, message: { author: { role }, content: { parts: [...] } }, ... } }
    # Parts are emitted JSON-encoded (one per line) and decoded individually, so
    # a message containing newlines stays ONE memory instead of one per line.
    # Errors are suppressed on purpose: a malformed conversation is skipped
    # rather than aborting the run.
    while IFS= read -r msg_json; do
      msg_text="$(jq -r '.' <<<"$msg_json")"
      msg_idx=$((msg_idx + 1))
      echo " Importing ${msg_idx}/${total_msgs}..."
      post_memory "$msg_text" "$label" '["chatgpt-import","conversation"]'
    done < <(jq -c '
      .mapping // {}
      | to_entries[]
      | .value.message
      | select(. != null)
      | select(.author.role == "user")
      | .content.parts // []
      | .[]
      | select(type == "string" and length > 0)
    ' <<<"$conv_json" 2>/dev/null)
  done < <(jq -c '.[]' "$INPUT_FILE")
}

# ── Format: Screenpipe ─────────────────────────────────────────────────────────
#######################################
# Import a Screenpipe OCR export ({"frames": [...]}).
# Frames with non-empty string text are grouped per app_name into 5-minute
# windows; each group becomes one memory whose content is the joined texts,
# truncated to 2000 characters.
# Globals: INPUT_FILE (read); counters via post_memory
# Exits 1 if the file is not a JSON object or the frames cannot be grouped.
#######################################
import_screenpipe() {
  echo "Format: Screenpipe OCR export"

  local top_type
  top_type="$(jq -r 'type' "$INPUT_FILE" 2>/dev/null)" || top_type=""
  if [[ "$top_type" != "object" ]]; then
    echo "ERROR: Screenpipe export must be a JSON object with a 'frames' array." >&2
    exit 1
  fi

  local frame_count
  frame_count="$(jq '.frames | if type == "array" then length else 0 end' "$INPUT_FILE" 2>/dev/null)" \
    || frame_count="0"
  echo "Found ${frame_count} frame(s) to process."

  if [[ "$frame_count" == "0" ]]; then
    echo "No frames found. Nothing to import."
    return 0
  fi

  # Build all groups in a single jq pass: one {app, ts, texts} object per line.
  # Bucket key per frame:
  #   numeric epoch (JSON number or all-digit string) -> floor(epoch / 300)
  #   other strings (ISO8601-like)  -> "YYYY-MM-DDTHH:" + floor(minute / 5),
  #   falling back to the first 15 characters when no minute can be parsed.
  local groups_out
  if ! groups_out="$(jq -c '
    .frames
    | map(select((.text | type) == "string" and (.text | length) > 0))
    | group_by(.app_name)
    | .[]
    | (.[0].app_name) as $app
    | sort_by(.timestamp)
    | reduce .[] as $f (
        {bucket: null, texts: [], ts: null, groups: []};
        ($f.timestamp // "") as $raw
        | ($raw | tostring) as $ts
        | (if ($raw | type) == "number" then
             ($raw / 300 | floor | tostring)
           elif ($ts | test("^[0-9]+$")) then
             ($ts | tonumber / 300 | floor | tostring)
           else
             (try ($ts[0:14] + (($ts[14:16] | tonumber) / 5 | floor | tostring))
              catch ($ts[0:15]))
           end) as $bucket
        | if .bucket == null then
            {bucket: $bucket, texts: [$f.text], ts: $ts, groups: .groups}
          elif .bucket == $bucket then
            {bucket: $bucket, texts: (.texts + [$f.text]), ts: $ts, groups: .groups}
          else
            {bucket: $bucket, texts: [$f.text], ts: $ts,
             groups: (.groups + [{app: $app, ts: .ts, texts: .texts}])}
          end
      )
    # flush the bucket still being accumulated
    | (.groups + [{app: $app, ts: .ts, texts: .texts}])
    | .[]
    | select(.texts | length > 0)
  ' "$INPUT_FILE")"; then
    echo "ERROR: Could not group Screenpipe frames; check the structure of the 'frames' array." >&2
    exit 1
  fi

  # Collect the groups first so progress can be shown as N/total.
  local -a groups=()
  local total_groups=0
  local line
  while IFS= read -r line; do
    if [[ -n "$line" ]]; then
      groups+=("$line")
      total_groups=$((total_groups + 1))
    fi
  done <<<"$groups_out"

  local group_idx group_json app_name ts_str content label tags_json
  for ((group_idx = 1; group_idx <= total_groups; group_idx++)); do
    group_json="${groups[group_idx - 1]}"
    echo " Importing ${group_idx}/${total_groups}..."

    app_name="$(jq -r '.app // "unknown"' <<<"$group_json")"
    ts_str="$(jq -r '.ts // ""' <<<"$group_json")"
    # Truncate inside jq: it slices by code point over the whole string, whereas
    # `cut -c` works per line and may cut through a multi-byte character.
    content="$(jq -r '.texts | join(" ") | .[0:2000]' <<<"$group_json")"
    label="Screenpipe: ${app_name} at ${ts_str:0:16}"
    tags_json="$(jq -n --arg app "$app_name" '["screenpipe-import","screen-capture",$app]')"

    post_memory "$content" "$label" "$tags_json"
  done
}

# ── Format: Generic ────────────────────────────────────────────────────────────
#######################################
# Import a generic JSON array, single JSON object, or JSONL file.
# Content is the first string field among content/text/body/message; the label
# is the first string field among title/label/name, else the first 80 characters
# of the content (labels are capped at 100 characters). Records without usable
# content, and records that are not objects, are counted as skipped.
# Globals: INPUT_FILE (read); GENERIC_RECORDS_FILE (written); ERRORS, SKIPPED
#######################################
import_generic() {
  echo "Format: Generic JSON/JSONL"

  # The temp path lives in a global so the EXIT trap can still remove it when
  # the script leaves via `exit`/`set -e` (a RETURN trap would not fire then).
  GENERIC_RECORDS_FILE="$(mktemp)"
  trap 'rm -f -- "${GENERIC_RECORDS_FILE:-}"' EXIT
  local records_file="$GENERIC_RECORDS_FILE"

  # Let jq normalise the input to one compact record per line. jq reads a stream
  # of JSON values, so this covers a single array (exploded), a single object
  # (even pretty-printed over many lines), and JSONL, with or without a
  # trailing newline.
  if ! jq -c 'if type == "array" then .[] else . end' "$INPUT_FILE" > "$records_file"; then
    ERRORS=$((ERRORS + 1))
    echo " [ERROR] ${INPUT_FILE} is not entirely valid JSON/JSONL; records after the first parse error were not read." >&2
  fi

  local total_records
  total_records="$(wc -l < "$records_file" | tr -d ' ')"
  echo "Found ${total_records} record(s) to process."
  echo ""

  local idx=0
  local record_json content label
  while IFS= read -r record_json; do
    [[ -z "$record_json" ]] && continue

    idx=$((idx + 1))
    echo " Importing ${idx}/${total_records}..."

    # Extract content: prefer 'content', fall back to 'text', 'body', 'message'.
    # Non-object records yield "" instead of a jq indexing error.
    content="$(jq -r '
      if type != "object" then ""
      elif (.content | type) == "string" then .content
      elif (.text | type) == "string" then .text
      elif (.body | type) == "string" then .body
      elif (.message | type) == "string" then .message
      else "" end
    ' <<<"$record_json" 2>/dev/null)" || content=""

    if [[ -z "$content" || "$content" == "null" ]]; then
      SKIPPED=$((SKIPPED + 1))
      continue
    fi

    # Extract label: prefer 'title', then 'label', then 'name'.
    label="$(jq -r '
      if (.title | type) == "string" then .title
      elif (.label | type) == "string" then .label
      elif (.name | type) == "string" then .name
      else "" end
    ' <<<"$record_json" 2>/dev/null)" || label=""

    if [[ -z "$label" || "$label" == "null" ]]; then
      label="${content:0:80}"
    fi
    label="${label:0:100}"

    post_memory "$content" "$label" '["imported","generic"]'
  done < "$records_file"

  rm -f -- "$records_file"
}

# ── Main ───────────────────────────────────────────────────────────────────────
echo "Neuron Refugee Importer"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Source: $INPUT_FILE"
echo "Format: $FORMAT"
echo "Soul: $SOUL_HOST"
echo ""

case "$FORMAT" in
  chatgpt) import_chatgpt ;;
  screenpipe) import_screenpipe ;;
  generic) import_generic ;;
esac

# ── Final report ───────────────────────────────────────────────────────────────
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Import complete."
echo " Imported: ${IMPORTED}"
echo " Skipped: ${SKIPPED}"
echo " Errors: ${ERRORS}"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

# Non-zero exit when any record failed, so callers can detect partial imports.
if [[ "${ERRORS:-0}" -gt 0 ]]; then
  exit 1
fi