#!/bin/sh
# =============================================================================
# Srasta Platform — Quick Install
#
# ⚠ THIS FILE HAS TWO HOMES.  Keep them BYTE-IDENTICAL:
#     scripts/get-srasta.sh        ← canonical source (this file)
#     website/public/install.sh    ← deployed at https://get.srasta.ai
#
# When editing one, copy to the other in the same commit.  A future
# CI check (#291 follow-up) will assert they match; until then, the
# convention lives here.
# =============================================================================
#
# Default mode (today's behavior):
#   curl -fsSL https://get.srasta.ai | sh
#   - docker pulls registry.gitlab.com/gandiva/srasta-images/installer:main
#   - runs the installer wizard on port 17724 (Srasta host-port band)
#
# Bundle mode (P3.2.2 of #165):
#   curl -fsSL https://get.srasta.ai | sh -s -- --bundle v1.0.0
#   - downloads the v1.0.0 release bundle from GitLab Releases
#   - cosign-verifies the signature (Sigstore-keyless)
#   - sha256-verifies the tarball
#   - extracts + runs the installer image at the digest pinned in the
#     bundle's release-manifest.json.  Cryptographically anchored install.
#
# The installer handles everything else: hardware detection, model selection,
# service deployment, secrets management, and TLS certificate provisioning.
# =============================================================================
set -e

# ── Configuration defaults ──────────────────────────────────────────────────
# Every value below can be overridden from the environment; the command-line
# flags handled by parse_args (further down) override the environment.
#
# SRASTA_IMAGE_TAG selects the image CHANNEL (main | dev | <branch>) for the
# installer AND every gandiva service image (docker-compose.yml resolves them via
# the same var) — `SRASTA_IMAGE_TAG=dev curl … | sh` tests the whole stack against
# the dev branch's freshly-built images, no dev->main promotion.  Default: main.
# (Bundle mode overrides INSTALLER_IMAGE with a pinned digest below, so channel
# only affects the unpinned default path.)
SRASTA_IMAGE_TAG="${SRASTA_IMAGE_TAG:-main}"
INSTALLER_IMAGE="${SRASTA_INSTALLER_IMAGE:-registry.gitlab.com/gandiva/srasta-images/installer:${SRASTA_IMAGE_TAG}}"
INSTALLER_PORT="${SRASTA_PORT:-17724}"
# Bind address for the host-side port mapping.  Default 127.0.0.1 (safe —
# loopback only) for the "I'm running this on the box I'll browse from"
# case.  Set to 0.0.0.0 when the operator drives the wizard from a
# different machine (laptop → cluster node over LAN/Tailscale).  Operator-
# token auth (#353) makes 0.0.0.0 safe — only the token-holder can hit
# privileged routes; the wizard URL with `?token=` is the entry point.
INSTALLER_BIND_ADDR="${SRASTA_INSTALLER_BIND_ADDR:-127.0.0.1}"
CONTAINER_NAME="srasta-installer"
BUNDLE_VERSION=""
SRASTA_GITLAB_URL="${SRASTA_GITLAB_URL:-https://gitlab.com/gandiva-tech/srasta}"

# parse_args [--port N] [--bind ADDR] [--image REF] [--name NAME] [--bundle VER]
#
# Applies command-line overrides to the globals above.  Every recognised
# option takes exactly one value.  Exits 1 with a message when an option is
# unknown or when an option is the last word on the command line (without the
# explicit check, `shift 2` fails and the script dies with no explanation, or
# with a bare "can't shift that many", depending on the shell).
parse_args() {
    while [ $# -gt 0 ]; do
        case "$1" in
            --port|--bind|--image|--name|--bundle)
                if [ $# -lt 2 ]; then
                    echo "ERROR: option $1 requires a value."
                    exit 1
                fi
                case "$1" in
                    --port)   INSTALLER_PORT="$2" ;;
                    --bind)   INSTALLER_BIND_ADDR="$2" ;;
                    --image)  INSTALLER_IMAGE="$2" ;;
                    --name)   CONTAINER_NAME="$2" ;;
                    --bundle) BUNDLE_VERSION="$2" ;;
                esac
                shift 2
                ;;
            *)
                echo "Unknown option: $1"
                exit 1
                ;;
        esac
    done
}
parse_args "$@"

# Installer banner: blank line, three-line box, blank line.
printf '%s\n' \
    "" \
    "  ┌─────────────────────────────────────┐" \
    "  │       Srasta Platform Installer      │" \
    "  └─────────────────────────────────────┘" \
    ""

# ── Preflight ────────────────────────────────────────────────────────────────

# Preflight 1/3 — the docker CLI must be on PATH.
command -v docker >/dev/null 2>&1 || {
    printf '%s\n' \
        "ERROR: Docker is not installed." \
        "  Install Docker: https://docs.docker.com/get-docker/"
    exit 1
}

# Preflight 2/3 — the daemon must answer `docker info`.
docker info >/dev/null 2>&1 || {
    # Distinguish "daemon not running" from "user not in docker group" —
    # they look the same to Docker CLI but the fix is very different.
    # The presence of the socket is the discriminator.
    # See #201 §3 (install bootstrap pre-flight checks).
    if [ ! -S /var/run/docker.sock ]; then
        printf '%s\n' \
            "ERROR: Docker daemon is not running." \
            "  Start Docker and try again:" \
            "    sudo systemctl start docker  # Linux" \
            "    open -a Docker                # macOS"
    else
        printf '%s\n' \
            "ERROR: Cannot talk to Docker daemon, but the socket exists at" \
            "  /var/run/docker.sock.  Most likely your user is not in the" \
            "  'docker' group." \
            "" \
            "  Fix:" \
            '    sudo usermod -aG docker $USER' \
            "    # then log out and back in, OR run 'newgrp docker' in this shell" \
            "" \
            "  Verify with:  groups | grep docker"
    fi
    exit 1
}

# Preflight 3/3 — the Compose v2 plugin (`docker compose`, not docker-compose).
docker compose version >/dev/null 2>&1 || {
    printf '%s\n' \
        "ERROR: Docker Compose v2 plugin is missing." \
        "  Install: https://docs.docker.com/compose/install/" \
        "  Verify with:  docker compose version"
    exit 1
}
# SSH keypair advisory (never fatal).  Multi-host installs SSH from the
# control plane to worker nodes; a fresh box often has no keypair yet, so
# point the operator at ssh-keygen now rather than letting the wizard
# dead-end at the multi-host SSH step.  See #201 §3.
_have_ssh_key=no
for _ssh_key in "${HOME}/.ssh/id_ed25519" "${HOME}/.ssh/id_rsa"; do
    if [ -f "${_ssh_key}" ]; then
        _have_ssh_key=yes
    fi
done
if [ "${_have_ssh_key}" = no ]; then
    printf '%s\n' \
        "WARNING: No SSH keypair found at ~/.ssh/id_ed25519 or ~/.ssh/id_rsa." \
        "  Multi-host installs need this for the control plane to reach" \
        "  worker nodes.  Single-host installs don't need it." \
        "" \
        "  To generate one:" \
        "    ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -N ''" \
        "" \
        "  Continuing — you can skip this if you only need single-host."
fi
# Fail early when something else is LISTENING on the chosen TCP port.
# Best-effort: skipped entirely when lsof is not installed.
#
#  * -iTCP:PORT -sTCP:LISTEN restricts the match to TCP listeners.  A bare
#    `-i :PORT` also matches UDP sockets and established connections that
#    merely involve that port number, which produced false "in use" errors.
#  * -nP skips DNS and service-name lookups so the probe stays fast.
#  * A previous run's installer container legitimately holds the port and is
#    removed later in this script before the new one starts, so its own
#    listener must not block a re-install (it is visible to lsof e.g. under
#    Docker Desktop or rootless Docker).
if command -v lsof >/dev/null 2>&1; then
    _port_is_ours=no
    if docker port "${CONTAINER_NAME}" 2>/dev/null | grep -q ":${INSTALLER_PORT}\$"; then
        _port_is_ours=yes
    fi
    if [ "${_port_is_ours}" = no ] \
        && lsof -nP -iTCP:"${INSTALLER_PORT}" -sTCP:LISTEN >/dev/null 2>&1; then
        echo "ERROR: Port ${INSTALLER_PORT} is already in use."
        echo "  Use --port N to choose a different port."
        exit 1
    fi
fi

# ── Production warning (#291): nudge default-mode users toward verified bundles ─

# Shown only in default (non-bundle) mode: the image is pulled by tag with no
# signature verification, so steer production users to --bundle.
case "$BUNDLE_VERSION" in
    "")
        printf '%s\n' \
            "  ┌──────────────────────────────────────────────────────────────────┐" \
            "  │  ⚠ Default mode pulls installer:main by tag — no signature       │" \
            "  │    verification.  For production / regulated installs, use:      │" \
            "  │                                                                  │" \
            "  │      curl -fsSL https://get.srasta.ai | sh -s -- --bundle vX.Y.Z │" \
            "  │                                                                  │" \
            "  │    Bundle mode cosign-verifies the release before extracting     │" \
            "  │    and pins the installer image by sha256 digest.  See:          │" \
            "  │      https://srasta.ai/security  /  docs/operations/             │" \
            "  │      release-verification-runbook.md                             │" \
            "  └──────────────────────────────────────────────────────────────────┘" \
            ""
        ;;
esac

# ── Bundle mode ──────────────────────────────────────────────────────────────

# Bundle mode: download a release bundle, verify it (sha256 + cosign), extract
# it, and replace INSTALLER_IMAGE with the digest-pinned reference recorded in
# the bundle's release-manifest.json.  Skipped entirely when --bundle was not
# given.  Any failing step aborts the install (set -e is in effect).
if [ -n "$BUNDLE_VERSION" ]; then
    echo "Bundle mode: ${BUNDLE_VERSION}"
    # Check every required tool up front so we fail before downloading anything.
    for cmd in curl tar sha256sum cosign jq; do
        if ! command -v "$cmd" >/dev/null 2>&1; then
            echo "ERROR: bundle mode requires '$cmd' (not found in PATH)."
            echo "  cosign:   brew install cosign  / github.com/sigstore/cosign"
            echo "  jq + tar: usually preinstalled or via your package manager"
            exit 1
        fi
    done

    # Scratch directory; the EXIT trap removes it on every exit path.
    WORKDIR="$(mktemp -d -t srasta-install.XXXXXX)"
    trap 'rm -rf "$WORKDIR"' EXIT

    BASE_URL="${SRASTA_GITLAB_URL}/-/releases/${BUNDLE_VERSION}/downloads"
    BUNDLE="srasta-${BUNDLE_VERSION}.tar.gz"

    echo "Fetching bundle from ${BASE_URL}/${BUNDLE} ..."
    curl -fsSL -o "${WORKDIR}/${BUNDLE}"          "${BASE_URL}/${BUNDLE}"
    curl -fsSL -o "${WORKDIR}/${BUNDLE}.sha256"   "${BASE_URL}/${BUNDLE}.sha256"
    curl -fsSL -o "${WORKDIR}/${BUNDLE}.sig"      "${BASE_URL}/${BUNDLE}.sig"

    echo "Verifying sha256 ..."
    ( cd "$WORKDIR" && sha256sum -c "${BUNDLE}.sha256" )

    echo "Verifying cosign signature (Sigstore keyless) ..."
    # The identity pattern is anchored with ^ so that only an identity that
    # STARTS with the project's CI URL is accepted, not one that merely
    # contains it somewhere in the string.
    cosign verify-blob \
        --certificate-identity-regexp '^https://gitlab\.com/gandiva-tech/srasta//\.gitlab-ci\.yml@.+' \
        --certificate-oidc-issuer 'https://gitlab.com' \
        --bundle "${WORKDIR}/${BUNDLE}.sig" \
        "${WORKDIR}/${BUNDLE}" >/dev/null

    echo "Extracting ..."
    ( cd "$WORKDIR" && tar -xzf "${BUNDLE}" )

    EXTRACTED="$(find "$WORKDIR" -maxdepth 1 -type d -name 'srasta-*' | head -1)"
    if [ -z "$EXTRACTED" ]; then
        echo "ERROR: extracted bundle not found in $WORKDIR" >&2
        exit 1
    fi

    # Pull the installer by digest from the verified manifest.
    # `// empty` matters: with -r, jq prints the literal string "null" for a
    # missing or null field, which would sail past the -z checks below and
    # yield an image reference ending in "@null".
    INSTALLER_DIGEST="$(jq -r '.images[] | select(.name | endswith("/installer")) | .digest // empty' "${EXTRACTED}/release-manifest.json" | head -1)"
    INSTALLER_NAME="$(jq -r '.images[] | select(.name | endswith("/installer")) | .name // empty' "${EXTRACTED}/release-manifest.json" | head -1)"
    if [ -z "$INSTALLER_DIGEST" ] || [ -z "$INSTALLER_NAME" ]; then
        echo "ERROR: installer image not found in bundle's release-manifest.json" >&2
        exit 1
    fi
    # The whole point of bundle mode is a digest pin — refuse anything that is
    # not a well-formed sha256 digest rather than handing it to `docker pull`.
    if ! printf '%s\n' "$INSTALLER_DIGEST" | grep -Eq '^sha256:[0-9a-f]{64}$'; then
        echo "ERROR: installer digest in release-manifest.json is not a sha256 digest: ${INSTALLER_DIGEST}" >&2
        exit 1
    fi
    INSTALLER_IMAGE="${INSTALLER_NAME}@${INSTALLER_DIGEST}"
    echo "Cryptographically anchored installer: ${INSTALLER_IMAGE}"
fi

# ── Funnel telemetry: emit pull-start event (#244) ──────────────────────────
#
# Anchored on a stable INSTALL_ID UUID per operator machine — generated
# once at ~/.srasta/install-id and reused on every install retry.  The
# wizard reads the same file so heartbeats from the deployed cluster
# carry the same install_id (powers the pull-start → wizard-launch →
# topology-applied → deploy-complete → license-activated funnel report).
#
# Event POST is intentionally best-effort: it times out after 3s, ignores
# errors, and never fails the install path.  No customer data — just the
# UUID + version + OS arch (count + shape, never content; same
# privacy contract as the heartbeat path, see /api/telemetry/v1/heartbeat).
SRASTA_FUNNEL_ENDPOINT="${SRASTA_FUNNEL_ENDPOINT:-https://srasta.ai/api/funnel/event}"
SRASTA_INSTALL_ID_FILE="${HOME}/.srasta/install-id"

# Generate the install ID once and reuse it afterwards.  The test is -s
# (exists AND non-empty), not -f: an empty file left behind by a failed
# earlier attempt would otherwise pin the ID to "" forever and silently
# disable the funnel event on every later run.
if [ ! -s "$SRASTA_INSTALL_ID_FILE" ]; then
    mkdir -p "$(dirname "$SRASTA_INSTALL_ID_FILE")" 2>/dev/null || true
    # Each writer sits in a brace group so that 2>/dev/null also covers a
    # failed open of the output file (e.g. unwritable $HOME), keeping this
    # best-effort path silent.  First available UUID source wins.
    if command -v uuidgen >/dev/null 2>&1; then
        { uuidgen | tr '[:upper:]' '[:lower:]' > "$SRASTA_INSTALL_ID_FILE"; } 2>/dev/null || true
    elif [ -r /proc/sys/kernel/random/uuid ]; then
        { cat /proc/sys/kernel/random/uuid > "$SRASTA_INSTALL_ID_FILE"; } 2>/dev/null || true
    elif command -v python3 >/dev/null 2>&1; then
        { python3 -c "import uuid; print(uuid.uuid4())" > "$SRASTA_INSTALL_ID_FILE"; } 2>/dev/null || true
    fi
fi
# Read the ID back only when the file is readable; redirecting from a missing
# file makes the shell print an error of its own before 2>/dev/null applies.
SRASTA_INSTALL_ID=""
if [ -r "$SRASTA_INSTALL_ID_FILE" ]; then
    SRASTA_INSTALL_ID="$(tr -d '[:space:]' < "$SRASTA_INSTALL_ID_FILE" 2>/dev/null || true)"
fi

# Emit the pull-start event.  Skipped when there is no ID or no curl; curl's
# exit status is discarded so a failed POST never aborts the install.
if [ -n "$SRASTA_INSTALL_ID" ] && command -v curl >/dev/null 2>&1; then
    SRASTA_OS="$(uname -s 2>/dev/null || echo unknown)"
    SRASTA_ARCH="$(uname -m 2>/dev/null || echo unknown)"
    SRASTA_FUNNEL_PAYLOAD=$(cat <<JSON
{"install_id":"${SRASTA_INSTALL_ID}","event_type":"pull-start","os":"${SRASTA_OS}","arch":"${SRASTA_ARCH}","payload":{"installer_image":"${INSTALLER_IMAGE}","bundle_version":"${BUNDLE_VERSION:-}"}}
JSON
)
    curl -sS -m 3 -X POST "$SRASTA_FUNNEL_ENDPOINT" \
        -H "content-type: application/json" \
        -d "$SRASTA_FUNNEL_PAYLOAD" >/dev/null 2>&1 || true
fi

# ── Pull + Start ─────────────────────────────────────────────────────────────

# Fetch the installer image; a failed pull is fatal.
echo "Pulling installer image..."
if ! docker pull "${INSTALLER_IMAGE}"; then
    echo "ERROR: Failed to pull ${INSTALLER_IMAGE}"
    echo "  Check your network or registry credentials."
    exit 1
fi

# Remove a container left over from a previous run, if there is one
# (errors — e.g. "no such container" — are ignored).
docker rm -f "${CONTAINER_NAME}" 2>/dev/null || true

echo "Starting installer on port ${INSTALLER_PORT}..."

# State directory — where the wizard writes installer_key, state.json,
# generated .env, etc.  On Linux this lives at /opt/srasta (the standard
# third-party-software path, survives operator $HOME wipes); on macOS we
# can't bind-mount /opt because Docker Desktop blocks it via File
# Sharing by default, so we fall back to ~/.local/share/srasta-installer
# which Docker Desktop allows out of the box.
#
# On Linux the container sees its state at /opt/srasta.  On macOS the state
# dir is bind-mounted at the SAME path inside the container and
# SRASTA_STATE_DIR is overridden to match (see the Darwin branch below).
# Decide where installer state lives on the host and how it is bind-mounted.
# Outputs: STATE_BIND (always), STATE_DIR_ENV (macOS only, else empty).
STATE_DIR_ENV=""
if [ "$(uname -s)" = "Darwin" ]; then
    SRASTA_STATE_HOST_DIR="${SRASTA_STATE_DIR:-${HOME}/.local/share/srasta-installer}"
    mkdir -p "${SRASTA_STATE_HOST_DIR}"
    # Path congruence (single-host-local E2E, 2026-06-04): the wizard drives
    # `docker compose` over the mounted socket; compose resolves bind-mount
    # SOURCES relative to the compose project dir (= the container's STATE_DIR)
    # and hands ABSOLUTE paths to the HOST daemon.  On Docker Desktop the daemon
    # only sees host paths, so the container's STATE_DIR must be IDENTICAL to
    # the host path AND under a shared root (/Users).  Mapping the host dir to
    # /opt/srasta broke deploys with "mounts denied: /opt/srasta/setup/... is
    # not shared from the host" (/opt isn't a Docker Desktop shared path and the
    # files live under ~/.local).  So bind congruently and override the image's
    # baked SRASTA_STATE_DIR=/opt/srasta to this host path.
    STATE_BIND="${SRASTA_STATE_HOST_DIR}:${SRASTA_STATE_HOST_DIR}"
    STATE_DIR_ENV="-e SRASTA_STATE_DIR=${SRASTA_STATE_HOST_DIR}"
else
    # Linux — pre-create /opt/srasta with the container's effective UID so
    # the wizard can write state there.  Without this the bind-mount lands
    # as root-owned and `_ensure_state_dir()` inside the container fails
    # with EACCES.
    #
    # Two cases:
    #   * Operator is UID 1000 → matches the container's baked-in srasta
    #     UID 1000 → chown to 1000 (legacy gdlab path).
    #   * Operator is UID != 1000 → the operator's UID/GID is handed to the
    #     container via SRASTA_TARGET_UID / SRASTA_TARGET_GID further down
    #     (multi-host UID fix #25) → chown to the operator's UID so the
    #     container can write through the bind-mount.
    # Also: if /opt/srasta already exists from a previous install with the
    # wrong owner, re-chown — otherwise switching operator UIDs leaves a
    # silently-broken install.
    #
    # #363: resolve the OPERATOR's actual UID/GID HERE, before the chown,
    # rather than later where the container env is assembled (which used to
    # be too late).  Previously the chown defaulted to 1000 when
    # _OPERATOR_UID was unset; any operator with UID != 1000 then hit EACCES
    # writing /opt/srasta/etc-hosts a few lines later (caught on gdlab-spark
    # 2026-05-25 — gdlab user is UID 1001, /opt/srasta got created owned by
    # 1000, script ran as 1001).
    _OPERATOR_UID="${_OPERATOR_UID:-$(id -u)}"
    _OPERATOR_GID="${_OPERATOR_GID:-$(id -g)}"
    _TARGET_UID="${_OPERATOR_UID}"
    _TARGET_GID="${_OPERATOR_GID}"
    if [ ! -d /opt/srasta ]; then
        sudo install -d -o "${_TARGET_UID}" -g "${_TARGET_GID}" -m 755 /opt/srasta || {
            echo "ERROR: could not create /opt/srasta — sudo required for first install."
            exit 1
        }
    else
        # Existing dir — ensure ownership matches the container's effective
        # UID even if a previous install used a different one.
        _CURRENT_OWNER="$(stat -c %u /opt/srasta 2>/dev/null || echo "")"
        if [ "${_CURRENT_OWNER}" != "${_TARGET_UID}" ]; then
            sudo chown -R "${_TARGET_UID}:${_TARGET_GID}" /opt/srasta || {
                echo "ERROR: could not re-chown /opt/srasta to ${_TARGET_UID}:${_TARGET_GID}."
                exit 1
            }
        fi
    fi
    STATE_BIND="/opt:/opt"

    # ── Phase 1 of OOM-prevention loop ────────────────────────────────────
    # Drop OS page caches to maximize MemAvailable before launching the
    # installer + smoke gate.  On unified-memory accelerators (V1 = GB10
    # Blackwell-edge per project_v1_blackwell_scope), the OS page cache
    # competes with vLLM for the same physical RAM that CUDA reports as
    # "GPU memory".  A host with 50+ GB of cached HF dataset files makes
    # CUDA see less "free GPU memory" than what the recipe budgets for,
    # producing OOM at vLLM engine init even though the scorer said the
    # model fits.  Caught 2026-05-25 on gdlab-spark single-host install
    # (Qwen3-Coder-Next-FP8 needed 60 GB, only 50 GB available because
    # 51 GB was in page cache).
    #
    # Drop is cheap (~ms); the OS re-caches on demand.  Discrete-VRAM
    # hardware (B200, RTX 50xx) is unaffected — their VRAM isn't unified
    # with host RAM; the operation is a near-free no-op there.
    # See project_oom_prevention_strategic_loop.
    #
    # Both MemAvailable readings are normalised to a plain integer.  awk
    # exits 0 and prints NOTHING when /proc/meminfo has no MemAvailable
    # line, so `|| echo 0` does not fire and the value would be empty —
    # which makes the $(( )) below a fatal syntax error.
    _MEM_BEFORE=$(awk '/^MemAvailable:/ {print int($2/1024)}' /proc/meminfo 2>/dev/null || echo 0)
    case "${_MEM_BEFORE}" in
        ''|*[!0-9]*) _MEM_BEFORE=0 ;;
    esac
    if sudo sh -c 'sync && echo 3 > /proc/sys/vm/drop_caches' 2>/dev/null; then
        _MEM_AFTER=$(awk '/^MemAvailable:/ {print int($2/1024)}' /proc/meminfo 2>/dev/null || echo 0)
        case "${_MEM_AFTER}" in
            ''|*[!0-9]*) _MEM_AFTER=0 ;;
        esac
        _MEM_FREED=$(( _MEM_AFTER - _MEM_BEFORE ))
        if [ "$_MEM_FREED" -gt 100 ]; then
            echo "Freed ${_MEM_FREED} MiB of OS page cache (MemAvailable ${_MEM_BEFORE} → ${_MEM_AFTER} MiB) for inference."
        fi
    else
        echo "Note: could not drop OS page caches (sudo unavailable).  Install will proceed; on unified-memory hardware (GB10), consider running: sudo sync && sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches' before installing."
    fi
fi
# ~/.ssh is mounted READ-WRITE — the wizard's ssh-copy-id needs to mktemp
# under it for known_hosts management.  Don't mount ~/.srasta from the
# host: the wizard manages that path internally and a host mount causes
# UID-mismatch errors when ssh-keygen tries to write the installer key.
# See #201 §3 + #199.
#
# POSIX sh has no arrays, so the `docker run` below has to special-case
# whether ~/.ssh exists.  That extra handling is the cost of staying
# portable.
#
# /opt:/opt is the state-bind mount.  After the image restructure
# (#208 follow-up): app code lives at /srv/srasta-installer, Python
# venv at /usr/local/venv — neither is shadowed by /opt:/opt.
# /opt/srasta inside the container is reserved for host-visible
# state (.env, .srasta/, operator-edited LiteLLM config) so single-
# node compose can read what the wizard writes.  Multi-host installs
# and K8s use the same image; the mount target is host /opt for
# single-node, tmpfs for multi-host, PVC for K8s.
# ── Network: inherit host name resolution ───────────────────────────────────
#
# Containers use Docker's default DNS, not the operator's resolver, so any
# host names resolved via Tailscale MagicDNS, /etc/hosts, mDNS, or
# corporate DNS are invisible inside the container.  The wizard then can't
# SSH to target hosts the operator entered by name (`gdlab-spark`,
# `prod-app-1`, etc.) and the install dies at step 1.
#
# We generate a unified /etc/hosts at ${SRASTA_STATE_HOST_DIR}/etc-hosts
# and bind-mount it read-only.  Two automatic shims, both no-op when the
# operator already has working DNS:
#
#  1. Copy the host's /etc/hosts so any static entries the operator
#     manages by hand still work inside the container.
#  2. If Tailscale is logged in, append every tailnet peer.  Catches
#     Tailscale MagicDNS users (very common in ops) without prompting.
#
# Operators with neither hit the wizard step 1 SSH error, which is now
# matched by recovery_catalog.py's `host-resolution-failed` pattern and
# surfaces a Q10a card with the IP-fallback fix.
# Build the container's /etc/hosts.  Output: HOSTS_BIND — either the `-v`
# bind-mount option for the generated file, or empty when the file could not
# be created (the container then keeps Docker's default /etc/hosts).
HOSTS_BIND=""
HOSTS_FILE="${SRASTA_STATE_HOST_DIR:-/opt/srasta}/etc-hosts"
mkdir -p "$(dirname "$HOSTS_FILE")" 2>/dev/null || true
# Truncate/create inside a SUBSHELL: `:` is a POSIX special built-in, and a
# failed redirection on a special built-in makes a non-interactive POSIX shell
# exit outright instead of taking the "no hosts file" fallback.  The subshell
# absorbs that exit and hands back a plain status; it also lets 2>/dev/null
# cover the failed open itself.
if ( : > "$HOSTS_FILE" ) 2>/dev/null; then
    # Shim 1: carry over the host's static entries.
    if [ -r /etc/hosts ]; then
        cat /etc/hosts >> "$HOSTS_FILE"
        printf '\n' >> "$HOSTS_FILE"
    fi

    # Shim 2: append tailnet peers.  python3 is checked explicitly and the
    # assignment is guarded with `||`: without either, a host that has
    # tailscale but no working python3 fails the command substitution and
    # set -e kills the whole install over an optional nicety.
    if command -v tailscale >/dev/null 2>&1 \
        && command -v python3 >/dev/null 2>&1 \
        && tailscale status >/dev/null 2>&1; then
        TS_PEERS=$(tailscale status --json 2>/dev/null | python3 -c '
import json, sys
try:
    data = json.load(sys.stdin)
except Exception:
    sys.exit(0)
sources = list((data.get("Peer") or {}).values())
self_obj = data.get("Self") or {}
if self_obj:
    sources.insert(0, self_obj)
for src in sources:
    name = (src.get("HostName") or "").strip()
    ips = src.get("TailscaleIPs") or []
    if name and ips and all(c.isalnum() or c in ".-_" for c in name):
        print(f"{ips[0]} {name}")
' 2>/dev/null) || TS_PEERS=""
        if [ -n "$TS_PEERS" ]; then
            printf '%s\n' "$TS_PEERS" >> "$HOSTS_FILE"
            TS_COUNT=$(printf '%s\n' "$TS_PEERS" | wc -l | tr -d ' ')
            echo "Tailscale detected — auto-mapped ${TS_COUNT} tailnet peer(s) into the container."
        fi
    fi

    HOSTS_BIND="-v ${HOSTS_FILE}:/etc/hosts:ro"
fi

# GPU passthrough: single-node hardware detection runs nvidia-smi INSIDE the
# installer container, so on an NVIDIA host the container needs --gpus all
# (the nvidia-container-runtime injects nvidia-smi + driver libs).  Without
# it, detect() reports cpu_only on a GPU box and no curated GPU recipe is
# offered.  No-op on CPU hosts.
# GPUS_FLAG is "--gpus all" only when nvidia-smi exists AND can list devices.
if ! command -v nvidia-smi >/dev/null 2>&1; then
    GPUS_FLAG=""
elif nvidia-smi -L >/dev/null 2>&1; then
    GPUS_FLAG="--gpus all"
else
    GPUS_FLAG=""
fi

# Docker socket access: the installer container runs as the non-root srasta
# user, which isn't in the host's docker group, so it can't drive the mounted
# /var/run/docker.sock (pull / compose / smoke-gate all fail with EACCES).
# This --group-add seeds the INITIAL (root) container process; the real grant
# to the gosu'd srasta user happens in srasta-entrypoint, which reads the
# socket's in-container gid and usermods srasta into it (gid 0 included — the
# Docker Desktop for Mac case).  stat -c (Linux) / stat -f (BSD/macOS).
# GROUPADD carries the docker socket's owning gid, when it can be read.
GROUPADD=""
if [ -S /var/run/docker.sock ]; then
    # GNU stat spelling first, BSD/macOS spelling second, empty as last resort.
    _sock_gid="$(
        stat -c %g /var/run/docker.sock 2>/dev/null \
            || stat -f %g /var/run/docker.sock 2>/dev/null \
            || echo ""
    )"
    if [ -n "${_sock_gid}" ]; then
        GROUPADD="--group-add ${_sock_gid}"
    fi
fi

# Pass the host's HF cache path into the installer container so the smoke
# gate's child containers can bind-mount it (and avoid re-downloading 40GB
# of weights per recipe).  Fix for #27 — pre-fix, every fresh install timed
# out the smoke gate on cold-disk weight download.  $HOME on the host is
# the right path; the smoke container does `docker run -v` against the
# host's docker daemon, so this path is interpreted host-side.
# Forward the host's Hugging Face cache path only when that directory exists.
_hf_cache_dir="${HOME}/.cache/huggingface"
HF_CACHE_ENV=""
[ ! -d "${_hf_cache_dir}" ] || HF_CACHE_ENV="-e SRASTA_HF_CACHE_DIR=${_hf_cache_dir}"

# Container entrypoint dynamic-UID alignment (#25 strategic fix).  Passes
# the host operator's UID/GID into the container; entrypoint.sh usermods
# srasta to match before dropping privileges via gosu.  This handles ALL
# host operator UIDs (1000, 1001, others) without --user override (which
# broke other container internals).  Defaults to 1000:1000 in entrypoint
# if unset, preserving legacy behaviour.
#
# (#363) _OPERATOR_UID/_GID resolved earlier as part of the /opt/srasta
# chown logic; preserved here as defensive defaults so the env vars
# work even on macOS (which skips the Linux chown block above).
# Keep any UID/GID resolved earlier; otherwise ask `id` for the caller's.
if [ -z "${_OPERATOR_UID:-}" ]; then
    _OPERATOR_UID="$(id -u)"
fi
if [ -z "${_OPERATOR_GID:-}" ]; then
    _OPERATOR_GID="$(id -g)"
fi
TARGET_UID_ENV="-e SRASTA_TARGET_UID=${_OPERATOR_UID} -e SRASTA_TARGET_GID=${_OPERATOR_GID}"

# Host facts the installer container CAN'T see through the Docker Desktop VM:
# a container on Docker Desktop for Mac reads the VM's memory cap (e.g. ~8 GB),
# NOT the Mac's physical RAM (e.g. 48 GB).  That fooled the memory preflight
# into refusing installs on capable Macs.  This launcher runs natively on the
# host, so read the truth here and inject it.  No-op on Linux (the container
# shares the host kernel and sees true RAM — the GB10 product path is unaffected).
# HOST_FACTS_ENV stays empty everywhere except macOS, where it accumulates
# `-e` options describing the real host (OS, RAM in GiB, free disk in GiB).
HOST_FACTS_ENV=""
case "$(uname -s 2>/dev/null)" in
    Darwin)
        # A Mac is a Mac regardless of whether the probes below succeed.
        HOST_FACTS_ENV="-e SRASTA_HOST_OS=darwin"

        # Physical RAM in bytes; anything that is not a plain number counts as 0.
        _host_mem_bytes="$(sysctl -n hw.memsize 2>/dev/null || echo 0)"
        case "${_host_mem_bytes}" in
            *[!0-9]*|'') _host_mem_bytes=0 ;;
        esac
        if [ "${_host_mem_bytes}" -gt 0 ]; then
            _host_ram_gb=$(( _host_mem_bytes / 1024 / 1024 / 1024 ))
            HOST_FACTS_ENV="${HOST_FACTS_ENV} -e SRASTA_HOST_RAM_GB=${_host_ram_gb}"
        fi

        # Free disk on the volume where weights + install state land ($HOME).
        # The container reads the Docker Desktop VM's small virtual disk, NOT
        # the Mac's real free space, so the installer's <150GB headroom
        # recommendation mis-advises without this.  POSIX `df -P` => one row
        # per filesystem; Available is the 4th column, in 1024-byte blocks.
        _host_disk_kb="$(df -P -k "${HOME}" 2>/dev/null | awk 'NR==2 {print $4}')"
        case "${_host_disk_kb}" in
            *[!0-9]*|'') _host_disk_kb=0 ;;
        esac
        if [ "${_host_disk_kb}" -gt 0 ]; then
            _host_disk_gb=$(( _host_disk_kb / 1024 / 1024 ))
            HOST_FACTS_ENV="${HOST_FACTS_ENV} -e SRASTA_HOST_DISK_FREE_GB=${_host_disk_gb}"
        fi
        ;;
esac

# -----------------------------------------------------------------------------
# Mandatory registration (P1.2d) — community installs identify themselves so we
# can support and notify operators.  The installer needs a valid email + org to
# bootstrap catalog access; without them no certified models are served.
#
# Precedence: explicit env (headless/CI) wins; otherwise prompt on a TTY.  We
# export the values and forward them by NAME (docker -e NAME) so an org name
# with spaces survives — building "-e NAME=value" would word-split on the space.
# -----------------------------------------------------------------------------
# Start from the environment (headless/CI path).
_reg_email="${SRASTA_REGISTER_EMAIL:-}"
_reg_org="${SRASTA_REGISTER_ORG:-}"

# Prompt for whatever is still missing, but only when a terminal is readable.
_reg_needs_prompt=no
[ -n "${_reg_email}" ] || _reg_needs_prompt=yes
[ -n "${_reg_org}" ] || _reg_needs_prompt=yes
if [ "${_reg_needs_prompt}" = yes ] && [ -r /dev/tty ]; then
    printf '\nSrasta community installs are registered so we can support and notify you.\n'
    [ -n "${_reg_email}" ] || {
        printf 'Work email: '
        IFS= read -r _reg_email < /dev/tty || true
    }
    [ -n "${_reg_org}" ] || {
        printf 'Organization: '
        IFS= read -r _reg_org < /dev/tty || true
    }
fi

# Normalise: the email loses ALL whitespace; the org only loses leading and
# trailing whitespace (inner spaces are part of the name).
_reg_email="$(printf '%s' "${_reg_email}" | tr -d '[:space:]')"
_reg_org="$(printf '%s' "${_reg_org}" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//')"

# Both values are mandatory — refuse to continue without them.
if [ -z "${_reg_email}" ] || [ -z "${_reg_org}" ]; then
    printf '%s\n' \
        "Error: registration required to install Srasta." \
        "       Set SRASTA_REGISTER_EMAIL and SRASTA_REGISTER_ORG (a valid work email" \
        "       and organization), or run interactively so we can prompt for them." >&2
    exit 1
fi

# Export and forward by NAME so a value containing spaces is not word-split.
SRASTA_REGISTER_EMAIL="${_reg_email}"
SRASTA_REGISTER_ORG="${_reg_org}"
export SRASTA_REGISTER_EMAIL SRASTA_REGISTER_ORG
REGISTER_ENV="-e SRASTA_REGISTER_EMAIL -e SRASTA_REGISTER_ORG"

# Forward the image channel so the wizard's `docker compose` (run inside the
# container) resolves the gandiva service images to the same tag as the installer.
export SRASTA_IMAGE_TAG
CHANNEL_ENV="-e SRASTA_IMAGE_TAG"

# Launch the installer container (detached, restarted unless stopped).
#
# There is ONE `docker run`.  The only part that depends on the host is the
# ~/.ssh mount, so that option is carried in the positional parameters
# ("$@" is the one list POSIX sh has, and it expands to nothing when empty).
# Keeping a second, hand-copied invocation for the no-~/.ssh case let the two
# drift apart: that copy had lost ${HF_CACHE_ENV} and ${TARGET_UID_ENV}, so a
# host without ~/.ssh ran the container without the operator's UID/GID and
# without the HF cache path.
#
# The script's own arguments were consumed by the option parser near the top,
# so reusing the positional parameters here is safe.
set --
if [ -d "${HOME}/.ssh" ]; then
    set -- -v "${HOME}/.ssh:/home/srasta/.ssh:rw"
fi
# shellcheck disable=SC2086 # GPUS_FLAG, GROUPADD, the *_ENV strings and HOSTS_BIND are intentionally word-split
docker run -d \
    --name "${CONTAINER_NAME}" \
    --restart unless-stopped \
    ${GPUS_FLAG} \
    ${GROUPADD} \
    ${HF_CACHE_ENV} \
    ${TARGET_UID_ENV} \
    ${HOST_FACTS_ENV} \
    ${STATE_DIR_ENV} \
    ${REGISTER_ENV} \
    ${CHANNEL_ENV} \
    -p "${INSTALLER_BIND_ADDR}:${INSTALLER_PORT}:9000" \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v "${STATE_BIND}" \
    ${HOSTS_BIND} \
    "$@" \
    "${INSTALLER_IMAGE}"

# Wait for health: poll the installer's /api/version from inside the
# container, once per second, for at most 30 attempts.
#
# The counter is plain POSIX arithmetic rather than `seq`, which is not a
# POSIX utility — where it is missing, `for _ in $(seq 1 30)` iterates zero
# times and the wait is skipped without any indication.  A timeout is
# reported on stderr but is not fatal: the steps after this already cope with
# an unreachable installer (the version falls back to "unknown").
echo "Waiting for installer to start..."
_health_probe="import urllib.request; urllib.request.urlopen('http://localhost:9000/api/version')"
_health_ok=no
_health_try=0
while [ "${_health_try}" -lt 30 ]; do
    if docker exec "${CONTAINER_NAME}" python3 -c "${_health_probe}" 2>/dev/null; then
        _health_ok=yes
        break
    fi
    _health_try=$(( _health_try + 1 ))
    sleep 1
done
if [ "${_health_ok}" != yes ]; then
    echo "WARNING: the installer did not answer its health check within 30s." >&2
    echo "  Inspect it with:  docker logs ${CONTAINER_NAME}" >&2
fi

# ── Apple host-native serving watcher (#393) ────────────────────────────────
#
# On a Mac host the installer runs in the Linux container and CAN'T reach Metal,
# so it can't serve the Apple engine (ollama/mlx) itself — that's host-native.
# Start a host-side watcher the container coordinates with: when the wizard's
# deploy reaches the host-native serving step it writes a request into the shared
# state dir; this watcher runs the serving driver (install → launchd-supervise →
# wait → pull) ON THE HOST and writes a status back.  Modules are stdlib-only by
# design, so the watcher needs nothing but the operator's python3 and self-exits
# when idle.  No-op on Linux: containerized vLLM on the GPU serves in-cluster, so
# the GB10 product path is unaffected.
# macOS only: (re)start the host-side serving watcher next to the state dir.
if [ "$(uname -s)" = "Darwin" ]; then
    _SRASTA_DIR="${SRASTA_STATE_HOST_DIR}/.srasta"
    mkdir -p "${_SRASTA_DIR}"
    _WATCH_PIDFILE="${_SRASTA_DIR}/host-serve-watcher.pid"
    _WATCH_LOG="${_SRASTA_DIR}/host-serve-watcher.log"
    if ! command -v python3 >/dev/null 2>&1; then
        printf '%s\n' \
            "  WARNING: python3 not found on this Mac — the Apple host-native serving" \
            "  watcher can't start, so no model will be served.  Install the Xcode" \
            "  Command Line Tools (xcode-select --install), then re-run this installer."
    else
        # Stop a watcher left over from a previous run (idempotent re-install).
        [ ! -f "${_WATCH_PIDFILE}" ] || kill "$(cat "${_WATCH_PIDFILE}")" 2>/dev/null || true
        # Extract the serving driver + watcher from the image so they run on the
        # host with the operator's python3 (both modules are stdlib-only).
        _WATCH_DIR="${_SRASTA_DIR}/host-serve"
        rm -rf "${_WATCH_DIR}"
        mkdir -p "${_WATCH_DIR}"
        if ! docker cp "${CONTAINER_NAME}:/srv/srasta-installer/installer/inference" "${_WATCH_DIR}/" 2>/dev/null; then
            echo "  WARNING: could not extract the host-serve watcher from the installer"
            echo "  image — Apple model serving may not start.  See ${_SRASTA_DIR}/host-serve-watcher.log"
        else
            # Detach the watcher, append its output to the log, record its pid.
            nohup python3 "${_WATCH_DIR}/inference/host_serve_watcher.py" "${SRASTA_STATE_HOST_DIR}" \
                >> "${_WATCH_LOG}" 2>&1 &
            echo $! > "${_WATCH_PIDFILE}"
            echo "  Apple host-native serving watcher started (pid $(cat "${_WATCH_PIDFILE}"))."
        fi
    fi
fi

# Ask the running installer for its version; "unknown" when it can't be reached.
_version_probe="import urllib.request, json; print(json.load(urllib.request.urlopen('http://localhost:9000/api/version'))['version'])"
VERSION=$(docker exec "${CONTAINER_NAME}" python3 -c "${_version_probe}" 2>/dev/null || echo "unknown")

# Operator token (#353, Wave 2), scraped from the container log.  main.py
# emits `OPERATOR_TOKEN=[<token>]` once at lifespan startup; `tail -1` keeps
# the most recent occurrence so a restarted container yields its current
# token.  With auth explicitly disabled (env or legacy launch) there is no
# such line, TOKEN stays empty, and the URL built below is a bare one.
TOKEN=$(
    docker logs "${CONTAINER_NAME}" 2>&1 \
        | grep -oE 'OPERATOR_TOKEN=\[[^]]+\]' \
        | tail -1 \
        | sed -E 's/OPERATOR_TOKEN=\[(.+)\]/\1/' \
        || true
)

# Pick the URL host the operator should browse to:
#   * 127.0.0.1 bind → `localhost` (operator is on this box)
#   * 0.0.0.0 bind   → this host's `hostname` (operator is on another box on
#     the network; on Tailscale + MagicDNS, the bare hostname resolves to the
#     Tailscale IP; on LAN, hostname.local / IP works the same way)
# _srasta_url_host BIND_ADDR — print the host name the operator should browse to.
#   0.0.0.0                      → this machine's `hostname` (remote operator)
#   empty / 127.x.x.x / localhost → "localhost" (operator is on this box)
#   anything else                → the bind address itself
# The last case matters: with `--bind 192.168.1.10` the port is published on
# that address ONLY, so a "localhost" URL would point at nothing.
_srasta_url_host() {
    case "$1" in
        0.0.0.0)
            hostname 2>/dev/null || echo localhost
            ;;
        ''|127.*|localhost)
            echo localhost
            ;;
        *)
            printf '%s\n' "$1"
            ;;
    esac
}
URL_HOST="$(_srasta_url_host "${INSTALLER_BIND_ADDR}")"

# Append the operator token as a query parameter when there is one.
if [ -n "${TOKEN}" ]; then
    URL="http://${URL_HOST}:${INSTALLER_PORT}/?token=${TOKEN}"
else
    URL="http://${URL_HOST}:${INSTALLER_PORT}"
fi

# Final operator-facing summary: version, wizard URL, token-rotation and
# remote-access notes where they apply, then the stop/remove hints.
echo ""
echo "  Srasta Installer v${VERSION} is ready!"
printf '%s\n' \
    "" \
    "  Open your browser (click; the token is one-time per install):"
echo "    ${URL}"
echo ""

# Token notes — only when a token was found.
case "${TOKEN}" in
    "") ;;
    *)
        printf '%s\n' "  The URL contains your operator token — don't share it.  To rotate:"
        echo "    docker exec ${CONTAINER_NAME} rm -f /opt/srasta/.srasta/operator-token"
        echo "    docker restart ${CONTAINER_NAME}"
        echo ""
        ;;
esac

# Remote-access notes — only for the all-interfaces bind.
case "${INSTALLER_BIND_ADDR}" in
    0.0.0.0)
        printf '%s\n' \
            "  Installer bound to 0.0.0.0 — the wizard is reachable from any host" \
            "  on this machine's network.  Token + #353 auth gate the API."
        echo "  If ${URL_HOST} doesn't resolve from your laptop, substitute the IP."
        echo ""
        ;;
esac

printf '%s\n' "  For multi-host installs, ensure SSH keys are at ~/.ssh/"
echo "  To stop:  docker stop ${CONTAINER_NAME}"
echo "  To remove: docker rm ${CONTAINER_NAME}"
echo ""

# Best-effort browser launch.  Nothing is opened for a 0.0.0.0 bind: the
# operator is on another machine, so a browser spawned here would land on the
# wrong screen.  Otherwise use the first launcher found (xdg-open, then
# open) and ignore its outcome.
if [ "${INSTALLER_BIND_ADDR}" = "0.0.0.0" ]; then
    :
elif command -v xdg-open >/dev/null 2>&1; then
    xdg-open "${URL}" 2>/dev/null || true
elif command -v open >/dev/null 2>&1; then
    open "${URL}" 2>/dev/null || true
fi
