#!/usr/bin/env bash
# WS69 — install.ledgerworks.systems curl-pipe bootstrap for LINUX_CONNECTOR_v1.
#
# Single-shell-script automation of the WS67 12-step
# linux_endpoint_federation_join.md runbook (steps 1-11; step 12 is sign-off).
#
# Invocation (target user-facing form):
#     curl -fsSL https://install.ledgerworks.systems/linux | sudo bash
#
# Authored 2026-05-29 via WS69 dispatch (founder direct AskUserQuestion). Per
# feedback_solution_must_match_intent — interactive but minimal (only OAuth
# token + endpoint_id need founder input; everything else auto-detected).
# Per feedback_no_preemptive_examples — OAuth token NEVER bundled; founder
# provides at install time OR endpoint enters TOFU mode (gap; founder-ratify-owed).
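# Per CLAUDE.md item 6 fail-closed — a failed install refuses to continue and
# exits non-zero. Note: the EXIT trap in this script only PRINTS manual cleanup
# commands; files already written (keys, env file) are left in place so the
# idempotent re-run can pick them up.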
#
# Per AUTH §16 — Ed25519 signing key generated locally; never transmitted off-host.
# Per CLAUDE.md item 13 — federation.machines UPSERT performed by connector.py
# Phase 7 on first run (not by this script).
#
# Idempotent: re-running on a half-bootstrapped host completes the missing
# phases; fully-bootstrapped hosts no-op (skip-existing semantics).

# Strict mode: abort on unhandled command failure (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ─── Configuration knobs (env-overridable; sensible defaults) ────────────────
# Every knob keeps a value already present in the environment and only falls
# back to the default shown here when that value is unset or empty.

# Install prefix: connector tarball is extracted here and the venv lives below it.
LEDGERWORKS_ROOT="${LEDGERWORKS_ROOT:-/opt/ledgerworks}"
# Parent directory for the per-endpoint key directory and federation_oauth/ env files.
SECRETS_ROOT="${SECRETS_ROOT:-/opt/_secrets}"
# Base URL the connector tarball and its .sha256 file are downloaded from.
INSTALL_BASE_URL="${INSTALL_BASE_URL:-https://install.ledgerworks.systems}"
# NOTE(review): phase_4_venv fetches ${INSTALL_BASE_URL}/connector-${INSTALL_VERSION}.tar.gz,
# which does not match the "/v/<version>/" layout named in the trailing comment — confirm which is intended.
INSTALL_VERSION="${INSTALL_VERSION:-latest}"          # served at /v/<version>/ if set
# Default offered by the endpoint-id prompt. `hostname -s` runs only when the
# variable is unset/empty; if it fails, `set -e` aborts the script right here.
ENDPOINT_ID_DEFAULT="${ENDPOINT_ID_DEFAULT:-$(hostname -s)}"
# Optional DSN fallback consulted by phase_3_oauth before it prompts for one.
FEDERATION_DSN_DEFAULT="${FEDERATION_DSN_DEFAULT:-}"  # supplied via /opt/_secrets/federation_oauth/<endpoint_id>.env post-step-3
# Only echoed in the success banner within this file.
# NOTE(review): nothing visible here passes it to the timer unit — confirm the unit's cadence matches.
HEARTBEAT_CADENCE_SEC="${HEARTBEAT_CADENCE_SEC:-300}"
# Note the env var is BOOTSTRAP_INTERACTIVE; INTERACTIVE is the in-script name read by prompt().
INTERACTIVE="${BOOTSTRAP_INTERACTIVE:-1}"             # 0 = headless (read all from env)

# ─── Output helpers ──────────────────────────────────────────────────────────
# log <message...> — write one timestamped line to stderr.
# The stamp is produced with `date -u` so the trailing "Z" really denotes UTC
# (the bash %(...)T format renders local time), matching the `date -u` stamps
# used elsewhere in this script.
log()  { printf '[bootstrap %s] %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" >&2; }
# fail <message...> — log the message with a "FAIL: " prefix and exit 1.
fail() { log "FAIL: $*"; exit 1; }
# ok <message...> — log an indented success line ("  ok: ...").
ok()   { log "  ok: $*"; }

# prompt <var_name> <description> [<default>]
#
# Ensures the variable named <var_name> holds a non-empty value and exports it
# when it had to be asked for.
#   * Headless mode (INTERACTIVE != 1): the variable must already be set;
#     otherwise fail().
#   * Interactive mode: an existing value is kept; otherwise the operator is
#     asked on stderr, with <default> used for an empty answer.
#
# Variables whose name contains TOKEN, SECRET, PASSWORD, DSN or KEY are treated
# as sensitive: their value is never written to the log and typed input is not
# echoed.
#
# Input source: when stdin is not a terminal (the documented
# `curl ... | sudo bash` form, where stdin carries the script itself) the
# answer is read from /dev/tty if it can be opened. Piped stdin is only used
# as a last resort; use BOOTSTRAP_INTERACTIVE=0 plus env vars for automation.
prompt() {
    local var="$1" desc="$2" default="${3:-}"

    local sensitive=0
    case "$var" in
        *TOKEN*|*SECRET*|*PASSWORD*|*DSN*|*KEY*) sensitive=1 ;;
    esac

    if [[ "$INTERACTIVE" != "1" ]]; then
        # Headless mode — must already be in env.
        if [[ -z "${!var:-}" ]]; then
            fail "headless mode but env var $var unset ($desc)"
        fi
        return
    fi

    if [[ -n "${!var:-}" ]]; then
        if (( sensitive )); then
            # Never copy credentials into the install log.
            ok "$var already set in env: <redacted>"
        else
            ok "$var already set in env: ${!var}"
        fi
        return
    fi

    local prompt_str="$desc"
    if [[ -n "$default" ]]; then
        prompt_str="$prompt_str [default: $default]"
    fi
    printf '%s: ' "$prompt_str" >&2

    local -a read_flags=(-r)
    if (( sensitive )); then
        read_flags+=(-s)
    fi

    # `|| true`: EOF must fall through to the explicit "required" failure below
    # instead of killing the script silently under `set -e`.
    local response=""
    if [[ ! -t 0 ]] && { true </dev/tty; } 2>/dev/null; then
        read "${read_flags[@]}" response </dev/tty || true
    else
        read "${read_flags[@]}" response || true
    fi
    if (( sensitive )); then
        # read -s swallows the operator's newline; restore it for readability.
        printf '\n' >&2
    fi

    if [[ -z "$response" && -n "$default" ]]; then
        response="$default"
    fi
    if [[ -z "$response" ]]; then
        fail "$var required ($desc)"
    fi
    printf -v "$var" '%s' "$response"
    export "$var"
}

# ─── Phase 0 — preflight (root + sudo + tooling) ─────────────────────────────
# Phase 0: refuse to start unless running as root with every tool that later
# phases need and that phase 1c does NOT install.
#
# curl/tar/python3 are needed for download and venv creation. sha256sum and awk
# are used for key fingerprinting and tarball verification, and systemctl for
# unit installation. Those last three are first used only after secrets have
# been written, so checking them here keeps a doomed install from leaving
# state behind.
phase_0_preflight() {
    log "phase 0/11: preflight"
    if [[ $EUID -ne 0 ]]; then
        fail "must run as root (use: curl ... | sudo bash); current EUID=$EUID"
    fi
    local cmd
    for cmd in curl tar python3 sha256sum awk systemctl; do
        command -v "$cmd" >/dev/null 2>&1 || fail "$cmd not on PATH; install before bootstrap"
    done
    ok "running as root + curl + tar + python3 present"
}

# ─── Phase 1 — provision check (distro detect + hostname sanity) ─────────────
# Phase 1: identify the distribution and reject template hostnames.
#
# Sets the globals DISTRO and DISTRO_VERSION from /etc/os-release (ID and
# VERSION_ID; "unknown" when a field is absent, empty when the file cannot be
# read). Calls fail() for a distro outside the supported list or for a short
# hostname that is empty or one of the known image defaults.
phase_1_detect_distro() {
    log "phase 1/11: provision (distro detect + hostname sanity)"
    DISTRO=""
    DISTRO_VERSION=""
    if [[ -r /etc/os-release ]]; then
        # Sourced in the current shell, so the file's other fields become
        # shell variables as well.
        # shellcheck disable=SC1091
        . /etc/os-release
        DISTRO="${ID:-unknown}"
        DISTRO_VERSION="${VERSION_ID:-unknown}"
    fi

    local candidate recognised=0
    for candidate in ubuntu debian rocky rhel almalinux centos fedora arch alpine; do
        if [[ "$DISTRO" == "$candidate" ]]; then
            recognised=1
            break
        fi
    done
    if (( recognised )); then
        ok "distro=$DISTRO version=$DISTRO_VERSION"
    else
        fail "distro '$DISTRO' not in supported set {ubuntu, debian, rocky, rhel, almalinux, centos, fedora, arch, alpine}"
    fi

    local short_host placeholder
    short_host="$(hostname -s)"
    for placeholder in localhost localhost.localdomain ubuntu debian "" rocky; do
        if [[ "$short_host" == "$placeholder" ]]; then
            fail "hostname '$short_host' looks like default/template; set per runbook Step 1 before bootstrap"
            break
        fi
    done
    ok "hostname=$short_host"
}

# ─── Phase 1b — endpoint identifiers ─────────────────────────────────────────
# Phase 1b: settle the global ENDPOINT_ID.
#
# Keeps a value supplied through the environment, otherwise asks via prompt()
# with ENDPOINT_ID_DEFAULT as the suggestion, then enforces the naming pattern:
# lowercase letters, digits and hyphens, beginning and ending with a letter or
# digit. The pattern needs at least two characters.
phase_1b_endpoint_id() {
    # Normalise "unset" to "empty" so prompt() can inspect it under `set -u`.
    ENDPOINT_ID="${ENDPOINT_ID:-}"
    prompt ENDPOINT_ID "Endpoint identifier (e.g. dev-docker-2, scraper-v3-product-01)" "$ENDPOINT_ID_DEFAULT"

    # Format guard per FEDERATION_NAMING_v1
    local naming_re='^[a-z0-9][a-z0-9-]*[a-z0-9]$'
    [[ "$ENDPOINT_ID" =~ $naming_re ]] \
        || fail "endpoint_id '$ENDPOINT_ID' violates FEDERATION_NAMING_v1 (lower-kebab; start+end alnum)"
    ok "endpoint_id=$ENDPOINT_ID"
}

# ─── Phase 1c — install packages ─────────────────────────────────────────────
# Phase 1c: install the OS packages the connector needs, using the package
# manager that belongs to the distro family recorded in DISTRO.
#
# A DISTRO value with no matching branch installs nothing and still reports
# success. Any package-manager failure aborts the script via `set -e`.
phase_1c_install_packages() {
    log "phase 1c: install OS packages (per Appendix A)"
    local -a pkgs
    case "$DISTRO" in
        ubuntu|debian)
            pkgs=(python3 python3-venv python3-dev libpq5
                  git rsync openssh-client curl jq ca-certificates)
            # Keep apt from stopping on configuration dialogs.
            export DEBIAN_FRONTEND=noninteractive
            apt-get update -qq
            apt-get install -y -qq "${pkgs[@]}"
            ;;
        rocky|rhel|almalinux|centos|fedora)
            pkgs=(python3 python3-pip python3-devel
                  postgresql-libs git rsync openssh-clients curl jq ca-certificates)
            dnf install -y -q "${pkgs[@]}"
            ;;
        arch)
            pkgs=(python python-pip postgresql-libs
                  git rsync openssh curl jq ca-certificates)
            pacman -Syu --noconfirm --needed "${pkgs[@]}"
            ;;
        alpine)
            pkgs=(python3 py3-pip postgresql-libs git rsync
                  openssh-client curl jq ca-certificates musl-dev)
            apk add --no-cache "${pkgs[@]}"
            ;;
    esac
    ok "OS packages installed"
}

# ─── Phase 2 — keypair generation ────────────────────────────────────────────
# Phase 2: make sure an Ed25519 keypair exists under
# ${SECRETS_ROOT}/${ENDPOINT_ID}/ and publish its fingerprint.
#
# Globals read:    SECRETS_ROOT, ENDPOINT_ID
# Globals written: SIGNING_KEY_FINGERPRINT (SHA-256 hex digest of the .pub file)
#
# Idempotent: a non-empty private.key is never regenerated. Two half-bootstrapped
# states are repaired instead of crashing:
#   * private key present, .pub missing or empty: the public key is re-derived
#     with `ssh-keygen -y`. The re-derived file carries no comment, so its
#     digest differs from the one of the originally generated .pub.
#   * zero-length private key left by an interrupted run: removed first, since
#     ssh-keygen would otherwise stop to ask an interactive overwrite question.
phase_2_keypair() {
    log "phase 2/11: Ed25519 keypair (per AUTH §16)"
    local keydir="${SECRETS_ROOT}/${ENDPOINT_ID}"
    local privkey="${keydir}/private.key"
    local pubkey="${privkey}.pub"
    mkdir -p "$keydir"
    chmod 700 "$keydir"

    if [[ -s "$privkey" ]]; then
        ok "existing keypair at $privkey (skip-generate; idempotent)"
        if [[ ! -s "$pubkey" ]]; then
            if ! ssh-keygen -y -f "$privkey" > "$pubkey"; then
                rm -f -- "$pubkey"
                fail "cannot re-derive public key from $privkey"
            fi
            chmod 644 "$pubkey"
            ok "re-derived missing public key $pubkey"
        fi
    else
        rm -f -- "$privkey" "$pubkey"
        ssh-keygen -t ed25519 -f "$privkey" -N '' \
            -C "federation_endpoint_${ENDPOINT_ID}_$(date -u +%Y%m%dT%H%M%SZ)" -q
        chmod 600 "$privkey"
        chmod 644 "$pubkey"
        ok "generated keypair $privkey"
    fi

    SIGNING_KEY_FINGERPRINT="$(sha256sum "$pubkey" | awk '{print $1}')"
    [[ -n "$SIGNING_KEY_FINGERPRINT" ]] || fail "could not fingerprint $pubkey"
    ok "signing_key_fingerprint=${SIGNING_KEY_FINGERPRINT:0:16}…"
}

# ─── Phase 3 — OAuth + DSN provisioning ──────────────────────────────────────
# Phase 3: persist the federation credentials for this endpoint in
# ${SECRETS_ROOT}/federation_oauth/${ENDPOINT_ID}.env (mode 600, KEY=VALUE lines).
#
# Globals read:    SECRETS_ROOT, ENDPOINT_ID, FEDERATION_DSN_DEFAULT
# Globals written: FEDERATION_OAUTH_TOKEN, FEDERATION_DSN (only when the file
#                  has to be created)
#
# Idempotent: a non-empty env file is left untouched and nothing is prompted.
# The file is written to a private temp file in the same directory and renamed
# into place, so an interrupted run can never leave a truncated file that the
# next run would mistake for a complete one, and the secret never exists on
# disk with wider permissions than 600.
phase_3_oauth() {
    log "phase 3/11: OAuth + federation DSN provisioning"
    local oauth_dir="${SECRETS_ROOT}/federation_oauth"
    mkdir -p "$oauth_dir"
    chmod 700 "$oauth_dir"
    local env_file="${oauth_dir}/${ENDPOINT_ID}.env"

    if [[ -s "$env_file" ]]; then
        ok "existing $env_file (skip-prompt; idempotent)"
        return 0
    fi

    FEDERATION_OAUTH_TOKEN="${FEDERATION_OAUTH_TOKEN:-}"
    prompt FEDERATION_OAUTH_TOKEN \
        "Federation OAuth token (paste; founder-distributed per runbook Step 3 option (i))" ""
    FEDERATION_DSN="${FEDERATION_DSN:-$FEDERATION_DSN_DEFAULT}"
    if [[ -z "${FEDERATION_DSN:-}" ]]; then
        prompt FEDERATION_DSN \
            "Federation DSN (postgresql://user:pass@host:25060/ledgerworks_exec_core?sslmode=require)" ""
    fi

    # A line break inside a value would smuggle extra KEY=VALUE lines into the
    # file (possible in headless mode, where values come straight from env).
    local name
    for name in FEDERATION_OAUTH_TOKEN FEDERATION_DSN; do
        if [[ "${!name}" == *$'\n'* || "${!name}" == *$'\r'* ]]; then
            fail "$name contains a line break; refusing to write $env_file"
        fi
    done

    local tmp_file
    tmp_file="$(umask 077 && mktemp "${env_file}.tmp.XXXXXX")" \
        || fail "cannot create temporary file in $oauth_dir"
    if ! cat > "$tmp_file" <<EOF
# Federation OAuth credentials — generated by bootstrap.sh WS69 $(date -u +%Y-%m-%dT%H:%M:%SZ)
FEDERATION_OAUTH_TOKEN=${FEDERATION_OAUTH_TOKEN}
FEDERATION_DSN=${FEDERATION_DSN}
ENDPOINT_ID=${ENDPOINT_ID}
SIGNING_KEY_PATH=${SECRETS_ROOT}/${ENDPOINT_ID}/private.key
EOF
    then
        rm -f -- "$tmp_file"
        fail "could not write $env_file"
    fi
    chmod 600 "$tmp_file"
    if ! mv -f -- "$tmp_file" "$env_file"; then
        rm -f -- "$tmp_file"
        fail "could not move credentials into place at $env_file"
    fi
    ok "wrote $env_file (mode 600)"
}

# ─── Phase 4 — fetch federation libs + venv ──────────────────────────────────
# Phase 4: download and verify the connector tarball, unpack it under
# LEDGERWORKS_ROOT, build the Python venv and hand ownership to the service user.
#
# Globals read: LEDGERWORKS_ROOT, SECRETS_ROOT, ENDPOINT_ID, INSTALL_BASE_URL,
#               INSTALL_VERSION, BOOTSTRAP_SERVICE_USER (optional, default spostma)
#
# Downloads are staged in a private `mktemp -d` directory. This script runs as
# root, so a predictable name in /tmp could be pre-created as a symlink by any
# local user. The staging directory is removed on every exit path.
#
# NOTE(review): the .sha256 file is fetched from the same origin as the
# tarball, so this check detects corruption but not a compromised install
# host. A detached signature would be needed for authenticity.
phase_4_venv() {
    log "phase 4/11: Python venv + federation libs"
    mkdir -p "${LEDGERWORKS_ROOT}/lib" "${LEDGERWORKS_ROOT}/scripts/linux_connector" "${LEDGERWORKS_ROOT}/venv"

    local tarball="connector-${INSTALL_VERSION}.tar.gz"
    local tarball_url="${INSTALL_BASE_URL}/${tarball}"
    local sig_url="${INSTALL_BASE_URL}/${tarball}.sha256"

    local stage_dir
    stage_dir="$(mktemp -d "${TMPDIR:-/tmp}/connector-bootstrap.XXXXXX")" \
        || fail "mktemp failed; cannot stage connector download"
    local tmp_tar="${stage_dir}/connector.tar.gz"
    local tmp_sha="${stage_dir}/connector.tar.gz.sha256"

    log "  fetching ${tarball_url}"
    if ! curl -fsSL "$tarball_url" -o "$tmp_tar"; then
        rm -rf -- "${stage_dir:?}"
        fail "download failed: ${tarball_url}"
    fi
    if ! curl -fsSL "$sig_url" -o "$tmp_sha"; then
        rm -rf -- "${stage_dir:?}"
        fail "download failed: ${sig_url}"
    fi

    # First field of the first non-blank line, lower-cased so an upper-case
    # digest still compares equal to sha256sum's lower-case output.
    local expected actual
    expected="$(awk 'NF { print $1; exit }' "$tmp_sha")"
    expected="${expected,,}"
    actual="$(sha256sum "$tmp_tar" | awk '{print $1}')"
    if [[ ! "$expected" =~ ^[0-9a-f]{64}$ ]]; then
        rm -rf -- "${stage_dir:?}"
        fail "checksum file ${sig_url} does not contain a SHA-256 digest; refusing"
    fi
    if [[ "$expected" != "$actual" ]]; then
        rm -rf -- "${stage_dir:?}"
        fail "tarball SHA-256 mismatch: expected=$expected actual=$actual (TAMPERING SUSPECT — refusing)"
    fi
    ok "tarball SHA-256 verified ($expected)"

    if ! tar -xzf "$tmp_tar" -C "${LEDGERWORKS_ROOT}/"; then
        rm -rf -- "${stage_dir:?}"
        fail "failed to extract ${tarball} into ${LEDGERWORKS_ROOT}/"
    fi
    rm -rf -- "${stage_dir:?}"
    ok "extracted connector tarball to ${LEDGERWORKS_ROOT}/"

    # Reuse an existing venv on re-runs; pip installs below are safe to repeat.
    if [[ ! -x "${LEDGERWORKS_ROOT}/venv/bin/python" ]]; then
        python3 -m venv "${LEDGERWORKS_ROOT}/venv"
    fi
    "${LEDGERWORKS_ROOT}/venv/bin/pip" install --quiet --upgrade pip
    "${LEDGERWORKS_ROOT}/venv/bin/pip" install --quiet 'psycopg2-binary==2.9.9' requests
    ok "venv ready at ${LEDGERWORKS_ROOT}/venv"

    # Ownership: per-service user per Step 1 decision; default spostma (founder-shared).
    local svc_user="${BOOTSTRAP_SERVICE_USER:-spostma}"
    if id -u "$svc_user" >/dev/null 2>&1; then
        chown -R "$svc_user:$svc_user" "${LEDGERWORKS_ROOT}/" "${SECRETS_ROOT}/${ENDPOINT_ID}/" \
            "${SECRETS_ROOT}/federation_oauth/${ENDPOINT_ID}.env"
        ok "chown -R $svc_user"
    else
        log "  warn: service user '$svc_user' does not exist; running connector as root (review)"
    fi
}

# ─── Phase 5 — systemd unit install ──────────────────────────────────────────
# Phase 5: install connector.service and connector.timer from the unpacked
# tarball into /etc/systemd/system and reload systemd.
#
# Globals read: LEDGERWORKS_ROOT
#
# Both unit files are verified BEFORE either is installed, so a tarball that
# lacks one of them cannot leave a lone unit behind. Units are installed with
# mode 0644 regardless of the mode they carry in the tarball.
phase_5_systemd() {
    log "phase 5/11: systemd unit installation"
    local src="${LEDGERWORKS_ROOT}/scripts/linux_connector"
    local -a units=(connector.service connector.timer)
    local unit

    for unit in "${units[@]}"; do
        if [[ ! -f "${src}/${unit}" ]]; then
            fail "${src}/${unit} missing from tarball; refusing"
        fi
    done

    for unit in "${units[@]}"; do
        install -m 0644 "${src}/${unit}" "/etc/systemd/system/${unit}"
        ok "installed /etc/systemd/system/${unit}"
    done

    systemctl daemon-reload
    ok "systemctl daemon-reload"
}

# ─── Phase 6-10 — connector first run (full join) ────────────────────────────
# _bootstrap_import_env_file <path>
#
# Export every KEY=VALUE line of <path> into the current shell WITHOUT
# executing the file. Values are taken literally (no expansion, no command
# substitution), so characters such as & ; $ or spaces in a DSN or token are
# safe. One pair of matching surrounding quotes is removed and an optional
# leading "export " is accepted. Comment lines and anything that is not an
# assignment are skipped.
#
# Variables that steer this root shell itself (PATH, IFS, ENV, BASH_ENV,
# SHELLOPTS, BASHOPTS, LD_*) are ignored with a warning: phase 4 hands the env
# file to the service user, so its content must not be able to redirect
# commands run as root.
_bootstrap_import_env_file() {
    local path="$1" line key value
    local pair_re='^[[:space:]]*(export[[:space:]]+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$'
    while IFS= read -r line || [[ -n "$line" ]]; do
        line="${line%$'\r'}"
        [[ "$line" =~ $pair_re ]] || continue
        key="${BASH_REMATCH[2]}"
        value="${BASH_REMATCH[3]}"
        case "$key" in
            PATH|IFS|ENV|BASH_ENV|SHELLOPTS|BASHOPTS|LD_*)
                log "  warn: ignoring $key from $path (not a connector setting)"
                continue
                ;;
        esac
        if (( ${#value} >= 2 )); then
            case "$value" in
                \"*\"|\'*\') value="${value:1:${#value}-2}" ;;
            esac
        fi
        export "$key=$value" || fail "cannot import $key from $path"
    done < "$path"
}

# Phases 6-10: run connector.py once to perform the federation join.
#
# Globals read: SECRETS_ROOT, LEDGERWORKS_ROOT, ENDPOINT_ID,
#               BOOTSTRAP_SERVICE_USER (optional, default spostma)
# Side effects: exports the settings stored in the endpoint's env file.
#
# The connector runs as the service user when that account exists (through
# sudo, or runuser when sudo is not installed), otherwise as root. A non-zero
# exit from the connector aborts the bootstrap before the timer is enabled.
#
# NOTE(review): the settings are passed as `env NAME=value ...` arguments, so
# the token is briefly visible in the process list while the connector starts.
phase_6_through_10_first_join() {
    log "phase 6-10/11: connector.py first run (federation join via lib helpers)"
    local env_file="${SECRETS_ROOT}/federation_oauth/${ENDPOINT_ID}.env"
    if [[ ! -r "$env_file" ]]; then
        fail "$env_file missing or unreadable; phase 3 did not complete"
    fi
    _bootstrap_import_env_file "$env_file"
    if [[ -z "${FEDERATION_DSN:-}" ]]; then
        fail "FEDERATION_DSN missing from $env_file"
    fi

    local svc_user="${BOOTSTRAP_SERVICE_USER:-spostma}"
    local connector="${LEDGERWORKS_ROOT}/scripts/linux_connector/connector.py"
    local venv_py="${LEDGERWORKS_ROOT}/venv/bin/python"

    if ! [[ -x "$venv_py" && -f "$connector" ]]; then
        fail "connector.py or venv python missing post-phase-4"
    fi

    # Run as service user if it exists; else root (logged in phase 4).
    # The command is built as an array so no word is re-split or globbed.
    local -a cmd=()
    local run_prefix=""
    if id -u "$svc_user" >/dev/null 2>&1; then
        if command -v sudo >/dev/null 2>&1; then
            cmd=(sudo -u "$svc_user" -E)
            run_prefix="sudo -u $svc_user -E"
        elif command -v runuser >/dev/null 2>&1; then
            cmd=(runuser -u "$svc_user" --)
            run_prefix="runuser -u $svc_user --"
        else
            fail "neither sudo nor runuser available to run connector as '$svc_user'"
        fi
    fi
    cmd+=(
        env
        PYTHONPATH="${LEDGERWORKS_ROOT}/lib"
        ENDPOINT_ID="$ENDPOINT_ID"
        SIGNING_KEY_PATH="${SECRETS_ROOT}/${ENDPOINT_ID}/private.key"
        FEDERATION_DSN="$FEDERATION_DSN"
        FEDERATION_OAUTH_TOKEN="${FEDERATION_OAUTH_TOKEN:-}"
        "$venv_py" "$connector"
    )

    log "  invoking: $run_prefix PYTHONPATH=${LEDGERWORKS_ROOT}/lib $venv_py $connector"
    # `|| rc=$?` captures the status without tripping `set -e`.
    local rc=0
    PYTHONPATH="${LEDGERWORKS_ROOT}/lib" "${cmd[@]}" || rc=$?
    if [[ $rc -ne 0 ]]; then
        fail "connector.py first run rc=$rc (phases 6-10); endpoint stays pre_operational; timer NOT enabled per CLAUDE.md item 6 fail-closed"
    fi
    ok "connector.py first run succeeded (phases 6-10)"
}

# ─── Phase 11 — enable timer ─────────────────────────────────────────────────
# Phase 11: switch on the recurring heartbeat.
#
# Enables and starts connector.timer, waits two seconds, then asks systemd
# whether the timer is active. An inactive timer ends the bootstrap via fail().
phase_11_enable_timer() {
    local timer_unit="connector.timer"
    log "phase 11/11: enable connector.timer for ongoing heartbeat"
    systemctl enable --now "$timer_unit"
    # Short pause before querying the unit state.
    sleep 2
    systemctl is-active --quiet "$timer_unit" \
        || fail "connector.timer failed to activate; investigate journalctl -u connector"
    ok "connector.timer active"
}

# ─── Cleanup-on-failure trap (per CLAUDE.md item 6 fail-closed) ──────────────
# EXIT handler. On a non-zero exit status it logs the commands an operator can
# run to undo a partial install; it removes nothing itself. A zero status
# produces no output.
cleanup_on_failure() {
    # Must be the first statement: $? still holds the status the shell is exiting with.
    local rc=$?
    if [[ $rc -eq 0 ]]; then
        return 0
    fi

    # ENDPOINT_ID may not be known yet if the failure happened early.
    local endpoint="${ENDPOINT_ID:-_unknown}"
    log "BOOTSTRAP FAILED rc=$rc — leaving NO half-installed substrate"
    log "  manual cleanup (if appropriate):"
    log "    systemctl disable --now connector.timer 2>/dev/null || true"
    log "    rm -rf ${SECRETS_ROOT}/${endpoint}/"
    log "    rm -f ${SECRETS_ROOT}/federation_oauth/${endpoint}.env"
    log "  fix the failure cause, then re-run bootstrap (idempotent)."
}
trap cleanup_on_failure EXIT

# ─── main ────────────────────────────────────────────────────────────────────
# Orchestrates the whole bootstrap: prints the banner, runs each phase function
# in its fixed order, then prints the success summary with the verification
# queries. Under `set -e` the first failing phase ends the script, so the
# summary is only reached when every phase succeeded.
main() {
    local _rule="============================================================"
    log "$_rule"
    log "WS69 LedgerWorks Linux endpoint bootstrap"
    log "  per LINUX_CONNECTOR_v1 + linux_endpoint_federation_join.md"
    log "  install endpoint: ${INSTALL_BASE_URL}"
    log "$_rule"

    local _step
    for _step in \
        phase_0_preflight \
        phase_1_detect_distro \
        phase_1b_endpoint_id \
        phase_1c_install_packages \
        phase_2_keypair \
        phase_3_oauth \
        phase_4_venv \
        phase_5_systemd \
        phase_6_through_10_first_join \
        phase_11_enable_timer
    do
        "$_step"
    done

    log "$_rule"
    log "SUCCESS — endpoint ${ENDPOINT_ID} joined the federation"
    log "  signing fingerprint: ${SIGNING_KEY_FINGERPRINT}"
    log "  heartbeat cadence:  ${HEARTBEAT_CADENCE_SEC}s"
    log "  verify in central:"
    log "    SELECT * FROM federation.lookup WHERE namespace='endpoints' AND name='linux_endpoint_${ENDPOINT_ID}';"
    log "    SELECT * FROM endpoint.endpoint_join_events WHERE endpoint_id='${ENDPOINT_ID}' ORDER BY boot_at DESC LIMIT 1;"
    log "$_rule"
}

# Entry point. Kept as the last statement of the file, so nothing runs until
# every function above has been defined. Command-line arguments are forwarded,
# although main does not read them.
main "$@"
