#!/usr/bin/env bash
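# FileCat Agent — Mac / Linux
# Scansiona cartelle locali e invia i metadati dei file (percorso, nome,
# dimensione, data di modifica, hash opzionale) all'API di FileCat.
# Richiede bash >= 4 (array associativi): il bash 3.2 preinstallato su macOS
# non è sufficiente.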
#
# UTILIZZO:
#   chmod +x filecat_agent.sh
#   ./filecat_agent.sh --api-url https://filecat.dcsas.it --token TUO_TOKEN
#
# OPZIONI:
#   --api-url URL        URL dell'istanza FileCat (obbligatorio)
#   --token TOKEN        Bearer token API (obbligatorio)
#   --name NOME          Nome sorgente in FileCat (default: hostname)
#   --paths "P1,P2"      Percorsi separati da virgola (default: $HOME)
#   --hash               Calcola SHA256 per ogni file (lento)
#   --batch-size N       File per richiesta (default: 500)
#   --state-file PATH    File di stato incrementale (default: ~/.filecat_state)
#
# PIANIFICAZIONE (crontab):
#   # Ogni notte alle 02:00:
#   0 2 * * * /percorso/filecat_agent.sh --api-url https://filecat.dcsas.it --token TUO_TOKEN >> /tmp/filecat.log 2>&1

set -euo pipefail

# Associative arrays (declare -A) and ${var,,} are used further down and only
# exist in bash 4+. Fail early with a readable message instead of a cryptic
# "declare: -A: invalid option" on older shells (e.g. the stock macOS bash 3.2).
if (( BASH_VERSINFO[0] < 4 )); then
    echo "Errore: è richiesto bash >= 4 (versione in uso: ${BASH_VERSION})." >&2
    exit 1
fi

# ── Default parameters ───────────────────────────────────────────────────────
# Every value below can be overridden from the command line.
API_URL=""                            # FileCat base URL (mandatory)
API_TOKEN=""                          # API bearer token (mandatory)
# Fall back to `uname -n` so a missing `hostname` binary does not abort the
# script silently under `set -e`.
SOURCE_NAME="$(hostname 2>/dev/null || uname -n)"
SCAN_PATHS=("$HOME")                  # directories to scan
COMPUTE_HASH=false                    # "true" enables SHA-256 per file
BATCH_SIZE=500                        # files per API request
STATE_FILE="${HOME}/.filecat_state"   # incremental-scan state file

# ── Argument parsing ─────────────────────────────────────────────────────────

#######################################
# Parse the command-line options into the configuration globals.
# Globals (written): API_URL, API_TOKEN, SOURCE_NAME, SCAN_PATHS,
#                    COMPUTE_HASH, BATCH_SIZE, STATE_FILE
# Arguments: the script's own argument list ("$@")
# Outputs:   an error message on stderr for invalid input
# Returns:   0 on success; exits the shell with status 1 on an unknown
#            option, a missing option value, an empty --paths value or a
#            --batch-size that is not a positive integer
#######################################
parse_args() {
    while [[ $# -gt 0 ]]; do
        # First pass: classify the option and make sure a value is present,
        # so that "$2" is never expanded while unset (fatal under `set -u`).
        case "$1" in
            --hash)
                COMPUTE_HASH=true
                shift
                continue
                ;;
            --api-url|--token|--name|--paths|--batch-size|--state-file)
                if [[ $# -lt 2 ]]; then
                    echo "Errore: l'opzione $1 richiede un valore." >&2
                    exit 1
                fi
                ;;
            *)
                echo "Opzione sconosciuta: $1" >&2
                exit 1
                ;;
        esac

        # Second pass: store the value.
        case "$1" in
            --api-url)    API_URL="$2" ;;
            --token)      API_TOKEN="$2" ;;
            --name)       SOURCE_NAME="$2" ;;
            --state-file) STATE_FILE="$2" ;;
            --paths)
                if [[ -z "$2" ]]; then
                    echo "Errore: --paths richiede almeno un percorso." >&2
                    exit 1
                fi
                # IFS is changed for this `read` only.
                IFS=',' read -ra SCAN_PATHS <<< "$2"
                ;;
            --batch-size)
                # The value is later used in a numeric comparison.
                if [[ ! "$2" =~ ^[1-9][0-9]*$ ]]; then
                    echo "Errore: --batch-size deve essere un intero positivo (ricevuto: $2)." >&2
                    exit 1
                fi
                BATCH_SIZE="$2"
                ;;
        esac
        shift 2
    done
}

parse_args "$@"

# The FileCat endpoint and the bearer token have no usable default: stop here
# unless both were supplied on the command line.
[[ -n "$API_URL" && -n "$API_TOKEN" ]] || {
    echo "Errore: --api-url e --token sono obbligatori." >&2
    exit 1
}

# ── Funzioni helper ───────────────────────────────────────────────────────────

#######################################
# Map a file extension to a FileCat file category.
# Arguments: $1 - extension without the leading dot (any letter case)
# Outputs:   one of photo, video, audio, document, other on stdout
#######################################
get_file_type() {
    local suffix category
    suffix="${1,,}"  # compare case-insensitively
    case "$suffix" in
        jpg|jpeg|png|gif|bmp|tiff|tif|webp|heic|heif|raw|cr2|nef|arw)
            category="photo"
            ;;
        mp4|mkv|avi|mov|wmv|flv|webm|m4v|mpg|mpeg|3gp|ts|mts)
            category="video"
            ;;
        mp3|flac|wav|aac|ogg|wma|m4a|aiff|opus|ape|alac)
            category="audio"
            ;;
        pdf|doc|docx|xls|xlsx|ppt|pptx|txt|rtf|odt|ods|odp|csv|md)
            category="document"
            ;;
        *)
            category="other"
            ;;
    esac
    echo "$category"
}

#######################################
# Print the SHA-256 digest of a file as lowercase hex.
# Arguments: $1 - path of the file to hash
# Outputs:   the digest on stdout, or an empty line when no hashing tool is
#            installed or the file cannot be read
# Returns:   always 0 — hashing is best-effort, so an unreadable file must
#            not abort a caller running under `set -e` / `pipefail`
#######################################
sha256_file() {
    local -a hasher
    local digest

    if command -v sha256sum &>/dev/null; then
        hasher=(sha256sum)
    elif command -v shasum &>/dev/null; then
        hasher=(shasum -a 256)
    else
        echo ""
        return 0
    fi

    # Feed the file through stdin rather than passing its name: a name that
    # starts with "-" cannot be mistaken for an option, and GNU sha256sum
    # does not prefix the digest with "\" for names containing a backslash
    # or newline. The braces make 2>/dev/null also cover a failed redirect.
    digest=$({ "${hasher[@]}" < "$1"; } 2>/dev/null) || digest=""

    # Tool output is "<digest>  -"; keep only the first field.
    printf '%s\n' "${digest%% *}"
}

#######################################
# Escape a string so it can be placed between double quotes in JSON.
# Handles backslash, double quote and every control character below U+0020
# (which JSON forbids unescaped inside strings); file names may legally
# contain newlines and other control bytes.
# Arguments: $1 - raw string
# Outputs:   the escaped string on stdout, without a trailing newline
#######################################
json_escape() {
    local s="$1"
    local code hex ch rep

    # Backslash first, so the backslashes added below are not doubled.
    s="${s//\\/\\\\}"
    s="${s//\"/\\\"}"

    # Control characters are rare in file names: only pay for the
    # per-character pass when at least one is present.
    if [[ "$s" == *[[:cntrl:]]* ]]; then
        for ((code = 1; code < 32; code++)); do
            printf -v hex '%02x' "$code"
            printf -v ch "\\x${hex}"      # the raw control byte
            case "$code" in
                8)  rep='\b' ;;
                9)  rep='\t' ;;
                10) rep='\n' ;;
                12) rep='\f' ;;
                13) rep='\r' ;;
                *)  rep="\\u00${hex}" ;;
            esac
            s="${s//"$ch"/$rep}"
        done
    fi

    printf '%s' "$s"
}

# Incremental state from the previous run: file path → "size|mtime".
declare -A STATE=()

#######################################
# Load the incremental state file into the STATE associative array.
# Each line has the form "<path>=<size>|<mtime>". The line is split at the
# LAST "=", because the path may itself contain "=" while the fingerprint
# never does.
# Globals (written): STATE
# Arguments: $1 - state file path; a missing or unreadable file is not an
#                 error and leaves STATE empty
# Returns:   0
#######################################
load_state() {
    local file="${1:-}"
    local line key

    [[ -n "$file" && -f "$file" && -r "$file" ]] || return 0

    # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        [[ "$line" == *=* ]] || continue   # not a key=value line
        key="${line%=*}"
        # An empty subscript is rejected by bash ("bad array subscript").
        [[ -n "$key" ]] || continue
        STATE["$key"]="${line##*=}"
    done < "$file"
    return 0
}

load_state "${STATE_FILE:-}"

#######################################
# POST one batch of file entries to the FileCat API.
# Globals (read): SOURCE_NAME, API_URL, API_TOKEN, TMPDIR (optional)
# Arguments: $1 - comma-separated JSON objects, without the enclosing [ ]
# Outputs:   the server reply on stdout when accepted; an error on stderr
#            otherwise
# Returns:   0 when the server answers with a 2xx status; 1 on a transport
#            failure or any other HTTP status, so callers never treat an
#            unsent batch as delivered
#######################################
send_batch() {
    local json_files="$1"
    local source_type payload resp_file http_code body

    source_type="$(uname -s | tr '[:upper:]' '[:lower:]')_agent"
    payload="{\"source_name\":\"$(json_escape "$SOURCE_NAME")\",\"source_type\":\"${source_type}\",\"files\":[$json_files]}"

    # Private temp file instead of a fixed, predictable name in /tmp (which
    # is open to symlink attacks and clashes between concurrent runs).
    if ! resp_file=$(mktemp "${TMPDIR:-/tmp}/filecat_resp.XXXXXX"); then
        echo "  ERRORE: impossibile creare il file temporaneo per la risposta." >&2
        return 1
    fi

    # The payload goes through stdin (--data-binary @-): as a command-line
    # argument a large batch can exceed the kernel's per-argument size limit.
    # NOTE(review): -k disables TLS certificate verification while a bearer
    # token is being sent; kept for compatibility with existing deployments,
    # but consider removing it.
    if ! http_code=$(printf '%s' "$payload" | curl -sSk -o "$resp_file" -w "%{http_code}" \
            -X POST "${API_URL}/api/v1/files" \
            -H "Authorization: Bearer ${API_TOKEN}" \
            -H "Content-Type: application/json" \
            --data-binary @-); then
        echo "  ERRORE: richiesta a ${API_URL} non riuscita (errore di rete/curl)." >&2
        rm -f -- "$resp_file"
        return 1
    fi

    body=$(cat -- "$resp_file" 2>/dev/null || true)
    rm -f -- "$resp_file"

    if [[ "$http_code" =~ ^2[0-9][0-9]$ ]]; then
        echo "  Batch inviato OK ($body)"
    else
        echo "  ERRORE API $http_code: $body" >&2
        return 1
    fi
}

# ── Skip dirs ────────────────────────────────────────────────────────────────
# Directory names (or relative sub-paths) whose contents are never catalogued.
SKIP_DIRS=(".git" "__pycache__" "node_modules" ".Trash" "Library/Caches" \
           "Library/Logs" ".cache" ".local/share/Trash")

#######################################
# Tell whether a path lies inside (or is) one of the SKIP_DIRS directories.
# Matching is done on whole path components, so ".git" excludes
# "/repo/.git/config" but not "/repo/.gitignore" or "/repo/.github/ci.yml".
# Globals (read): SKIP_DIRS
# Arguments: $1 - path to test
# Returns:   0 if the path must be skipped, 1 otherwise
#######################################
should_skip() {
    local path="$1"
    local d
    for d in "${SKIP_DIRS[@]}"; do
        if [[ "$path" == *"/$d/"* || "$path" == *"/$d" ]]; then
            return 0
        fi
    done
    return 1
}

# ── Scan ─────────────────────────────────────────────────────────────────────
# Walk every configured path, queue new or modified files into batches, send
# each batch, then rewrite the state file. A file's fingerprint is written to
# the new state only when it was unchanged or when the batch containing it was
# sent successfully, so a failed batch is retried on the next run.
total=0            # files queued for upload during this run
sent=0             # files belonging to batches that were sent successfully
failed_batches=0
batch_json=""
batch_count=0
declare -A NEW_STATE=()
# Fingerprints of the files in the batch currently being built.
declare -A PENDING_STATE=()

#######################################
# Send the batch accumulated so far, then commit or discard its fingerprints.
# Globals (read):    batch_json
# Globals (written): NEW_STATE, PENDING_STATE, batch_json, batch_count,
#                    sent, failed_batches
# Returns:   0 (a failed batch is counted, not propagated)
#######################################
flush_batch() {
    local rc key

    # Run send_batch in a subshell with errexit re-armed: any failing command
    # inside it produces a non-zero status here, without terminating the
    # whole scan and without being silently ignored (as it would be in an
    # `if` / `||` context, where errexit is disabled).
    set +e
    ( set -e; send_batch "$batch_json" )
    rc=$?
    set -e

    if [[ $rc -eq 0 ]]; then
        for key in "${!PENDING_STATE[@]}"; do
            NEW_STATE["$key"]="${PENDING_STATE[$key]}"
        done
        sent=$((sent + batch_count))
    else
        failed_batches=$((failed_batches + 1))
        echo "  Batch non confermato: $batch_count file verranno reinviati alla prossima esecuzione." >&2
    fi

    PENDING_STATE=()
    batch_json=""
    batch_count=0
    return 0
}

echo "FileCat Agent — $(date)"
echo "Sorgente: $SOURCE_NAME → $API_URL"

for scan_path in "${SCAN_PATHS[@]}"; do
    if [[ ! -d "$scan_path" ]]; then
        echo "Percorso non trovato: $scan_path" >&2
        continue
    fi
    echo "Scansione: $scan_path"

    while IFS= read -r -d '' filepath; do
        # Ignore anything inside an excluded directory.
        if should_skip "$filepath"; then continue; fi
        # The file may have disappeared since find listed it.
        [[ -f "$filepath" ]] || continue

        # Fingerprint: GNU stat syntax first, then BSD/macOS, then "0".
        size=$(stat -c%s "$filepath" 2>/dev/null || stat -f%z "$filepath" 2>/dev/null || echo "0")
        mtime=$(stat -c%Y "$filepath" 2>/dev/null || stat -f%m "$filepath" 2>/dev/null || echo "0")
        fingerprint="${size}|${mtime}"
        state_key="$filepath"

        # Unchanged since the previous run: carry the entry over, send nothing.
        if [[ "${STATE[$state_key]+_}" ]] && [[ "${STATE[$state_key]}" == "$fingerprint" ]]; then
            NEW_STATE["$state_key"]="$fingerprint"
            continue
        fi

        name="$(basename "$filepath")"
        ext="${name##*.}"
        [[ "$ext" == "$name" ]] && ext=""   # no dot in the name → no extension
        file_type=$(get_file_type "$ext")

        sha256=""
        if [[ "$COMPUTE_HASH" == true ]]; then
            # Hashing is best-effort: an unreadable file must not stop the scan.
            sha256=$(sha256_file "$filepath") || sha256=""
            # GNU sha256sum prefixes the digest with "\" when it had to
            # escape the file name; drop that marker if present.
            sha256="${sha256#\\}"
        fi

        # ISO 8601 mtime. -u is required because the value carries a "Z"
        # (UTC) suffix. GNU date takes "-d @epoch"; BSD date takes "-r epoch".
        mod_date=$(date -u -d "@$mtime" '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || \
                   date -u -r "$mtime" '+%Y-%m-%dT%H:%M:%SZ' 2>/dev/null || echo "")

        entry="{\"path\":\"$(json_escape "$filepath")\","
        entry+="\"name\":\"$(json_escape "$name")\","
        entry+="\"extension\":\"$(json_escape "$ext")\","
        entry+="\"size\":$size,"
        entry+="\"hash_sha256\":\"$sha256\","
        entry+="\"file_type\":\"$file_type\","
        entry+="\"modified_at_source\":\"$mod_date\"}"

        if [[ -n "$batch_json" ]]; then batch_json+=","; fi
        batch_json+="$entry"
        batch_count=$((batch_count + 1))
        total=$((total + 1))
        PENDING_STATE["$state_key"]="$fingerprint"

        if [[ $batch_count -ge $BATCH_SIZE ]]; then
            flush_batch
            echo "  $total file elaborati..."
        fi
    done < <(find "$scan_path" -type f -print0 2>/dev/null)
done

# Final, partially filled batch.
if [[ $batch_count -gt 0 ]]; then flush_batch; fi

# Save the incremental state. Write to a sibling temp file and rename it, so
# an interrupted run never leaves a truncated state file behind.
state_tmp=$(mktemp "${STATE_FILE}.XXXXXX")
{
    for k in "${!NEW_STATE[@]}"; do
        printf '%s=%s\n' "$k" "${NEW_STATE[$k]}"
    done
} > "$state_tmp"
mv -f -- "$state_tmp" "$STATE_FILE"

echo ""
echo "Completato: $sent file nuovi/modificati inviati a FileCat."
if [[ $failed_batches -gt 0 ]]; then
    echo "Attenzione: $failed_batches batch non inviati ($((total - sent)) file da reinviare)." >&2
    exit 1
fi
