#!/usr/bin/env bash
# ==============================================================================
# setup-tpch-bench.sh
#
# Creates artifacts and runs commands for TPCH benchmark infrastructure.
# Auth (chalk, chalkadmin, aws, gcloud) is assumed to be already configured.
#
# Follow ~/chalk-private/infra/staging-environments/README.md for auth setup.
# Note: the README says to compile the provider locally, but the public
# registry provider (registry.terraform.io/chalk-ai/chalk) works fine now.
#
# What this script does:
#   1. Writes the Terraform file for the staging environment
#   2. Grants GCS bucket read access to cross-cloud service accounts
#   3. Adds S3 cross-account bucket policy for staging
#   4. Grants BigQuery IAM roles
#   5. Prints next steps (terraform apply, deploy tpch-sandbox)
# ==============================================================================

set -euo pipefail

# Resolve the directory this script lives in so config.yaml is found no matter
# where the script is invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="${SCRIPT_DIR}/config.yaml"

# ==============================================================================
# Parse config.yaml
# ==============================================================================

# Diagnostics go to stderr so they remain visible when stdout is redirected.
if ! command -v python3 > /dev/null 2>&1; then
    echo "ERROR: python3 required" >&2; exit 1
fi

if [ ! -f "${CONFIG_FILE}" ]; then
    echo "ERROR: config.yaml not found at ${CONFIG_FILE}" >&2; exit 1
fi

# Export every top-level config.yaml key as CFG_<KEY> in the environment.
# The config path is passed to python via argv (never interpolated into the
# program text), and key names are validated against shell-identifier syntax,
# so the eval'd output can only consist of simple `export CFG_X='…'` lines.
if ! python3 -c 'import yaml' > /dev/null 2>&1; then
    echo "ERROR: python3 'yaml' module (PyYAML) required" >&2; exit 1
fi

# Capture first, then eval: a python failure aborts the script instead of
# being silently swallowed by eval "$( … )".
CFG_EXPORTS="$(python3 - "${CONFIG_FILE}" <<'PY'
import os
import re
import sys

import yaml

with open(sys.argv[1]) as f:
    cfg = yaml.safe_load(f) or {}

lines = []
for key, value in cfg.items():
    # Refuse keys that would not form a valid shell variable name.
    if not re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', str(key)):
        sys.exit(f"invalid config key: {key!r}")
    val = str(value) if value is not None else ''
    if '~' in val:
        val = os.path.expanduser(val)
    # Single-quote for the shell: ' -> '"'"'
    safe_val = val.replace("'", "'\"'\"'")
    lines.append(f"export CFG_{key.upper()}='{safe_val}'")
print('\n'.join(lines))
PY
)" || { echo "ERROR: failed to parse ${CONFIG_FILE}" >&2; exit 1; }
eval "${CFG_EXPORTS}"

# Promote the parsed CFG_* exports to the names used throughout the script.
# Under `set -u`, a key missing from config.yaml fails right here with an
# "unbound variable" error instead of deep inside a later gcloud/aws call.
GCS_BUCKET="${CFG_GCS_BUCKET}"
GCS_PROJECT="${CFG_GCS_PROJECT}"
GCS_ROLE="${CFG_GCS_ROLE}"
S3_BUCKET="${CFG_S3_BUCKET}"
STAGING_AWS_ACCOUNT_ID="${CFG_STAGING_AWS_ACCOUNT_ID}"
# May be empty: section 3 explicitly skips the S3 bucket-policy step when
# this profile is not set in config.yaml.
AWS_PLAYGROUND_PROFILE="${CFG_AWS_PLAYGROUND_PROFILE}"
REMOTEDEV_CROSS_CLOUD_SA="${CFG_REMOTEDEV_CROSS_CLOUD_SA}"
STAGING_CROSS_CLOUD_SA="${CFG_STAGING_CROSS_CLOUD_SA}"
BQ_ENGINE_ROLE="${CFG_BQ_ENGINE_ROLE}"
CHALK_PROJECT="${CFG_CHALK_PROJECT}"
ENVIRONMENT_ID="${CFG_ENVIRONMENT_ID}"
VALKEY_CLUSTER_ID="${CFG_VALKEY_CLUSTER_ID}"
STAGING_ENVS_DIR="${CFG_STAGING_ENVS_DIR}"

# ==============================================================================
# Helpers
# ==============================================================================

# Return 0 iff <member> already holds <role> on gs://<bucket>.
# Arguments: $1 bucket, $2 role, $3 member (e.g. serviceAccount:…), $4 project.
# role/member are handed to python via sys.argv rather than interpolated into
# the program text, so values containing quotes cannot break (or inject into)
# the inline script. Any gcloud/parse failure reads as "binding absent".
gcs_binding_exists() {
    local bucket="$1" role="$2" member="$3" project="$4"
    gcloud storage buckets get-iam-policy "gs://${bucket}" --project="${project}" \
        --format=json 2>/dev/null \
        | python3 -c "
import json, sys
role, member = sys.argv[1], sys.argv[2]
policy = json.load(sys.stdin)
for binding in policy.get('bindings', []):
    if binding.get('role') == role and member in binding.get('members', []):
        sys.exit(0)
sys.exit(1)
" "${role}" "${member}" 2>/dev/null
}

# Return 0 iff <member> holds exactly <role> on <project>.
# Arguments: $1 project, $2 role, $3 service-account email (no prefix).
# grep -x forces a whole-line match: plain -F substring matching would let
# e.g. roles/bigquery.jobUser falsely match roles/bigquery.jobUserCustom.
project_iam_binding_exists() {
    local project="$1" role="$2" member="$3"
    gcloud projects get-iam-policy "${project}" \
        --flatten="bindings[].members" \
        --filter="bindings.members:${member}" \
        --format="value(bindings.role)" 2>/dev/null | grep -qxF "${role}"
}

# Return 0 iff the bucket policy on <bucket> contains a statement with <sid>.
# Arguments: $1 bucket, $2 statement Sid, $3 AWS CLI profile.
# Returns 1 when the bucket has no policy at all (get-bucket-policy fails).
# The sid is passed via sys.argv (not interpolated into the program) so quote
# characters cannot break the inline script; printf is used instead of echo
# because the policy document is arbitrary data.
s3_policy_has_sid() {
    local bucket="$1" sid="$2" profile="$3"
    local policy
    if policy=$(aws s3api get-bucket-policy --bucket "${bucket}" --profile "${profile}" --output text 2>/dev/null); then
        printf '%s' "${policy}" | python3 -c "
import json, sys
policy = json.load(sys.stdin)
for stmt in policy.get('Statement', []):
    if stmt.get('Sid') == sys.argv[1]:
        sys.exit(0)
sys.exit(1)
" "${sid}" 2>/dev/null
    else
        return 1
    fi
}

# ##############################################################################
# 1. Write Terraform file
# ##############################################################################

echo "=== 1. Terraform file ==="

TERRAFORM_FILE="${STAGING_ENVS_DIR}/tpch-bench.tf"

# The chalk_project resource creates the "bench" project via the Chalk API.
# The environment module creates the environment, Valkey, TimescaleDB, and
# background persistence (BigQuery offline store + Kafka).
#
# Heredoc with an unquoted delimiter so ${...} config values expand; the
# $( ) capture strips the trailing newline, which printf '%s\n' restores
# on write.
TERRAFORM_CONTENT="$(cat <<EOF
# TPCH Benchmark Environment
# Created by setup-tpch-bench.sh for https://github.com/chalk-ai/tpch-sandbox

resource "chalk_project" "${CHALK_PROJECT}" {
  name = "${CHALK_PROJECT}"
}

module "tpch_bench_online_store" {
  source              = "github.com/chalk-ai/chalk-vendored-terraform-modules/modules/aws/online-store/valkey"
  cluster_id          = "${VALKEY_CLUSTER_ID}"
  vpc_id              = local.aws_vpc_id
  subnet_ids          = local.aws_subnets
  allowed_cidr_blocks = [local.aws_cidr]
}

module "tpch_test_aws_staging_env" {
  source         = "./modules/aws-staging-environment"
  environment_id = "${ENVIRONMENT_ID}"
  cluster_id     = local.aws_kube_cluster_id
  project_id     = chalk_project.${CHALK_PROJECT}.id
  redis_secret   = module.tpch_bench_online_store.valkey_endpoint_redis_secret_name
  subnet_ids     = local.aws_subnets
  vpc_id         = local.aws_vpc_id
}
EOF
)"

if [ -f "${TERRAFORM_FILE}" ]; then
    # "$(cat file)" strips trailing newlines, and TERRAFORM_CONTENT is already
    # stripped by the $( ) capture above, so the comparison is apples-to-apples.
    # (The previous echo-based version compared stripped file content against
    # unstripped content, so the "[OK]" branch could never match.)
    if [ "$(cat "${TERRAFORM_FILE}")" = "${TERRAFORM_CONTENT}" ]; then
        echo "[OK] ${TERRAFORM_FILE} already up to date."
    else
        echo "[DIFF] ${TERRAFORM_FILE} exists with different content:"
        diff -- "${TERRAFORM_FILE}" <(printf '%s\n' "${TERRAFORM_CONTENT}") || true
        read -r -p "Overwrite? [y/N] " reply
        if [[ "${reply}" =~ ^[yY] ]]; then
            printf '%s\n' "${TERRAFORM_CONTENT}" > "${TERRAFORM_FILE}"
            echo "Written."
        fi
    fi
else
    printf '%s\n' "${TERRAFORM_CONTENT}" > "${TERRAFORM_FILE}"
    echo "Wrote ${TERRAFORM_FILE}"
fi

# ##############################################################################
# 2. GCS bucket IAM
# ##############################################################################

# Grant read access on the source bucket to each cross-cloud service account
# (remotedev and staging), skipping bindings that are already in place.
echo ""
echo "=== 2. GCS bucket IAM (gs://${GCS_BUCKET}) ==="

for desc in remotedev staging; do
    case "${desc}" in
        remotedev) sa="${REMOTEDEV_CROSS_CLOUD_SA}" ;;
        staging)   sa="${STAGING_CROSS_CLOUD_SA}" ;;
    esac
    member="serviceAccount:${sa}"
    if gcs_binding_exists "${GCS_BUCKET}" "${GCS_ROLE}" "${member}" "${GCS_PROJECT}"; then
        echo "[OK] ${desc} SA already has ${GCS_ROLE}."
    else
        echo "[ADDING] ${GCS_ROLE} -> ${desc} SA"
        gcloud storage buckets add-iam-policy-binding "gs://${GCS_BUCKET}" \
            --member="${member}" --role="${GCS_ROLE}" --project="${GCS_PROJECT}"
    fi
done

# ##############################################################################
# 3. S3 cross-account bucket policy
# ##############################################################################

# Add a cross-account read statement to the playground bucket policy so the
# staging AWS account can read s3://${S3_BUCKET}. Idempotent: keyed on the
# statement Sid.
echo ""
echo "=== 3. S3 cross-account bucket policy (s3://${S3_BUCKET}) ==="
echo "Remotedev (same account): no changes needed."

POLICY_SID="AllowStagingAccountRead"

if [ -n "${AWS_PLAYGROUND_PROFILE}" ]; then
    if s3_policy_has_sid "${S3_BUCKET}" "${POLICY_SID}" "${AWS_PLAYGROUND_PROFILE}"; then
        echo "[OK] Bucket policy already has '${POLICY_SID}' statement."
    else
        echo "[ADDING] Cross-account read for staging account ${STAGING_AWS_ACCOUNT_ID}"

        NEW_STATEMENT="{
            \"Sid\": \"${POLICY_SID}\",
            \"Effect\": \"Allow\",
            \"Principal\": {\"AWS\": \"arn:aws:iam::${STAGING_AWS_ACCOUNT_ID}:root\"},
            \"Action\": [\"s3:GetObject\", \"s3:ListBucket\", \"s3:GetBucketLocation\"],
            \"Resource\": [\"arn:aws:s3:::${S3_BUCKET}\", \"arn:aws:s3:::${S3_BUCKET}/*\"]
        }"

        # Append to an existing policy if there is one; otherwise start a
        # fresh policy document containing only the new statement.
        if existing_policy=$(aws s3api get-bucket-policy --bucket "${S3_BUCKET}" --profile "${AWS_PLAYGROUND_PROFILE}" --output text 2>/dev/null); then
            merged_policy=$(python3 -c "
import json, sys
policy = json.loads(sys.argv[1]); policy['Statement'].append(json.loads(sys.argv[2]))
print(json.dumps(policy))
" "${existing_policy}" "${NEW_STATEMENT}")
        else
            merged_policy="{\"Version\": \"2012-10-17\", \"Statement\": [${NEW_STATEMENT}]}"
        fi

        aws s3api put-bucket-policy --bucket "${S3_BUCKET}" \
            --profile "${AWS_PLAYGROUND_PROFILE}" --policy "${merged_policy}"
        echo "Done."
    fi
else
    echo "SKIP: aws_playground_profile not set in config.yaml."
    echo "Set it and re-run, or manually add a bucket policy granting"
    echo "s3:GetObject + s3:ListBucket to arn:aws:iam::${STAGING_AWS_ACCOUNT_ID}:root"
fi

# ##############################################################################
# 4. BigQuery IAM
# ##############################################################################

# Grant the engine role plus roles/bigquery.jobUser to both cross-cloud SAs
# on the GCS project, adding only the bindings not already present.
echo ""
echo "=== 4. BigQuery IAM on ${GCS_PROJECT} ==="

for desc in staging remotedev; do
    if [ "${desc}" = "staging" ]; then
        sa="${STAGING_CROSS_CLOUD_SA}"
    else
        sa="${REMOTEDEV_CROSS_CLOUD_SA}"
    fi
    member="serviceAccount:${sa}"
    for role in "${BQ_ENGINE_ROLE}" "roles/bigquery.jobUser"; do
        short="${role##*/}"
        if project_iam_binding_exists "${GCS_PROJECT}" "${role}" "${sa}"; then
            echo "[OK] ${desc} SA already has ${short}."
        else
            echo "[ADDING] ${short} -> ${desc} SA"
            gcloud projects add-iam-policy-binding "${GCS_PROJECT}" \
                --member="${member}" --role="${role}" --condition=None --quiet
        fi
    done
done

# ##############################################################################
# 5. Next steps
# ##############################################################################

# Print manual follow-up steps. Unquoted heredoc delimiter so the ${...}
# values expand; output is identical to the former echo sequence.
cat <<EOF

=== Done. Next steps ===

1. Authenticate per the README:
   cd ${STAGING_ENVS_DIR}
   chalkadmin aws envtoken root-staging-account-do-not-delete --plane staging

2. Terraform:
   cd ${STAGING_ENVS_DIR}
   terraform init -upgrade
   terraform plan
   terraform apply

3. Deploy tpch-sandbox:
   cd /path/to/tpch-sandbox
   chalk apply --environment ${ENVIRONMENT_ID} --project ${CHALK_PROJECT}

Environment URL: https://${ENVIRONMENT_ID}.awsstaging.internal.aws.chalk.dev/
EOF