
Scalability and Auto-scaling

How to scale services on Nomad, both manually and automatically.

Manual Scaling

Via CLI

# Scale the "api" group of the "auth-service" job to 5 instances
nomad job scale auth-service api 5

# Verify the new count
nomad job status auth-service

Via Job File

Edit the count in the job file:

group "api" {
count = 5 # Cambia questo valore

# ...
}

Then apply:

nomad job run auth-service.nomad

Scaling Policy (Auto-scaling)

Nomad supports metric-based auto-scaling: scaling policies live in the job file and are evaluated by the Nomad Autoscaler.

Prerequisites

  1. Nomad Autoscaler - a separate agent to install
  2. Prometheus - as the metrics source (or another supported source)

Installing the Autoscaler

# Deploy as a Nomad job
nomad job run autoscaler.nomad
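
For reference, a minimal sketch of what autoscaler.nomad might contain. The image tag and the Prometheus address (prometheus.service.consul:9090) are assumptions to adapt to your environment:

job "autoscaler" {
  datacenters = ["visla-dc1"]
  type        = "service"

  group "autoscaler" {
    count = 1

    task "autoscaler" {
      driver = "docker"

      config {
        image   = "hashicorp/nomad-autoscaler:0.4.5"  # pin to a current release
        command = "nomad-autoscaler"
        args    = ["agent", "-config", "/local/config.hcl"]
      }

      template {
        destination = "local/config.hcl"
        data        = <<EOF
# Point the autoscaler at the local Nomad API
nomad {
  address = "http://{{ env "attr.unique.network.ip-address" }}:4646"
}

# Prometheus is the APM plugin the scaling policies query
apm "prometheus" {
  driver = "prometheus"
  config = {
    address = "http://prometheus.service.consul:9090"
  }
}

strategy "target-value" {
  driver = "target-value"
}
EOF
      }
    }
  }
}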

Job with a Scaling Policy

job "auth-service" {
datacenters = ["visla-dc1"]
type = "service"

group "api" {
count = 2

scaling {
enabled = true
min = 2
max = 10

policy {
# Scala basato su CPU
check "cpu" {
source = "prometheus"
query = "avg(nomad_client_allocs_cpu_total_percent{task='auth'})"

strategy "target-value" {
target = 70 # Mantieni CPU al 70%
}
}
}
}

task "auth" {
driver = "docker"
# ...
}
}
}

Scaling Strategies

1. Target Value

Keeps a metric at a target value. The autoscaler computes the new count roughly as ceil(current_count × metric / target): for example, 4 instances averaging 90% CPU with a target of 70 scale to ceil(4 × 90 / 70) = 6.

strategy "target-value" {
target = 70 # 70% CPU
}

2. Fixed Value

Scales to a fixed number of instances:

strategy "fixed-value" {
value = 5
}

3. Pass-through

Uses the query result directly as the desired count:

strategy "pass-through" {}

Scaling Metrics

CPU Usage

check "cpu" {
source = "prometheus"
query = "avg(nomad_client_allocs_cpu_total_percent{task='auth'})"

strategy "target-value" {
target = 70
}
}

Memory Usage

check "memory" {
source = "prometheus"
query = "avg(nomad_client_allocs_memory_usage{task='auth'} / nomad_client_allocs_memory_allocated{task='auth'}) * 100"

strategy "target-value" {
target = 80
}
}

Request Rate (from Traefik)

check "requests" {
source = "prometheus"
query = "sum(rate(traefik_service_requests_total{service='auth@consulcatalog'}[5m]))"

strategy "target-value" {
target = 100 # 100 req/s per istanza
}
}

Queue Length (Redis Streams)

check "queue" {
source = "prometheus"
query = "redis_stream_length{stream='positions:raw'}"

strategy "target-value" {
target = 1000 # Mantieni coda sotto 1000
}
}

Scaling by Service Type

API Services (auth, devices, tracking)

scaling {
  min = 2
  max = 10

  policy {
    check "cpu" {
      source = "prometheus"
      query  = "avg(nomad_client_allocs_cpu_total_percent{task='auth'})"

      strategy "target-value" {
        target = 70
      }
    }

    check "latency" {
      source = "prometheus"
      query  = "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket{service='auth'}[5m])) by (le))"

      strategy "target-value" {
        target = 0.2  # 200ms p95
      }
    }
  }
}

Data Pipeline (decoder, position-filter)

scaling {
  min = 2
  max = 20  # More headroom for traffic spikes

  policy {
    # Scale based on queue length
    check "queue_length" {
      source = "prometheus"
      query  = "redis_stream_length{stream='positions:raw'}"

      strategy "target-value" {
        target = 500  # Keep the backlog around 500 entries
      }
    }
  }
}

WebSocket Service

scaling {
  min = 2
  max = 10

  policy {
    # Scale based on active connections; use an average so the
    # target is per instance rather than cluster-wide
    check "connections" {
      source = "prometheus"
      query  = "avg(websocket_active_connections)"

      strategy "target-value" {
        target = 1000  # ~1000 connections per instance
      }
    }
  }
}

Horizontal Cluster Scaling

To scale the worker nodes themselves:

Google Cloud (Managed Instance Group)

# Create the instance template
gcloud compute instance-templates create nomad-worker-template \
--machine-type=n2-standard-4 \
--image-family=ubuntu-2204-lts \
--image-project=ubuntu-os-cloud \
--metadata-from-file=startup-script=setup-nomad-client.sh

# Create the managed instance group
gcloud compute instance-groups managed create nomad-workers \
--base-instance-name=nomad-worker \
--template=nomad-worker-template \
--size=3 \
--zone=europe-west1-b

# Configure autoscaling
gcloud compute instance-groups managed set-autoscaling nomad-workers \
--zone=europe-west1-b \
--min-num-replicas=2 \
--max-num-replicas=10 \
--target-cpu-utilization=0.7

Startup Script for New Workers

#!/bin/bash
# setup-nomad-client.sh

# Install Nomad, Consul, and Docker
curl -fsSL https://apt.releases.hashicorp.com/gpg | apt-key add -
apt-add-repository "deb [arch=amd64] https://apt.releases.hashicorp.com $(lsb_release -cs) main"
apt-get update && apt-get install -y nomad consul docker.io

# Configure the Consul client
cat > /etc/consul.d/consul.hcl <<EOF
datacenter = "visla-dc1"
data_dir = "/opt/consul"
retry_join = ["consul-server-1.internal", "consul-server-2.internal", "consul-server-3.internal"]
encrypt = "CHIAVE_CONSUL"
EOF

# Configure the Nomad client
cat > /etc/nomad.d/nomad.hcl <<EOF
datacenter = "visla-dc1"
data_dir = "/opt/nomad"

client {
enabled = true
}

consul {
address = "127.0.0.1:8500"
}
EOF

# Enable and start the services
systemctl enable consul nomad
systemctl start consul nomad

Cooldown e Rate Limiting

Avoid scaling too frequently:

scaling {
  min = 2
  max = 10

  policy {
    cooldown = "2m"  # Wait 2 minutes between scaling events

    check "cpu" {
      source = "prometheus"
      query  = "avg(nomad_client_allocs_cpu_total_percent{task='auth'})"

      strategy "target-value" {
        target = 70
      }
    }
  }
}
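
Within the policy block, the evaluation cadence can be tuned alongside the cooldown; a sketch, where the 30-second interval is an example value:

policy {
  cooldown            = "2m"   # Minimum wait after a scaling action
  evaluation_interval = "30s"  # How often the checks are evaluated

  # checks ...
}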

Monitoring Scaling

Grafana Dashboard

Useful queries:

# Number of running allocations per job
sum by (job) (nomad_client_allocs_running)

# Scaling events
rate(nomad_autoscaler_scaling_events_total[5m])

# Average CPU per task
avg by (task) (nomad_client_allocs_cpu_total_percent)

Alerts on Scaling Limits

# Alert when the job reaches its maximum scale
- alert: NomadJobAtMaxScale
  expr: |
    sum(nomad_client_allocs_running{job="auth-service"}) >= 10
  for: 5m
  labels:
    severity: warning
  annotations:
    summary: "Job auth-service at max scale"

Best Practices

  1. Start conservative - min=2, max=5, then raise the limits as you learn

  2. Monitor before enabling autoscaling - collect metrics first to understand load patterns

  3. Use an appropriate cooldown - 2-5 minutes to avoid flapping

  4. Test under load - verify that scaling behaves as expected

  5. Set budget alerts - watch costs when using cloud autoscaling

  6. Graceful shutdown - configure an appropriate kill_timeout so connections can drain (see the example below)

task "api" {
kill_timeout = "30s" # Tempo per graceful shutdown
}
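
If the task registers a Consul service, shutdown_delay complements kill_timeout: the task stays up briefly after being deregistered, so load balancers stop sending traffic before the kill signal arrives. A sketch; the 5-second delay is an example value:

task "api" {
  shutdown_delay = "5s"   # Wait after Consul deregistration before signaling
  kill_timeout   = "30s"  # Then allow up to 30s for graceful shutdown
}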