Skip to main content
Version: 3.7.0

Load Balancing

The performance of a single instance of the virtual appliance is limited by the available hardware resources and by the number of concurrent tasks the API component can handle. To work around these limitations, we recommend deploying multiple instances of the virtual appliance and putting a load balancer in front of them.

How the load balancer works

The load balancer (LB) must ensure that the requests for the same task are routed to the same instance of the virtual appliance. This is called a stateful session. It can be achieved with a session cookie or with a session header.

The request flow is then as follows:

  1. The client POSTs a task to the LB.
  2. The LB picks a virtual appliance instance (depending on an LB algorithm) and sends the request there.
  3. The API in the virtual appliance accepts the task and sends a response back to the LB.
  4. The LB adds a session cookie or a session header to the response and sends it back to the client.
  5. The client extracts the task id and the session cookie or session header from the response.
  6. The client polls for the task. It sends a GET request with the session cookie or session header to the LB.
  7. The LB routes the request to the proper instance of the virtual appliance based on the session cookie or session header.

In the following example, we have used Envoy as the load balancer. Any other load balancer can be used if it supports stateful sessions.

Envoy configuration

This is an example Envoy configuration:

static_resources:
  listeners:
  - address:
      socket_address:
        # Load balancer address and port
        # This is where Envoy accepts the incoming traffic
        address: 0.0.0.0
        port_value: 8080
    filter_chains:
    - filters:
      - name: envoy.filters.network.http_connection_manager
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
          access_log:
          - name: envoy.access_loggers.stdout
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
              log_format:
                text_format_source:
                  # Folded scalar: the lines below are joined into a single log line
                  inline_string: >
                    [%START_TIME%] "%REQ(:METHOD)%
                    %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%"
                    %RESPONSE_CODE% %RESPONSE_FLAGS%
                    %RESPONSE_CODE_DETAILS%
                    %UPSTREAM_REQUEST_ATTEMPT_COUNT% %BYTES_RECEIVED%
                    %BYTES_SENT% %DURATION%
                    %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%
                    "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%"
                    "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%"
                    "%UPSTREAM_HOST%" "%REQ(REQUEST-ID)%"
                    "%REQ(CORRELATION-ID)%" "%REQ(session-header)%"
                    "%RESP(session-header)%"
          codec_type: AUTO
          stat_prefix: ingress_http
          route_config:
            name: local_route
            virtual_hosts:
            - name: backend
              domains:
              - "*"
              routes:
              - match:
                  prefix: "/api/"
                route:
                  cluster: speech-platform-virtual-appliance
                  retry_policy:
                    retry_on: "retriable-status-codes"
                    # Retry request on a different upstream when the 429 response is received
                    # This should happen when POSTing a request/task but max concurrent tasks limit is reached
                    # This ensures that task is accepted in the other (== less busy) instance of the virtual appliance
                    # NOTE(review): no retry_host_predicate is configured here, so the retry host is
                    # picked by the regular lb_policy and may be the same instance again; consider
                    # adding envoy.retry_host_predicates.previous_hosts - confirm against Envoy docs.
                    retriable_status_codes:
                    - 429
                    # How many times is the request retried
                    # Should be # of virtual appliance instances minus 1
                    num_retries: 1

          http_filters:
          - name: envoy.filters.http.stateful_session
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.stateful_session.v3.StatefulSession
              strict: true
              session_state:
                name: envoy.http.stateful_session.header
                typed_config:
                  "@type": type.googleapis.com/envoy.extensions.http.stateful_session.header.v3.HeaderBasedSessionState
                  # Name of the session header
                  # Contains base64 encoded upstream_address:port
                  # This tells Envoy to which upstream server it should send the request
                  name: session-header
          - name: envoy.filters.http.router
            typed_config:
              "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
  clusters:
  - name: speech-platform-virtual-appliance
    connect_timeout: 0.5s
    type: STATIC
    dns_lookup_family: V4_ONLY
    lb_policy: RANDOM
    load_assignment:
      cluster_name: speech-platform-virtual-appliance
      endpoints:
      - lb_endpoints:
        - endpoint:
            address:
              socket_address:
                # IP address of the first instance of the virtual appliance
                address: 1.2.3.4
                # Port of the first instance of the virtual appliance
                port_value: 80
        - endpoint:
            address:
              socket_address:
                # IP address of the second instance of the virtual appliance
                address: 1.2.3.5
                # Port of the second instance of the virtual appliance
                port_value: 80
    health_checks:
    - timeout: 2s
      interval: 60s
      interval_jitter: 1s
      unhealthy_threshold: 3
      healthy_threshold: 3
      http_health_check:
        # Healthcheck uri of the speech api inside virtual appliance
        path: /api/system/status

# Admin interface for looking at things
admin:
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9090

Access API with LB

Here is an example script to show how to work with a header-based stateful session using curl:

#!/usr/bin/env bash
#
# Example client for a header-based stateful session behind a load balancer.
# Requires bash 4+ (associative arrays), curl and jq.
#
# Every setting below keeps its documented default and can be overridden
# through the environment variable named in its comment.

# URL of the virtual appliance or load balancer (override: PLATFORM_URL)
platform_url="${PLATFORM_URL:-http://localhost:8080}"

# URI to POST the task to (override: TASK_URI)
uri="${TASK_URI:-/api/technology/speech-to-text?language=en}"

# Path to audio file for processing (override: VOICE_FILE)
voice_file="${VOICE_FILE:-/tmp/audio.wav}"

# Process this many tasks in parallel (override: PARALLEL_TASKS)
parallel="${PARALLEL_TASKS:-100}"

# End when this many tasks are processed (override: TOTAL_TASKS)
total_tasks="${TOTAL_TASKS:-400}"

#######################################
# Post tasks to the API so that we still have $parallel tasks running.
#
# Each accepted task is registered for polling together with the session
# header returned by the load balancer. POSTs that fail or that return no
# task id are reported on stderr and are NOT registered.
#
# Globals:
#   platform_url, uri, voice_file  (read)    request target and audio file
#   task_counter                   (updated) incremented once per POST attempt
#   current_tasks                  (updated) ids of accepted tasks are appended
#   taskToHeader                   (updated) task id -> session header value
# Arguments:
#   $1 - number of tasks to POST
# Outputs:
#   Progress messages on stdout, skipped tasks on stderr.
#######################################
post_tasks() {
  local count=$1
  # The file names use the task counter at the start of the batch, so the
  # same two files are reused for every task of one batch.
  local tmpfile_task=/tmp/task.${task_counter}.json
  local tmpfile_headers=/tmp/headers.${task_counter}.txt
  local i rv line name value session_header task_id

  for ((i = 1; i <= count; i++)); do
    # POST single task
    echo "[${task_counter}] Task $i of ${count}"

    # Start from empty files: otherwise a failed request would be evaluated
    # against the response of the previous task of this batch.
    : > "${tmpfile_task}"
    : > "${tmpfile_headers}"

    rv=0
    curl \
      -L -s -X POST \
      -H 'Content-Type: multipart/form-data' \
      -H 'Accept: application/json' \
      -F file=@"${voice_file}" \
      --output "${tmpfile_task}" \
      --dump-header "${tmpfile_headers}" \
      "${platform_url}${uri}" || rv=$?

    # Parse session header. Header lines end with CRLF and the value is
    # preceded by a space; both must be removed, otherwise they would be
    # sent back verbatim when polling. With -L the dump may hold several
    # responses, the last matching header wins.
    session_header=""
    while IFS= read -r line || [[ -n "${line}" ]]; do
      line=${line%$'\r'}
      name=${line%%:*}
      # Lines without a colon (status line, blank separator) are no headers
      [[ "${name}" != "${line}" ]] || continue
      if [[ "${name,,}" == "session-header" ]]; then
        value=${line#*:}
        value="${value#"${value%%[![:space:]]*}"}"
        value="${value%"${value##*[![:space:]]}"}"
        session_header=${value}
      fi
    done < "${tmpfile_headers}"

    echo "[${task_counter}] Curl response code is: ${rv}"
    echo "[${task_counter}] Session header is: ${session_header}"
    echo "[${task_counter}] $(cat "${tmpfile_task}")"

    task_id=$(jq -r '.task.task_id' "${tmpfile_task}")
    if [[ ${rv} -ne 0 || -z "${task_id}" || "${task_id}" == "null" ]]; then
      # jq prints "null" when the response carries no task id (e.g. an error
      # body). Registering such an entry would block a slot forever.
      echo "[${task_counter}] Task was not accepted, skipping it" >&2
    else
      # Store task id
      current_tasks+=("${task_id}")
      # Store session header for each task id
      taskToHeader["${task_id}"]="${session_header}"
    fi
    task_counter=$((task_counter + 1))
  done
}

#######################################
# Poll for all running tasks.
#
# Sends one GET per task in current_tasks, attaching the session header that
# was stored for the task, and prints a per-state summary.
#
# Globals:
#   platform_url        (read)    base URL of the load balancer / appliance
#   current_tasks       (read)    ids of the tasks to poll
#   taskToHeader        (read)    task id -> session header value
#   counter_total_done  (updated) incremented for every task found "done"
#   finished_tasks      (updated) ids that no longer need polling are appended
# Outputs:
#   Per-task status lines and a summary line on stdout.
#######################################
poll_tasks() {
  local counter_done=0
  local counter_rejected=0
  local counter_running=0
  local counter_pending=0
  local counter_unknown=0
  local task_id tmpfile_task rv task_status

  # Poll status of each task
  for task_id in "${current_tasks[@]}"; do
    tmpfile_task=/tmp/task-id-${task_id}

    # Empty the file first so a failed request shows up as an unknown state
    # instead of repeating the state of the previous poll.
    : > "${tmpfile_task}"

    # Add session header
    rv=0
    curl -s --header "session-header:${taskToHeader[${task_id}]}" -L \
      -o "${tmpfile_task}" "${platform_url}/api/task/${task_id}" || rv=$?

    echo "[${task_id}] Curl response code is: ${rv}"
    task_status=$(jq -r '.state' "${tmpfile_task}")
    echo "[${task_id}] Task is still ${task_status}..."

    # Evaluate task status
    case "${task_status}" in
      pending)
        counter_pending=$((counter_pending + 1))
        ;;

      running)
        counter_running=$((counter_running + 1))
        ;;

      rejected)
        counter_rejected=$((counter_rejected + 1))
        # Release the slot: a task left in current_tasks is polled forever
        # and permanently lowers the effective parallelism.
        # NOTE(review): assumes "rejected" is a final state - confirm
        # against the API documentation.
        finished_tasks+=("${task_id}")
        ;;

      done)
        counter_done=$((counter_done + 1))
        counter_total_done=$((counter_total_done + 1))
        finished_tasks+=("${task_id}")
        ;;

      *)
        counter_unknown=$((counter_unknown + 1))
        ;;
    esac
  done
  echo "Summary: Done: ${counter_done}, Rejected: ${counter_rejected}, Running: ${counter_running}, Pending: ${counter_pending}, Unknown: ${counter_unknown}"
}

# The script cannot do anything useful without its external tools
for required_tool in curl jq; do
  if ! command -v "${required_tool}" > /dev/null; then
    echo "Required tool '${required_tool}' is not installed!" >&2
    exit 1
  fi
done

# Drop poll responses left over from a previous run
rm -f /tmp/task-id-*

if [ ! -f "${voice_file}" ]; then
  echo "Voicefile does not exists!"
  exit 1
fi

# Ids of the tasks that are currently being polled
current_tasks=()
# Session header value for each task id
declare -A taskToHeader
task_counter=1
start_time=$(date '+%s')
counter_total_done=0

# Control loop: poll, drop finished tasks, top up to $parallel tasks, repeat
while true; do
  finished_tasks=()
  poll_tasks

  # Remove finished tasks. Ids are compared for exact equality; a pattern
  # substitution would also alter ids that merely contain a finished id.
  if [ "${#finished_tasks[@]}" -gt 0 ]; then
    remaining_tasks=()
    for remaining_id in "${current_tasks[@]}"; do
      keep=1
      for del in "${finished_tasks[@]}"; do
        if [ "${remaining_id}" = "${del}" ]; then
          keep=0
          break
        fi
      done
      if [ "${keep}" -eq 1 ]; then
        remaining_tasks+=("${remaining_id}")
      fi
    done
    current_tasks=("${remaining_tasks[@]}")
  fi

  echo "Task counter: ${task_counter}, Finished tasks: ${counter_total_done}"
  if [ "${counter_total_done}" -ge "${total_tasks}" ]; then
    echo "Reached ${total_tasks} finished tasks."
    echo "Start time: ${start_time}"
    end_time=$(date '+%s')
    echo "End time: ${end_time}"
    echo "Duration: $((end_time - start_time))"
    echo "Voicefile: ${voice_file}"
    echo "task parallelism: ${parallel}"
    break
  fi

  # POST tasks to have $parallel tasks running all the time
  if [ "${#current_tasks[@]}" -lt "${parallel}" ]; then
    post_tasks $((parallel - ${#current_tasks[@]}))
  fi

  sleep 2
done