Version: 4.0.2

Load Balancing

The performance of a single instance of the virtual appliance is limited by the hardware resources and by the number of concurrent tasks the API component can handle. To work around these limitations, we recommend deploying multiple instances of the virtual appliance and putting a load balancer in front of them.

How the load balancer works

The load balancer (LB) must ensure that the requests for the same task are routed to the same instance of the virtual appliance. This is called a stateful session. It can be achieved with a session cookie or with a session header.

The request flow is then as follows:

The client POSTs a task to the LB.
The LB picks a virtual appliance instance (depending on an LB algorithm) and sends the request there.
The API in the virtual appliance accepts the task and sends a response back to the LB.
The LB adds a session cookie or a session header to the response and sends it back to the client.
The client extracts the task id and the session cookie or session header from the response.
The client polls for the task. It sends a GET request with the session cookie or session header to the LB.
The LB routes the request to the proper instance of the virtual appliance based on the session cookie or session header.

In the following example, we have used Envoy as the load balancer. Any other load balancer can be used if it supports stateful sessions.

Envoy configuration

This is the example Envoy configuration:

static_resources:
  listeners:
    - address:
        socket_address:
          # Load balancer address and port
          # This is where Envoy accepts the incoming traffic
          address: 0.0.0.0
          port_value: 8080
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                # Access log written to stdout
                # The folded scalar (>) joins the lines below into a single
                # log line and keeps one trailing newline
                # The last two fields log the session header of the request
                # and of the response
                access_log:
                  - name: envoy.access_loggers.stdout
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
                      log_format:
                        text_format_source:
                          inline_string: >
                            [%START_TIME%] "%REQ(:METHOD)%
                            %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%"
                            %RESPONSE_CODE% %RESPONSE_FLAGS%
                            %RESPONSE_CODE_DETAILS%
                            %UPSTREAM_REQUEST_ATTEMPT_COUNT% %BYTES_RECEIVED%
                            %BYTES_SENT% %DURATION%
                            %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%
                            "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%"
                            "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%"
                            "%UPSTREAM_HOST%" "%REQ(REQUEST-ID)%"
                            "%REQ(CORRELATION-ID)%" "%REQ(session-header)%"
                            "%RESP(session-header)%"
                codec_type: AUTO
                stat_prefix: ingress_http
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: backend
                      domains:
                        - "*"
                      routes:
                        # Only API requests are routed to the virtual appliances
                        - match:
                            prefix: "/api/"
                          route:
                            cluster: speech-platform-virtual-appliance
                            retry_policy:
                              retry_on: "retriable-status-codes"
                              # Retry request on a different upstream when the 429 response is received
                              # This should happen when POSTing a request/task but max concurrent tasks limit is reached
                              # This ensures that task is accepted in the other (== less busy) instance of the virtual appliance
                              retriable_status_codes:
                                - 429
                              # How many times is the request retried
                              # Should be # of virtual appliance instances minus 1
                              num_retries: 1
                              # Without this predicate the load balancer may pick
                              # the very same (busy) instance again for the retry
                              # It rejects hosts which were already tried for this request
                              retry_host_predicate:
                                - name: envoy.retry_host_predicates.previous_hosts
                                  typed_config:
                                    "@type": type.googleapis.com/envoy.extensions.retry.host.previous_hosts.v3.PreviousHostsPredicate
                              # How many times the host selection is repeated
                              # when the predicate rejects the selected host
                              host_selection_retry_max_attempts: 3
                              # NOTE(review): Envoy has to buffer the request body to be able
                              # to retry it - confirm that the buffer limits are big enough
                              # for the uploaded audio files

                http_filters:
                  - name: envoy.filters.http.stateful_session
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.stateful_session.v3.StatefulSession
                      # Request with a session header must be served by the instance
                      # referenced in the header, Envoy does not fall back to another
                      # instance when that one is not available
                      strict: true
                      session_state:
                        name: envoy.http.stateful_session.header
                        typed_config:
                          "@type": type.googleapis.com/envoy.extensions.http.stateful_session.header.v3.HeaderBasedSessionState
                          # Name of the session header
                          # Contains base64 encoded upstream_address:port
                          # This tells Envoy to which upstream server it should send the request
                          name: session-header
                  # Router filter does the actual forwarding to the cluster
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
  # Upstream cluster = all instances of the virtual appliance
  clusters:
    - name: speech-platform-virtual-appliance
      # Timeout for establishing a connection to an instance
      connect_timeout: 0.5s
      # Instances are listed explicitly (by IP address) in load_assignment below
      type: STATIC
      dns_lookup_family: V4_ONLY
      # Algorithm used to pick an instance for a request
      # NOTE(review): presumably applies only to requests without a valid
      # session header (i.e. newly POSTed tasks) - confirm
      lb_policy: RANDOM
      load_assignment:
        cluster_name: speech-platform-virtual-appliance
        endpoints:
          # Add one "endpoint" item per instance of the virtual appliance
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      # IP address of the first instance of the virtual appliance
                      address: 1.2.3.4
                      # Port of the first instance of the virtual appliance
                      port_value: 80
              - endpoint:
                  address:
                    socket_address:
                      # IP address of the second instance of the virtual appliance
                      address: 1.2.3.5
                      # Port of the second instance of the virtual appliance
                      port_value: 80
      # Active health checking of the instances
      health_checks:
        # Each check must finish within 2 seconds, checks are sent every 60 seconds
        - timeout: 2s
          interval: 60s
          interval_jitter: 1s
          # Number of consecutive failed checks before an instance is marked unhealthy
          unhealthy_threshold: 3
          # Number of consecutive successful checks before an instance is marked healthy again
          healthy_threshold: 3
          http_health_check:
            # Healthcheck uri of the speech api inside virtual appliance
            path: /api/system/status

# Envoy admin interface (statistics, cluster and health check state, ...)
# WARNING: 0.0.0.0 makes the admin interface listen on all network interfaces.
# NOTE(review): the admin interface is presumably unauthenticated and allows
# changing the state of Envoy - restrict access to port 9090 (firewall) or
# bind it to 127.0.0.1 if remote access is not needed.
admin:
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9090

Access API with LB

Here is an example script to show how to work with a header-based stateful session using curl:

#!/bin/bash

# Base URL the requests are sent to: either a single virtual appliance
# or the load balancer in front of several of them
platform_url="http://localhost:8080"

# Endpoint (path and query string) the tasks are POSTed to
uri='/api/technology/speech-to-text?language=en'

# Audio file uploaded with every task
voice_file="/tmp/audio.wav"

# Number of tasks kept in flight at the same time
parallel="100"

# Stop as soon as this many tasks have finished
total_tasks="400"

# Post tasks to the API so that we still have $parallel tasks running
#
# Arguments:
#   $1 - number of tasks to POST
# Globals read:
#   platform_url, uri, voice_file
# Globals updated:
#   task_counter  - incremented for every POST attempt
#   current_tasks - ids of the accepted tasks are appended
#   taskToHeader  - session header value stored per accepted task id
post_tasks() {
  local count=$1
  local i rv tmpfile_task tmpfile_headers session_header task_id

  for i in $(seq 1 "${count}"); do
    # Every POSTed task gets its own temp files (task_counter changes in each iteration)
    tmpfile_task="/tmp/task.${task_counter}.json"
    tmpfile_headers="/tmp/headers.${task_counter}.txt"

    # POST single task
    echo "[${task_counter}] Task $i of ${count}"
    curl \
      -L -s -X POST  \
      -H 'Content-Type: multipart/form-data' \
      -H 'Accept: application/json' \
      -F file=@"${voice_file}" \
      --output "${tmpfile_task}" \
      --dump-header "${tmpfile_headers}" \
      "${platform_url}${uri}"

    rv=$?

    # Parse session header
    # - header names are case-insensitive (grep -i)
    # - with -L the dump may contain headers of several responses, use the last one
    # - strip the space after the colon and the CR at the end of the header line
    # The value stays empty when there is no session header (no load balancer)
    session_header=$(grep -i '^session-header:' "${tmpfile_headers}" | tail -n 1 | cut -d ':' -f 2- | tr -d '[:space:]')
    echo "[${task_counter}] Curl response code is: ${rv}"
    echo "[${task_counter}] Session header is: ${session_header}"
    echo "[${task_counter}] $(cat "${tmpfile_task}")"

    task_id=$(jq -r '.task.task_id' "${tmpfile_task}")
    if [ -n "${task_id}" ] && [ "${task_id}" != "null" ]; then
      # Store task id
      current_tasks+=("${task_id}")
      # Store session header for each task id
      taskToHeader["${task_id}"]="${session_header}"
    else
      # Task was not accepted (e.g. 429 or a failed request): there is nothing to poll for
      echo "[${task_counter}] No task id in the response, task is not tracked"
    fi
    task_counter=$((task_counter + 1))
  done
}

# Poll for all running tasks
#
# Sends a GET request for every task id in current_tasks. The session header
# stored for the task is added to the request so that the load balancer routes
# it to the instance which accepted the task.
#
# Globals read:
#   platform_url, current_tasks, taskToHeader
# Globals updated:
#   counter_total_done - incremented for every task in the "done" state
#   finished_tasks     - ids of tasks which do not have to be polled anymore
#                        (done or rejected); the caller removes them from current_tasks
poll_tasks() {
  local counter_done=0
  local counter_rejected=0
  local counter_running=0
  local counter_pending=0
  local counter_unknown=0
  local task_id task_status tmpfile_task rv

  # Poll status of each task
  for task_id in "${current_tasks[@]}"; do
    tmpfile_task="/tmp/task-id-${task_id}"

    # Do not evaluate the response of the previous poll when this request fails
    rm -f "${tmpfile_task}"

    # Add session header
    curl -s --header "session-header:${taskToHeader[${task_id}]}" -L -o "${tmpfile_task}" "${platform_url}/api/task/${task_id}"
    rv=$?

    echo "[${task_id}] Curl response code is: ${rv}"
    task_status=$(jq -r '.state' "${tmpfile_task}")
    echo "[${task_id}] Task is still ${task_status}..."

    # Evaluate task status
    case "${task_status}" in
      pending)
        counter_pending=$((counter_pending + 1))
      ;;

      running)
        counter_running=$((counter_running + 1))
      ;;

      rejected)
        counter_rejected=$((counter_rejected + 1))
        # Rejected task is not processed anymore: stop polling it so that
        # it does not occupy one of the $parallel slots forever
        # It is not counted as a successfully processed task
        finished_tasks+=("${task_id}")
      ;;

      done)
        counter_done=$((counter_done + 1))
        counter_total_done=$((counter_total_done + 1))
        finished_tasks+=("${task_id}")
      ;;

      *)
        # Unexpected state or unreadable response, the task is polled again next time
        counter_unknown=$((counter_unknown + 1))
      ;;
    esac
  done
  echo "Summary: Done: ${counter_done}, Rejected: ${counter_rejected}, Running: ${counter_running}, Pending: ${counter_pending}, Unknown: ${counter_unknown}"
}

# Remove task responses left over from the previous run
rm -f /tmp/task-id-*

if [ ! -f "${voice_file}" ]; then
  echo "Voicefile does not exists!"
  exit 1
fi

# Ids of the tasks which are polled
current_tasks=()
# Maps task id to the session header returned when the task was POSTed
declare -A taskToHeader
# Sequence number of the next POSTed task
task_counter=1
start_time=$(date '+%s')
# Number of tasks in the done state since the start of the script
counter_total_done=0

# Control loop
# Each iteration polls the tracked tasks, drops the finished ones and POSTs
# new tasks until $total_tasks tasks are done
while true; do
  # Filled by poll_tasks
  finished_tasks=()
  poll_tasks

  # Remove finished tasks
  # Ids are compared as whole strings: a finished id which is a part of
  # another id must not modify that other id
  remaining_tasks=()
  for tid in "${current_tasks[@]}"; do
    keep=1
    for del in "${finished_tasks[@]}"; do
      if [ "${tid}" = "${del}" ]; then
        keep=0
        break
      fi
    done
    if [ "${keep}" -eq 1 ]; then
      remaining_tasks+=("${tid}")
    fi
  done
  current_tasks=("${remaining_tasks[@]}")

  echo "Task counter: ${task_counter}, Finished tasks: ${counter_total_done}"
  if [ "${counter_total_done}" -ge "${total_tasks}" ]; then
    echo "Reached ${total_tasks} finished tasks."
    echo "Start time: ${start_time}"
    end_time=$(date '+%s')
    echo "End time: ${end_time}"
    echo "Duration: $(( end_time - start_time ))"
    echo "Voicefile: ${voice_file}"
    echo "task parallelism: ${parallel}"
    break
  fi

  # POST tasks to have $parallel tasks running all the time
  # Nothing is POSTed when all $parallel slots are occupied
  if [ "${#current_tasks[@]}" -lt "${parallel}" ]; then
    post_tasks $((parallel - ${#current_tasks[@]}))
  fi

  # Wait before the next poll
  sleep 2
done

How the load balancer works

Envoy configuration

Access API with LB

How the load balancer works

Envoy configuration

Access API with LB