Skip to the content.

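Please visit https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support?tabs=tts for the list of all available Azure voices, models, and languages that can be specified in the config. Azure voice names follow the notation "{language}-{voice}{model}"; for example, language "en-GB", voice "Sonia", and model "neural" correspond to the Azure voice "en-GB-SoniaNeural".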

Step 1: Create an agent by sending a POST request to http://localhost:5001/agent with the following payload:

{
  "agent_config": {
      "agent_name": "Alfred",
      "agent_type": "other",
      "agent_welcome_message": "How are you doing Bruce?",
      "tasks": [
          {
              "task_type": "conversation",
              "toolchain": {
                  "execution": "parallel",
                  "pipelines": [
                      [
                          "transcriber",
                          "llm",
                          "synthesizer"
                      ]
                  ]
              },
              "tools_config": {
                  "input": {
                      "format": "wav",
                      "provider": "twilio"
                  },
                  "llm_agent": {
                      "agent_type": "simple_llm_agent",
                      "agent_flow_type": "streaming",
                      "routes": null,
                      "llm_config": {
                          "agent_flow_type": "streaming",
                          "provider": "openai",
                          "request_json": true,
                          "model": "gpt-4o-mini"
                      }
                  },
                  "output": {
                      "format": "wav",
                      "provider": "twilio"
                  },
                  "synthesizer": {
                      "audio_format": "wav",
                      "provider": "azuretts",
                      "stream": true,
                      "provider_config": {
                          "voice": "Sonia",
                          "model": "neural",
                          "language": "en-GB"
                      },
                      "buffer_size": 100.0
                  },
                  "transcriber": {
                      "encoding": "linear16",
                      "language": "en",
                      "provider": "deepgram",
                      "stream": true
                  }
              },
              "task_config": {
                  "hangup_after_silence": 30.0
              }
          }
      ]
  },
  "agent_prompts": {
      "task_1": {
          "system_prompt": "Why Do We Fall, Sir? So That We Can Learn To Pick Ourselves Up."
      }
  }
}


Step 2: The response to the previous request contains the agent_id.
Use this agent_id to initiate a call via the telephony server running on port 8001 (for Twilio) by sending the following payload to http://localhost:8001/call:

  {
    "agent_id": "4c19700b-227c-4c2d-8b9f-42dfe4b240fc",
    "recipient_phone_number": "+19876543210"
  }