Rodgger
2 years ago
2 changed files with 364 additions and 49 deletions
@ -0,0 +1,306 @@
|
||||
#!/usr/bin/env perl |
||||
|
||||
# |
||||
# AGI script that renders speech to text using Google's Cloud Speech API. |
||||
# |
||||
# Copyright (C) 2011 - 2016, Lefteris Zafiris <zaf@fastmail.com> |
||||
# |
||||
# This program is free software, distributed under the terms of |
||||
# the GNU General Public License Version 2. See the COPYING file |
||||
# at the top of the source tree. |
||||
# |
||||
# ----- |
||||
# Usage |
||||
# ----- |
||||
# agi(speech-recog.agi,[lang],[timeout],[intkey],[NOBEEP],[rtimeout],[speechContexts]) |
||||
# Records from the current channel until 2 seconds of silence are detected |
||||
# (this can be set by the user by the 'timeout' argument, -1 for no timeout) or the |
||||
# interrupt key (# by default) is pressed. If NOBEEP is set, no beep sound is played |
||||
# back to the user to indicate the start of the recording. If 'rtimeout' is set, |
||||
# overwrite to the absolute recording timeout. 'SpeechContext' provides hints to |
||||
# favor specific words and phrases in the results. Usage: [Agamemnon,Midas] |
||||
# The recorded sound is send over to Google speech recognition service and the |
||||
# returned text string is assigned as the value of the channel variable 'utterance'. |
||||
# The scripts sets the following channel variables: |
||||
# utterance : The generated text string. |
||||
# confidence : A value between 0 and 1 indicating how 'confident' the recognition engine |
||||
# feels about the result. Values bigger than 0.95 usually mean that the |
||||
# resulted text is correct. |
||||
# |
||||
# User defined parameters: |
||||
# Speech API key from Google: |
||||
# $key |
||||
# |
||||
# Default language: |
||||
# $language |
||||
# |
||||
# Default timeout: |
||||
# $timeout (value in seconds of silence before recording is stopped) |
||||
# |
||||
# Default interrupt key: |
||||
# $intkey (can be any digit from 0 to 9 or # and *, or a combination of them) |
||||
# |
||||
# Sample rate: |
||||
# $samplerate (value in Hz. 0 for automatic detection per channel/call, 16000 for |
||||
# use with wideband codecs, 8000 for traditional codecs. |
||||
# |
||||
# Profanity filter: |
||||
# $pro_filter ('false':disable, 'true': remove profanities) |
||||
# |
||||
|
||||
use warnings; |
||||
use strict; |
||||
use File::Copy qw(move); |
||||
use File::Temp qw(tempfile); |
||||
use LWP::UserAgent; |
||||
use JSON; |
||||
use Encode qw(encode); |
||||
use MIME::Base64; |
||||
|
||||
$| = 1; |
||||
|
||||
# ----------------------------- # |
||||
# User defined parameters: # |
||||
# ----------------------------- # |
||||
# Speech API key # |
||||
my $key = ""; |
||||
|
||||
# Default language # |
||||
my $language = "en-US"; |
||||
|
||||
# Default max silence timeout # |
||||
my $timeout = 2; |
||||
|
||||
# Absolute Recording timeout # |
||||
my $abs_timeout = -1; |
||||
|
||||
# Default interrupt key # |
||||
my $intkey = "#"; |
||||
|
||||
# Input audio sample rate # |
||||
# Leave blank to auto-detect # |
||||
my $samplerate = ""; |
||||
|
||||
# Profanity filter # |
||||
my $pro_filter = "false"; |
||||
|
||||
# Verbose debugging messages # |
||||
my $debug = 0; |
||||
|
||||
# ----------------------------- # |
||||
|
||||
my %AGI; |
||||
my $format; |
||||
my @result; |
||||
my $silence; |
||||
my $results = 1; |
||||
my $beep = "BEEP"; |
||||
my $comp_level = -8; |
||||
my $ua_timeout = 30; |
||||
my $tmpdir = "/tmp"; |
||||
my $url = "https://speech.googleapis.com/v1/speech"; |
||||
my $phrases = ""; |
||||
my @phrases = []; |
||||
|
||||
# Store AGI input # |
||||
($AGI{arg_1}, $AGI{arg_2}, $AGI{arg_3}, $AGI{arg_4}, $AGI{arg_5}, $AGI{arg_6}) = @ARGV; |
||||
while (<STDIN>) { |
||||
chomp; |
||||
last if (!length); |
||||
$AGI{$1} = $2 if (/^agi_(\w+)\:\s+(.*)$/); |
||||
} |
||||
|
||||
my $name = " -- $AGI{request}:"; |
||||
|
||||
# Reset variables. # |
||||
warn "$name Clearing channel variables.\n" if ($debug); |
||||
my %response = ( |
||||
utterance => -1, |
||||
confidence => -1, |
||||
); |
||||
set_channel_vars(%response); |
||||
|
||||
# Abort if key is missing or required programs not found. # |
||||
if (!$key) { |
||||
print "VERBOSE \"API key is missing. Aborting.\" 3\n"; |
||||
checkresponse(); |
||||
die "$name API key is missing. Aborting.\n"; |
||||
} |
||||
my $flac = `/usr/bin/which flac`; |
||||
die "$name flac is missing. Aborting.\n" if (!$flac); |
||||
chomp($flac); |
||||
warn "$name Found flac in: $flac\n" if ($debug); |
||||
|
||||
# Setting language, timeout, interrupt keys and BEEP indication # |
||||
if (length($AGI{arg_1})) { |
||||
$language = $AGI{arg_1} if ($AGI{arg_1} =~ /^[a-z]{2}(-[a-zA-Z]{2,6})?$/); |
||||
} |
||||
|
||||
if (length($AGI{arg_2})) { |
||||
if ($AGI{arg_2} == -1) { |
||||
$silence = ""; |
||||
} elsif ($AGI{arg_2} =~ /^\d+$/) { |
||||
$silence = "s=$AGI{arg_2}"; |
||||
} else { |
||||
$silence = "s=$timeout"; |
||||
} |
||||
} else { |
||||
$silence = "s=$timeout"; |
||||
} |
||||
|
||||
if (length($AGI{arg_3})) { |
||||
$intkey = "0123456789#*" if ($AGI{arg_3} eq "any"); |
||||
$intkey = $AGI{arg_3} if ($AGI{arg_3} =~ /^[0-9*#]+$/); |
||||
} |
||||
|
||||
if (length($AGI{arg_4})) { |
||||
$beep = "" if ($AGI{arg_4} eq "NOBEEP"); |
||||
} |
||||
|
||||
if (length($AGI{arg_5})) { |
||||
$abs_timeout = $AGI{arg_5}; |
||||
} |
||||
|
||||
if (length($AGI{arg_6})) { |
||||
$phrases = $AGI{arg_6}; |
||||
$phrases = substr($phrases,1,-1); |
||||
@phrases = split(',',$phrases); |
||||
} |
||||
|
||||
# Answer channel if not already answered # |
||||
warn "$name Checking channel status.\n" if ($debug); |
||||
print "CHANNEL STATUS\n"; |
||||
@result = checkresponse(); |
||||
if ($result[0] == 4) { |
||||
warn "$name Answering channel.\n" if ($debug); |
||||
print "ANSWER\n"; |
||||
@result = checkresponse(); |
||||
if ($result[0] != 0) { |
||||
die "$name Failed to answer channel.\n"; |
||||
} |
||||
} |
||||
|
||||
# Setting recording file format according to sample rate. # |
||||
if (!$samplerate) { ($format, $samplerate) = detect_format(); } |
||||
elsif ($samplerate == 12000) { $format = "sln12"; } |
||||
elsif ($samplerate == 16000) { $format = "sln16"; } |
||||
elsif ($samplerate == 32000) { $format = "sln32"; } |
||||
elsif ($samplerate == 44100) { $format = "sln44"; } |
||||
elsif ($samplerate == 48000) { $format = "sln48"; } |
||||
else { ($format, $samplerate) = ("sln", 8000); } |
||||
|
||||
# Initialize User agent # |
||||
my $ua = LWP::UserAgent->new(ssl_opts => {verify_hostname => 1}); |
||||
$ua->agent("Asterisk AGI speech recognition script"); |
||||
$ua->env_proxy; |
||||
$ua->timeout($ua_timeout); |
||||
|
||||
# Handle interrupts # |
||||
$SIG{'INT'} = \&int_handler; |
||||
$SIG{'HUP'} = \&int_handler; |
||||
|
||||
# Record file # |
||||
my ($fh, $tmpname) = tempfile("stt_XXXXXX", DIR => $tmpdir, UNLINK => 1); |
||||
print "RECORD FILE $tmpname $format \"$intkey\" \"$abs_timeout\" $beep \"$silence\"\n"; |
||||
@result = checkresponse(); |
||||
die "$name Failed to record file, aborting...\n" if ($result[0] == -1); |
||||
|
||||
if ($debug) { |
||||
warn "$name Recording Format: $format, Rate: $samplerate Hz, ", |
||||
"Language: $language, ", "$silence, Interrupt keys: $intkey\n"; |
||||
} |
||||
|
||||
# Encode audio data to flac # |
||||
my $endian = (unpack("h*", pack("s", 1)) =~ /01/) ? "big" : "little"; |
||||
system($flac, $comp_level, "--totally-silent", "--channels=1", "--endian=$endian", |
||||
"--sign=signed", "--bps=16", "--force-raw-format", "--sample-rate=$samplerate", |
||||
"$tmpname.$format") == 0 or die "$name $flac failed: $?\n"; |
||||
open($fh, "<", "$tmpname.flac") or die "Can't read file: $!"; |
||||
|
||||
my $audio = do { local $/; <$fh> }; |
||||
close($fh); |
||||
|
||||
my %config = ( |
||||
"encoding" => "FLAC", |
||||
"sampleRateHertz" => $samplerate, |
||||
"languageCode" => $language, |
||||
"profanityFilter" => $pro_filter, |
||||
"speechContexts" => {"phrases" => \@phrases}, |
||||
); |
||||
my %audio = ( "content" => encode_base64($audio, "") ); |
||||
|
||||
my %json = ( |
||||
"config" => \%config, |
||||
"audio" => \%audio, |
||||
); |
||||
# Send audio data for analysis # |
||||
my $uaresponse = $ua->post( |
||||
"$url:recognize?key=$key", |
||||
Content_Type => "application/json", |
||||
Content => encode_json(\%json), |
||||
); |
||||
|
||||
warn "$name The response was:\n", $uaresponse->content if ($debug); |
||||
if (!$uaresponse->is_success) { |
||||
print "VERBOSE \"Unable to get recognition data.\" 3\n"; |
||||
checkresponse(); |
||||
die "$name Unable to get recognition data.\n"; |
||||
} |
||||
my $jdata = decode_json($uaresponse->content); |
||||
$response{utterance} = encode('utf8', $jdata->{"results"}[0]->{"alternatives"}[0]->{"transcript"}); |
||||
$response{confidence} = $jdata->{"results"}[0]->{"alternatives"}[0]->{"confidence"}; |
||||
|
||||
set_channel_vars(%response); |
||||
exit; |
||||
|
||||
sub set_channel_vars { |
||||
my %resp = @_; |
||||
foreach (keys %resp) { |
||||
warn "$name Setting variable: $_ = $response{$_}\n" if ($debug); |
||||
print "SET VARIABLE \"$_\" \"$response{$_}\"\n"; |
||||
checkresponse(); |
||||
} |
||||
} |
||||
|
||||
sub checkresponse { |
||||
my $input = <STDIN>; |
||||
my @values; |
||||
|
||||
chomp $input; |
||||
if ($input =~ /^200 result=(-?\d+)\s?(.*)$/) { |
||||
warn "$name Command returned: $input\n" if ($debug); |
||||
@values = ("$1", "$2"); |
||||
} else { |
||||
$input .= <STDIN> if ($input =~ /^520-Invalid/); |
||||
warn "$name Unexpected result: $input\n"; |
||||
@values = (-1, -1); |
||||
} |
||||
return @values; |
||||
} |
||||
|
||||
sub detect_format { |
||||
# Detect the sound format used # |
||||
my @format; |
||||
print "GET FULL VARIABLE \${CHANNEL(audionativeformat)}\n"; |
||||
my @reply = checkresponse(); |
||||
for ($reply[1]) { |
||||
if (/(silk|sln)12/) { @format = ("sln12", 12000); } |
||||
elsif (/(speex|slin|silk)16|g722|siren7/) { @format = ("sln16", 16000); } |
||||
elsif (/(speex|slin|celt)32|siren14/) { @format = ("sln32", 32000); } |
||||
elsif (/(celt|slin)44/) { @format = ("sln44", 44100); } |
||||
elsif (/(celt|slin)48/) { @format = ("sln48", 48000); } |
||||
else { @format = ("sln", 8000); } |
||||
} |
||||
return @format; |
||||
} |
||||
|
||||
sub int_handler { |
||||
die "$name Interrupt signal received, terminating...\n"; |
||||
} |
||||
|
||||
END { |
||||
if ($tmpname) { |
||||
warn "$name Cleaning temp files.\n" if ($debug); |
||||
unlink glob "$tmpname.*"; |
||||
} |
||||
} |
Loading…
Reference in new issue