forked from GoogleCloudPlatform/golang-samples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
captionasync.go
114 lines (95 loc) · 2.71 KB
/
captionasync.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
// Copyright 2016 Google Inc. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
// Command captionasync sends audio data to the Google Speech API
// and prints its transcript.
package main
import (
"fmt"
"io/ioutil"
"log"
"os"
"strings"
"golang.org/x/net/context"
speech "cloud.google.com/go/speech/apiv1"
speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1"
)
// usage is the help text written to standard error by main when no audio
// file argument is supplied on the command line.
const usage = `Usage: captionasync <audiofile>
Audio file must be a 16-bit signed little-endian encoded
with a sample rate of 16000.
The path to the audio file may be a GCS URI (gs://...).
`
// main transcribes the audio named by the first command-line argument and
// prints every alternative transcript together with its confidence.
//
// If the argument contains "://" it is treated as a URI and handed to
// sendGCS; otherwise it is treated as a local file and handed to send.
// When no argument is given the usage text is written to standard error and
// the process exits with status 2. Any failure creating the client or
// running the recognition is fatal.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, usage)
		os.Exit(2)
	}
	path := os.Args[1]

	// Default to reading a local file; switch to pass-by-reference when the
	// argument looks like a URI (e.g. gs://bucket/object).
	sendFunc := send
	if strings.Contains(path, "://") {
		sendFunc = sendGCS
	}

	ctx := context.Background()
	client, err := speech.NewClient(ctx)
	if err != nil {
		log.Fatal(err)
	}

	resp, err := sendFunc(client, path)
	// Release the client's connection as soon as the call has completed.
	// This is done explicitly rather than with defer because log.Fatal exits
	// the process without running deferred functions.
	if cerr := client.Close(); cerr != nil {
		log.Printf("closing speech client: %v", cerr)
	}
	if err != nil {
		log.Fatal(err)
	}

	// [START print]
	// Print the results.
	for _, result := range resp.Results {
		for _, alt := range result.Alternatives {
			// NOTE(review): %3f sets a minimum field width of 3, not a
			// precision, so the confidence is printed with six decimals.
			// %.3f may have been intended; confirm before changing output.
			fmt.Printf("\"%v\" (confidence=%3f)\n", alt.Transcript, alt.Confidence)
		}
	}
	// [END print]
}
// send reads the audio file at filename and submits its bytes inline to the
// Speech API as a long-running recognition request, then blocks until the
// operation completes.
//
// The request describes the audio as LINEAR16 sampled at 16000 Hz with the
// language code "en-US". It returns the recognition response, or the first
// error hit while reading the file, starting the operation, or waiting on it.
func send(client *speech.Client, filename string) (*speechpb.LongRunningRecognizeResponse, error) {
	audioBytes, readErr := ioutil.ReadFile(filename)
	if readErr != nil {
		return nil, readErr
	}

	// Describe how the raw audio is encoded so the service can decode it.
	config := &speechpb.RecognitionConfig{
		Encoding:        speechpb.RecognitionConfig_LINEAR16,
		SampleRateHertz: 16000,
		LanguageCode:    "en-US",
	}
	// The audio payload travels inside the request itself.
	audio := &speechpb.RecognitionAudio{
		AudioSource: &speechpb.RecognitionAudio_Content{Content: audioBytes},
	}

	ctx := context.Background()
	operation, startErr := client.LongRunningRecognize(ctx, &speechpb.LongRunningRecognizeRequest{
		Config: config,
		Audio:  audio,
	})
	if startErr != nil {
		return nil, startErr
	}
	return operation.Wait(ctx)
}
// sendGCS asks the Speech API to transcribe the audio located at gcsURI
// using a long-running recognition request, then blocks until the operation
// completes.
//
// The audio is passed by reference (as a URI) rather than uploaded, and is
// described as LINEAR16 sampled at 16000 Hz with the language code "en-US".
// It returns the recognition response, or the error from starting or
// waiting on the operation.
func sendGCS(client *speech.Client, gcsURI string) (*speechpb.LongRunningRecognizeResponse, error) {
	// Describe how the referenced audio is encoded so the service can decode it.
	config := &speechpb.RecognitionConfig{
		Encoding:        speechpb.RecognitionConfig_LINEAR16,
		SampleRateHertz: 16000,
		LanguageCode:    "en-US",
	}
	// Only the location of the audio is sent, not its contents.
	audio := &speechpb.RecognitionAudio{
		AudioSource: &speechpb.RecognitionAudio_Uri{Uri: gcsURI},
	}

	ctx := context.Background()
	operation, startErr := client.LongRunningRecognize(ctx, &speechpb.LongRunningRecognizeRequest{
		Config: config,
		Audio:  audio,
	})
	if startErr != nil {
		return nil, startErr
	}
	return operation.Wait(ctx)
}