Bump version to 1.0.9; update JSON schemas; add new APIs

This commit is contained in:
Sebastian Thiel
2019-07-05 11:32:35 +08:00
parent 99e97ceece
commit e42ebc0c2b
2442 changed files with 190984 additions and 71186 deletions

View File

@@ -152,7 +152,7 @@
}
}
},
"revision": "20190322",
"revision": "20190628",
"rootUrl": "https://texttospeech.googleapis.com/",
"schemas": {
"AudioConfig": {
@@ -160,7 +160,7 @@
"id": "AudioConfig",
"properties": {
"audioEncoding": {
"description": "Required. The format of the requested audio byte stream.",
"description": "Required. The format of the audio byte stream.",
"enum": [
"AUDIO_ENCODING_UNSPECIFIED",
"LINEAR16",
@@ -170,35 +170,35 @@
"enumDescriptions": [
"Not specified. Will return result google.rpc.Code.INVALID_ARGUMENT.",
"Uncompressed 16-bit signed little-endian samples (Linear PCM).\nAudio content returned as LINEAR16 also contains a WAV header.",
"MP3 audio.",
"MP3 audio at 32kbps.",
"Opus encoded audio wrapped in an ogg container. The result will be a\nfile which can be played natively on Android, and in browsers (at least\nChrome and Firefox). The quality of the encoding is considerably higher\nthan MP3 while using approximately the same bitrate."
],
"type": "string"
},
"effectsProfileId": {
"description": "An identifier which selects 'audio effects' profiles that are applied on\n(post synthesized) text to speech.\nEffects are applied on top of each other in the order they are given.\nSee\n\n[audio-profiles](https:\n//cloud.google.com/text-to-speech/docs/audio-profiles)\nfor current supported profile ids.",
"description": "Optional. Input only. An identifier which selects 'audio effects' profiles\nthat are applied on (post synthesized) text to speech. Effects are applied\non top of each other in the order they are given. See\n[audio\nprofiles](https://cloud.google.com/text-to-speech/docs/audio-profiles) for\ncurrent supported profile ids.",
"items": {
"type": "string"
},
"type": "array"
},
"pitch": {
"description": "Optional speaking pitch, in the range [-20.0, 20.0]. 20 means increase 20\nsemitones from the original pitch. -20 means decrease 20 semitones from the\noriginal pitch.",
"description": "Optional. Input only. Speaking pitch, in the range [-20.0, 20.0]. 20 means\nincrease 20 semitones from the original pitch. -20 means decrease 20\nsemitones from the original pitch.",
"format": "double",
"type": "number"
},
"sampleRateHertz": {
"description": "The synthesis sample rate (in hertz) for this audio. Optional. If this is\ndifferent from the voice's natural sample rate, then the synthesizer will\nhonor this request by converting to the desired sample rate (which might\nresult in worse audio quality), unless the specified sample rate is not\nsupported for the encoding chosen, in which case it will fail the request\nand return google.rpc.Code.INVALID_ARGUMENT.",
"description": "The synthesis sample rate (in hertz) for this audio. Optional. When this is\nspecified in SynthesizeSpeechRequest, if this is different from the voice's\nnatural sample rate, then the synthesizer will honor this request by\nconverting to the desired sample rate (which might result in worse audio\nquality), unless the specified sample rate is not supported for the\nencoding chosen, in which case it will fail the request and return\ngoogle.rpc.Code.INVALID_ARGUMENT.",
"format": "int32",
"type": "integer"
},
"speakingRate": {
"description": "Optional speaking rate/speed, in the range [0.25, 4.0]. 1.0 is the normal\nnative speed supported by the specific voice. 2.0 is twice as fast, and\n0.5 is half as fast. If unset(0.0), defaults to the native 1.0 speed. Any\nother values < 0.25 or > 4.0 will return an error.",
"description": "Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is\nthe normal native speed supported by the specific voice. 2.0 is twice as\nfast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0\nspeed. Any other values < 0.25 or > 4.0 will return an error.",
"format": "double",
"type": "number"
},
"volumeGainDb": {
"description": "Optional volume gain (in dB) of the normal native volume supported by the\nspecific voice, in the range [-96.0, 16.0]. If unset, or set to a value of\n0.0 (dB), will play at normal native signal amplitude. A value of -6.0 (dB)\nwill play at approximately half the amplitude of the normal native signal\namplitude. A value of +6.0 (dB) will play at approximately twice the\namplitude of the normal native signal amplitude. Strongly recommend not to\nexceed +10 (dB) as there's usually no effective increase in loudness for\nany value greater than that.",
"description": "Optional. Input only. Volume gain (in dB) of the normal native volume\nsupported by the specific voice, in the range [-96.0, 16.0]. If unset, or\nset to a value of 0.0 (dB), will play at normal native signal amplitude. A\nvalue of -6.0 (dB) will play at approximately half the amplitude of the\nnormal native signal amplitude. A value of +6.0 (dB) will play at\napproximately twice the amplitude of the normal native signal amplitude.\nStrongly recommend not to exceed +10 (dB) as there's usually no effective\nincrease in loudness for any value greater than that.",
"format": "double",
"type": "number"
}
@@ -258,7 +258,7 @@
"id": "SynthesizeSpeechResponse",
"properties": {
"audioContent": {
"description": "The audio data bytes encoded as specified in the request, including the\nheader (For LINEAR16 audio, we include the WAV header). Note: as\nwith all bytes fields, protobuffers use a pure binary representation,\nwhereas JSON representations use base64.",
"description": "The audio data bytes encoded as specified in the request, including the\nheader for encodings that are wrapped in containers (e.g. MP3, OGG_OPUS).\nFor LINEAR16 audio, we include the WAV header. Note: as\nwith all bytes fields, protobuffers use a pure binary representation,\nwhereas JSON representations use base64.",
"format": "byte",
"type": "string"
}

View File

@@ -152,7 +152,7 @@
}
}
},
"revision": "20190322",
"revision": "20190628",
"rootUrl": "https://texttospeech.googleapis.com/",
"schemas": {
"AudioConfig": {
@@ -160,7 +160,7 @@
"id": "AudioConfig",
"properties": {
"audioEncoding": {
"description": "Required. The format of the requested audio byte stream.",
"description": "Required. The format of the audio byte stream.",
"enum": [
"AUDIO_ENCODING_UNSPECIFIED",
"LINEAR16",
@@ -170,35 +170,35 @@
"enumDescriptions": [
"Not specified. Will return result google.rpc.Code.INVALID_ARGUMENT.",
"Uncompressed 16-bit signed little-endian samples (Linear PCM).\nAudio content returned as LINEAR16 also contains a WAV header.",
"MP3 audio.",
"MP3 audio at 32kbps.",
"Opus encoded audio wrapped in an ogg container. The result will be a\nfile which can be played natively on Android, and in browsers (at least\nChrome and Firefox). The quality of the encoding is considerably higher\nthan MP3 while using approximately the same bitrate."
],
"type": "string"
},
"effectsProfileId": {
"description": "An identifier which selects 'audio effects' profiles that are applied on\n(post synthesized) text to speech.\nEffects are applied on top of each other in the order they are given.\nSee\n\n[audio-profiles](https:\n//cloud.google.com/text-to-speech/docs/audio-profiles)\nfor current supported profile ids.",
"description": "Optional. Input only. An identifier which selects 'audio effects' profiles\nthat are applied on (post synthesized) text to speech. Effects are applied\non top of each other in the order they are given. See\n[audio\nprofiles](https://cloud.google.com/text-to-speech/docs/audio-profiles) for\ncurrent supported profile ids.",
"items": {
"type": "string"
},
"type": "array"
},
"pitch": {
"description": "Optional speaking pitch, in the range [-20.0, 20.0]. 20 means increase 20\nsemitones from the original pitch. -20 means decrease 20 semitones from the\noriginal pitch.",
"description": "Optional. Input only. Speaking pitch, in the range [-20.0, 20.0]. 20 means\nincrease 20 semitones from the original pitch. -20 means decrease 20\nsemitones from the original pitch.",
"format": "double",
"type": "number"
},
"sampleRateHertz": {
"description": "The synthesis sample rate (in hertz) for this audio. Optional. If this is\ndifferent from the voice's natural sample rate, then the synthesizer will\nhonor this request by converting to the desired sample rate (which might\nresult in worse audio quality), unless the specified sample rate is not\nsupported for the encoding chosen, in which case it will fail the request\nand return google.rpc.Code.INVALID_ARGUMENT.",
"description": "The synthesis sample rate (in hertz) for this audio. Optional. When this is\nspecified in SynthesizeSpeechRequest, if this is different from the voice's\nnatural sample rate, then the synthesizer will honor this request by\nconverting to the desired sample rate (which might result in worse audio\nquality), unless the specified sample rate is not supported for the\nencoding chosen, in which case it will fail the request and return\ngoogle.rpc.Code.INVALID_ARGUMENT.",
"format": "int32",
"type": "integer"
},
"speakingRate": {
"description": "Optional speaking rate/speed, in the range [0.25, 4.0]. 1.0 is the normal\nnative speed supported by the specific voice. 2.0 is twice as fast, and\n0.5 is half as fast. If unset(0.0), defaults to the native 1.0 speed. Any\nother values < 0.25 or > 4.0 will return an error.",
"description": "Optional. Input only. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is\nthe normal native speed supported by the specific voice. 2.0 is twice as\nfast, and 0.5 is half as fast. If unset(0.0), defaults to the native 1.0\nspeed. Any other values < 0.25 or > 4.0 will return an error.",
"format": "double",
"type": "number"
},
"volumeGainDb": {
"description": "Optional volume gain (in dB) of the normal native volume supported by the\nspecific voice, in the range [-96.0, 16.0]. If unset, or set to a value of\n0.0 (dB), will play at normal native signal amplitude. A value of -6.0 (dB)\nwill play at approximately half the amplitude of the normal native signal\namplitude. A value of +6.0 (dB) will play at approximately twice the\namplitude of the normal native signal amplitude. Strongly recommend not to\nexceed +10 (dB) as there's usually no effective increase in loudness for\nany value greater than that.",
"description": "Optional. Input only. Volume gain (in dB) of the normal native volume\nsupported by the specific voice, in the range [-96.0, 16.0]. If unset, or\nset to a value of 0.0 (dB), will play at normal native signal amplitude. A\nvalue of -6.0 (dB) will play at approximately half the amplitude of the\nnormal native signal amplitude. A value of +6.0 (dB) will play at\napproximately twice the amplitude of the normal native signal amplitude.\nStrongly recommend not to exceed +10 (dB) as there's usually no effective\nincrease in loudness for any value greater than that.",
"format": "double",
"type": "number"
}
@@ -258,7 +258,7 @@
"id": "SynthesizeSpeechResponse",
"properties": {
"audioContent": {
"description": "The audio data bytes encoded as specified in the request, including the\nheader (For LINEAR16 audio, we include the WAV header). Note: as\nwith all bytes fields, protobuffers use a pure binary representation,\nwhereas JSON representations use base64.",
"description": "The audio data bytes encoded as specified in the request, including the\nheader for encodings that are wrapped in containers (e.g. MP3, OGG_OPUS).\nFor LINEAR16 audio, we include the WAV header. Note: as\nwith all bytes fields, protobuffers use a pure binary representation,\nwhereas JSON representations use base64.",
"format": "byte",
"type": "string"
}