Talk Analytics API, REST: Talk.Get
- HTTP request
- Body parameters
- Response
- Talk
- Field
- Transcription
- Phrase
- PhraseText
- Word
- PhraseStatistics
- UtteranceStatistics
- AudioSegmentBoundaries
- DescriptiveStatistics
- Quantile
- RecognitionClassifierResult
- PhraseHighlight
- RecognitionClassifierLabel
- AlgorithmMetadata
- Error
- SpeechStatistics
- SilenceStatistics
- InterruptsStatistics
- InterruptsEvaluation
- ConversationStatistics
- SpeakerStatistics
- Points
- Quiz
- TextClassifiers
- ClassificationResult
- ClassifierStatistics
- Histogram
- Summarization
- SummarizationStatement
- SummarizationField
- Assistants
- AssistantResult
- AssistantFieldResult
- TalkState
- AlgorithmProcessingInfo
RPC for bulk get.
HTTP request
POST https://rest-api.speechsense.yandexcloud.net/speechsense/v1/talks/get
Body parameters
{
"organizationId": "string",
"spaceId": "string",
"connectionId": "string",
"projectId": "string",
"talkIds": [
"string"
],
"resultsMask": "string"
}
|
Field |
Description |
|
organizationId |
string id of organization |
|
spaceId |
string id of space |
|
connectionId |
string id of connection to search data |
|
projectId |
string id of project to search data |
|
talkIds[] |
string ids of talks to return. Requesting too many talks may result in "message exceeds maximum size" error. |
|
resultsMask |
string (field-mask) A comma-separated list of the names of ALL fields to be updated. If |
Response
HTTP Code: 200 - OK
{
"talk": [
{
"id": "string",
"organizationId": "string",
"spaceId": "string",
"connectionId": "string",
"projectIds": [
"string"
],
"createdBy": "string",
"createdAt": "string",
"modifiedBy": "string",
"modifiedAt": "string",
"talkFields": [
{
"name": "string",
"value": "string",
"type": "string"
}
],
"transcription": {
"phrases": [
{
"channelNumber": "string",
"startTimeMs": "string",
"endTimeMs": "string",
"phrase": {
"text": "string",
"language": "string",
"normalizedText": "string",
"words": [
{
"word": "string",
"startTimeMs": "string",
"endTimeMs": "string"
}
]
},
"statistics": {
"statistics": {
"speakerTag": "string",
"speechBoundaries": {
"startTimeMs": "string",
"endTimeMs": "string",
"durationSeconds": "string"
},
"totalSpeechMs": "string",
"speechRatio": "string",
"totalSilenceMs": "string",
"silenceRatio": "string",
"wordsCount": "string",
"lettersCount": "string",
"wordsPerSecond": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
},
"lettersPerSecond": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
}
}
},
"classifiers": [
{
"startTimeMs": "string",
"endTimeMs": "string",
"classifier": "string",
"highlights": [
{
"text": "string",
"offset": "string",
"count": "string"
}
],
"labels": [
{
"label": "string",
"confidence": "string"
}
]
}
]
}
],
"algorithmsMetadata": [
{
"createdTaskDate": "string",
"completedTaskDate": "string",
"error": {
"code": "string",
"message": "string"
},
"traceId": "string",
"name": "string"
}
]
},
"speechStatistics": {
"totalSimultaneousSpeechDurationSeconds": "string",
"totalSimultaneousSpeechDurationMs": "string",
"totalSimultaneousSpeechRatio": "string",
"simultaneousSpeechDurationEstimation": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
}
},
"silenceStatistics": {
"totalSimultaneousSilenceDurationMs": "string",
"totalSimultaneousSilenceRatio": "string",
"simultaneousSilenceDurationEstimation": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
},
"totalSimultaneousSilenceDurationSeconds": "string"
},
"interruptsStatistics": {
"speakerInterrupts": [
{
"speakerTag": "string",
"interruptsCount": "string",
"interruptsDurationMs": "string",
"interrupts": [
{
"startTimeMs": "string",
"endTimeMs": "string",
"durationSeconds": "string"
}
],
"interruptsDurationSeconds": "string"
}
]
},
"conversationStatistics": {
"conversationBoundaries": {
"startTimeMs": "string",
"endTimeMs": "string",
"durationSeconds": "string"
},
"speakerStatistics": [
{
"speakerTag": "string",
"completeStatistics": {
"speakerTag": "string",
"speechBoundaries": {
"startTimeMs": "string",
"endTimeMs": "string",
"durationSeconds": "string"
},
"totalSpeechMs": "string",
"speechRatio": "string",
"totalSilenceMs": "string",
"silenceRatio": "string",
"wordsCount": "string",
"lettersCount": "string",
"wordsPerSecond": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
},
"lettersPerSecond": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
}
},
"wordsPerUtterance": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
},
"lettersPerUtterance": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
},
"utteranceCount": "string",
"utteranceDurationEstimation": {
"min": "string",
"max": "string",
"mean": "string",
"std": "string",
"quantiles": [
{
"level": "string",
"value": "string"
}
]
}
}
]
},
"points": {
"quiz": [
{
"request": "string",
"response": "string",
"id": "string"
}
]
},
"textClassifiers": {
"classificationResult": [
{
"classifier": "string",
"classifierStatistics": [
{
"channelNumber": "string",
"totalCount": "string",
"histograms": [
{
"countValues": [
"string"
]
}
]
}
]
}
]
},
"summarization": {
"statements": [
{
"field": {
"id": "string",
"name": "string",
"type": "string"
},
"response": [
"string"
]
}
]
},
"assistants": {
"assistantResults": [
{
"assistantId": "string",
"results": [
{
"fieldId": "string",
// Includes only one of the fields `stringResult`, `intResult`, `floatResult`
"stringResult": "string",
"intResult": "string",
"floatResult": "string"
// end of the list of possible fields
}
]
}
]
},
"talkState": {
"processingState": "string",
"algorithmProcessingInfos": [
{
"algorithm": "string",
"processingState": "string"
}
]
}
}
]
}
|
Field |
Description |
|
talk[] |
Talk
|
Field |
Description |
|
id |
string talk id |
|
organizationId |
string |
|
spaceId |
string |
|
connectionId |
string |
|
projectIds[] |
string |
|
createdBy |
string audition info |
|
createdAt |
string (date-time) String in RFC3339 To work with values in this field, use the APIs described in the |
|
modifiedBy |
string |
|
modifiedAt |
string (date-time) String in RFC3339 To work with values in this field, use the APIs described in the |
|
talkFields[] |
key-value representation of talk fields with values |
|
transcription |
various ml analysis results |
|
speechStatistics |
|
|
silenceStatistics |
|
|
interruptsStatistics |
|
|
conversationStatistics |
|
|
points |
|
|
textClassifiers |
|
|
summarization |
|
|
assistants |
|
|
talkState |
Field
connection field value
|
Field |
Description |
|
name |
string name of the field |
|
value |
string field value |
|
type |
enum (FieldType) field type
|
Transcription
|
Field |
Description |
|
phrases[] |
|
|
algorithmsMetadata[] |
There might be several algorithms that work on the talk transcription. For example: speechkit and translator |
Phrase
|
Field |
Description |
|
channelNumber |
string (int64) |
|
startTimeMs |
string (int64) |
|
endTimeMs |
string (int64) |
|
phrase |
|
|
statistics |
|
|
classifiers[] |
PhraseText
|
Field |
Description |
|
text |
string |
|
language |
string |
|
normalizedText |
string |
|
words[] |
Word
|
Field |
Description |
|
word |
string |
|
startTimeMs |
string (int64) |
|
endTimeMs |
string (int64) |
PhraseStatistics
|
Field |
Description |
|
statistics |
UtteranceStatistics
|
Field |
Description |
|
speakerTag |
string |
|
speechBoundaries |
Audio segment boundaries |
|
totalSpeechMs |
string (int64) Total speech duration |
|
speechRatio |
string Speech ratio within audio segment |
|
totalSilenceMs |
string (int64) Total silence duration |
|
silenceRatio |
string Silence ratio within audio segment |
|
wordsCount |
string (int64) Number of words in recognized speech |
|
lettersCount |
string (int64) Number of letters in recognized speech |
|
wordsPerSecond |
Descriptive statistics for words per second distribution |
|
lettersPerSecond |
Descriptive statistics for letters per second distribution |
AudioSegmentBoundaries
|
Field |
Description |
|
startTimeMs |
string (int64) Audio segment start time |
|
endTimeMs |
string (int64) Audio segment end time |
|
durationSeconds |
string (int64) Duration in seconds |
DescriptiveStatistics
|
Field |
Description |
|
min |
string Minimum observed value |
|
max |
string Maximum observed value |
|
mean |
string Estimated mean of distribution |
|
std |
string Estimated standard deviation of distribution |
|
quantiles[] |
List of evaluated quantiles |
Quantile
|
Field |
Description |
|
level |
string Quantile level in range (0, 1) |
|
value |
string Quantile value |
RecognitionClassifierResult
|
Field |
Description |
|
startTimeMs |
string (int64) Start time of the audio segment used for classification |
|
endTimeMs |
string (int64) End time of the audio segment used for classification |
|
classifier |
string Name of the triggered classifier |
|
highlights[] |
List of highlights, i.e. parts of phrase that determine the result of the classification |
|
labels[] |
Classifier predictions |
PhraseHighlight
|
Field |
Description |
|
text |
string Text transcription of the highlighted audio segment |
|
offset |
string (int64) Offset in symbols from the beginning of the whole phrase where the highlight begins |
|
count |
string (int64) Count of symbols in the highlighted text |
RecognitionClassifierLabel
|
Field |
Description |
|
label |
string The label of the class predicted by the classifier |
|
confidence |
string The prediction confidence |
AlgorithmMetadata
|
Field |
Description |
|
createdTaskDate |
string (date-time) String in RFC3339 To work with values in this field, use the APIs described in the |
|
completedTaskDate |
string (date-time) String in RFC3339 To work with values in this field, use the APIs described in the |
|
error |
|
|
traceId |
string |
|
name |
string |
Error
|
Field |
Description |
|
code |
string |
|
message |
string |
SpeechStatistics
|
Field |
Description |
|
totalSimultaneousSpeechDurationSeconds |
string (int64) Total simultaneous speech duration in seconds |
|
totalSimultaneousSpeechDurationMs |
string (int64) Total simultaneous speech duration in ms |
|
totalSimultaneousSpeechRatio |
string Simultaneous speech ratio within audio segment |
|
simultaneousSpeechDurationEstimation |
Descriptive statistics for simultaneous speech duration distribution |
SilenceStatistics
|
Field |
Description |
|
totalSimultaneousSilenceDurationMs |
string (int64) |
|
totalSimultaneousSilenceRatio |
string Simultaneous silence ratio within audio segment |
|
simultaneousSilenceDurationEstimation |
Descriptive statistics for simultaneous silence duration distribution |
|
totalSimultaneousSilenceDurationSeconds |
string (int64) |
InterruptsStatistics
|
Field |
Description |
|
speakerInterrupts[] |
Interrupts description for every speaker |
InterruptsEvaluation
|
Field |
Description |
|
speakerTag |
string Speaker tag |
|
interruptsCount |
string (int64) Number of interrupts made by the speaker |
|
interruptsDurationMs |
string (int64) Total duration of all interrupts |
|
interrupts[] |
Boundaries for every interrupt |
|
interruptsDurationSeconds |
string (int64) Total duration of all interrupts in seconds |
ConversationStatistics
|
Field |
Description |
|
conversationBoundaries |
Audio segment boundaries |
|
speakerStatistics[] |
Average statistics for each speaker |
SpeakerStatistics
|
Field |
Description |
|
speakerTag |
string Speaker tag |
|
completeStatistics |
Analysis of all phrases in the format of a single utterance |
|
wordsPerUtterance |
Descriptive statistics for words per utterance distribution |
|
lettersPerUtterance |
Descriptive statistics for letters per utterance distribution |
|
utteranceCount |
string (int64) Number of utterances |
|
utteranceDurationEstimation |
Descriptive statistics for utterance duration distribution |
Points
|
Field |
Description |
|
quiz[] |
Quiz
|
Field |
Description |
|
request |
string |
|
response |
string |
|
id |
string |
TextClassifiers
|
Field |
Description |
|
classificationResult[] |
ClassificationResult
|
Field |
Description |
|
classifier |
string Classifier name |
|
classifierStatistics[] |
Classifier statistics |
ClassifierStatistics
|
Field |
Description |
|
channelNumber |
string (int64) Channel number, null for whole talk |
|
totalCount |
string (int64) classifier total count |
|
histograms[] |
Represents various histograms built on top of classifiers |
Histogram
|
Field |
Description |
|
countValues[] |
string (int64) histogram count values. For example: |
Summarization
|
Field |
Description |
|
statements[] |
SummarizationStatement
|
Field |
Description |
|
field |
|
|
response[] |
string |
SummarizationField
|
Field |
Description |
|
id |
string |
|
name |
string |
|
type |
enum (SummarizationFieldType)
|
Assistants
|
Field |
Description |
|
assistantResults[] |
List of assistants results |
AssistantResult
|
Field |
Description |
|
assistantId |
string Assistant id |
|
results[] |
Per-field assistant results |
AssistantFieldResult
|
Field |
Description |
|
fieldId |
string Assistant result field id |
|
stringResult |
string Result as a string Includes only one of the fields Parsed model answer for the field. |
|
intResult |
string (int64) Result as an integer Includes only one of the fields Parsed model answer for the field. |
|
floatResult |
string Result as a floating-point number Includes only one of the fields Parsed model answer for the field. |
TalkState
|
Field |
Description |
|
processingState |
enum (ProcessingState)
|
|
algorithmProcessingInfos[] |
AlgorithmProcessingInfo
|
Field |
Description |
|
algorithm |
enum (Algorithm)
|
|
processingState |
enum (ProcessingState)
|