Sample training client

ASRaaS offers a Python client application that you may download and run on Linux or Windows to compile wordsets using the Training API. To run this client, you need:

  • The sample client package, sample-training-client.zip, which includes:
      • Python client, training-client.py
      • Wordset source file, places-wordset.json
      • Input flow files and run scripts (see the file list below)
  • Python 3 and the requests module, installed with pip install requests

Download the zip file and extract its files into the same directory as the nuance directory containing your proto files and client stubs. On Linux, give run-training-client.sh execute permission with chmod +x. For example:

unzip sample-training-client.zip
chmod +x run-training-client.sh
pip install requests

These are the resulting client files, in the same directory as the nuance directory:

├── training-client.py
├── flow_compilewordsetandwatch.py
├── flow_deletewordset.py
├── flow_getwordsetmetadata.py
├── places-wordset.json
├── run-training-client.bat
├── run-training-client.sh
├── util.py
└── nuance
    ├── asr
    │   ├── v1  [ASR Recognizer files]
    │   └── v1beta1 
    │       ├── training_pb2_grpc.py
    │       └── training_pb2.py
    └── rpc
        ├── error_details_pb2.py
        ├── status_code_pb2.py
        └── status_pb2.py
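
Before running the client, you can confirm the stubs are importable from this directory with a short Python check (a hypothetical snippet, not part of the package):

# Hypothetical quick check: confirm the generated stubs import cleanly.
from nuance.asr.v1beta1 import training_pb2, training_pb2_grpc
from nuance.rpc import status_pb2
print("stubs OK")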

You can use the client to compile wordsets, get information about existing compiled wordsets, and delete compiled wordsets. Once you have created the compiled wordsets, you can use them in the Recognizer API. See ResourceReference.

Get help

For a quick check that the client is working, and to see the arguments it accepts, run the client directly using the help (-h or --help) option.

See the results below and note the following:

  • -f or --files: The input file or files containing your wordset training input and parameters. The sample script expects one of the flow_*.py files included in the download package. For multiple files, specify --files flow1.py flow2.py.

  • -s or --serverUrl: The URL of the training server. The sample script specifies the Mix service, asr.api.nuance.com, on its default port, 443.

  • Authorization arguments: The recommended arguments are --clientID, --clientSecret, and --oauthURL. See Prerequisites from Mix.

  • --token: This hidden argument specifies an access token. If you use it, omit the other authorization arguments. See Another authorization method.

  • --oauthScope: The default is asr.wordset, as required by the Training API.

  • The --rootCerts, --privateKey, --certChain, and --jaeger arguments are not used in this hosted Mix environment.

The results are the same on Linux and Windows.

python3 training-client.py --help

usage: training-client.py [-options]

options:
  -h, --help                     Show this help message and exit
  -f file [file ...], --files file [file ...]
                                 List of flow files to execute sequentially,
                                 default=flow.py
  -l lvl, --loglevel lvl         Log level: fatal, error, warn, default=info, debug
  -L [fn], --logfile [fn]        Log to file, default fn=krcli-{datetimestamp}.log
  -q, --quiet                    Disable console logging
  -p, --parallel                 Run each flow in a separate thread
  -i [num], --iterations [num]   Number of times to run the list of files, default=1
  -s [url], --serverUrl [url]    ASR service URL, default=localhost:8090
  -b, --disableBasicAuth         Basic auth is required for Mix-generated credentials,
                                 disable for others
  --oauthURL [url]               OAuth 2.0 URL
  --clientID [url]               OAuth 2.0 Client ID
  --clientSecret [url]           OAuth 2.0 Client Secret
  --oauthScope [url]             OAuth 2.0 Scope, default=asr.wordset
  --secure                       Connect to the server using a secure gRPC channel
  --rootCerts [file]             Root certificates when using a secure gRPC channel
  --privateKey [file]            Certificate private key when using a secure gRPC
                                 channel
  --certChain [file]             Certificate chain when using a secure gRPC channel
  --jaeger [addr]                Send UDP opentrace spans, default
                                 addr=udp://localhost:6831
  --meta [txtfile]               Read header:value metadata lines from file,
                                 default=.metadata
  --maxReceiveSizeMB [megabytes] Maximum length of gRPC server response in megabytes,
                                 default=50 MB
  --wsFile [file]                Inline wordset file for a gRPC channel. If provided,
                                 overrides request.wordset in input file

Edit run script

First, edit the sample shell script or batch file to add your Mix client ID and secret. The script replaces the colons in the client ID with %3A so the value can be parsed correctly in subsequent operations.

The Linux shell script, run-training-client.sh:

#!/bin/bash

CLIENT_ID=<Mix client ID, starting with appID:>
SECRET=<Mix client secret>
# Change colons (:) to %3A in client ID
CLIENT_ID=${CLIENT_ID//:/%3A}

python3 training-client.py --secure \
--clientID $CLIENT_ID --clientSecret $SECRET \
--serverUrl asr.api.nuance.com:443 \
--oauthURL https://auth.crt.nuance.com/oauth2/token \
--files $1 

# $1 - The name and location of an input file, for example flow_compilewordsetandwatch.py

The equivalent Windows batch file, run-training-client.bat:

@echo off
setlocal enabledelayedexpansion

set CLIENT_ID=<Mix client ID, starting with appID:>
set SECRET=<Mix client secret>
rem Change colons (:) to %3A in client ID
set CLIENT_ID=!CLIENT_ID::=%%3A!

python training-client.py --secure ^
--clientID %CLIENT_ID% --clientSecret %SECRET% ^
--serverUrl asr.api.nuance.com:443 ^
--oauthURL https://auth.crt.nuance.com/oauth2/token ^
--files %1 

rem %1 - The name and location of an input file, for example flow_compilewordsetandwatch.py
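
The %3A substitution is simply URL encoding of the colon character. If you script this step in Python instead, the standard library does the same job; the client ID value below is a hypothetical placeholder:

from urllib.parse import quote

client_id = "appID:NMDPTRIAL_your_name_20230126T123456"  # hypothetical placeholder
print(quote(client_id, safe=""))  # colons become %3A, as in the run scripts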

Compile wordset

To compile a wordset, send the training request and watch as the job progresses. This scenario uses the flow_compilewordsetandwatch.py input file, which calls the CompileWordsetAndWatch method. The results are streamed back from the server as the compilation proceeds, so you can see the progress of the job.

Open the input file, flow_compilewordsetandwatch.py, and adjust the values for your wordset:

  • companion_artifact_reference.uri: Change /<context_tag>/ to identify the DLM containing the entity you are extending with a wordset. This is the Context Tag of a Mix application, for example, /names-places/ or /A77_C1946/.

  • target_artifact_reference.uri: Change /<context_tag>/<wordset_name>/ to a context tag and name for the compiled wordset you are creating. You may create a new tag for the wordset or use the same tag as its DLM, for example, /names-places/places-compiled-ws/.

  • wordset: Enter your source wordset in compressed JSON. Alternatively, leave the inline wordset as is and provide your own source wordset in a separate file containing either expanded or compressed JSON (both forms are sketched after this list). The sample package includes a wordset file that you may edit: see places-wordset.json.

  • metadata: Optionally change the value to your operating system.
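
To see the difference between expanded and compressed JSON, you can round-trip a wordset through Python's json module. This sketch uses a subset of the sample PLACES wordset:

import json

places = {
    "PLACES": [
        {"literal": "La Jolla", "spoken": ["la hoya", "la jolla"]},
        {"literal": "Abington Pigotts"},
        {"literal": "Cogenhoe", "spoken": ["cook no"]},
    ]
}

print(json.dumps(places, indent=2))               # expanded, as in places-wordset.json
print(json.dumps(places, separators=(",", ":")))  # compressed, as in request.wordset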

In this example, the wordset being created is named places-compiled-ws. This wordset extends the PLACES entity in the DLM referenced in companion_artifact_reference:

from nuance.asr.v1beta1.training_pb2 import *

list_of_requests = []

request = CompileWordsetRequest()

request.companion_artifact_reference.uri = "urn:nuance-mix:tag:model/names-places/mix.asr?=language=eng-USA"
request.target_artifact_reference.uri = "urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"
request.wordset = '{"PLACES":[{"literal":"La Jolla","spoken":["la hoya","la jolla"]},{"literal":"Llanfairpwllgwyngyll","spoken":["lan vire pool guin gill"]},{"literal":"Abington Pigotts"},{"literal":"Steeple Morden"},{"literal":"Hoyland Common"},{"literal":"Cogenhoe","spoken":["cook no"]},{"literal":"Fordoun","spoken":["forden","fordoun"]},{"literal":"Llangollen","spoken":["lan goth lin","lan gollen"]},{"literal":"Auchenblae"}]}'
request.metadata['app_os'] = 'CentOS'

# Add request to list
list_of_requests.append(request)

By default, the shell script or batch file uses the inline wordset in the flow file. If you prefer to use a source wordset file, add the --wsFile argument, for example:

. . . 
python3 training-client.py --secure \
--clientID $CLIENT_ID --clientSecret $SECRET \
--serverUrl asr.api.nuance.com:443 \
--oauthURL https://auth.crt.nuance.com/oauth2/token \
--files $1 \
--wsFile places-wordset.json
. . .

And the equivalent change in the batch file:

python training-client.py --secure ^
--clientID %CLIENT_ID% --clientSecret %SECRET% ^
--serverUrl asr.api.nuance.com:443 ^
--oauthURL https://auth.crt.nuance.com/oauth2/token ^
--files %1 ^
--wsFile places-wordset.json

Run the client using the shell script or batch file, passing it the input file, flow_compilewordsetandwatch.py, and watch the streaming results. ASRaaS reads the wordset from the file, compiles it as places-compiled-ws, and stores it in the Mix environment.

This example uses the --wsFile argument to pass the wordset file to the client, overriding the inline wordset in the input file. The results are the same on Linux and Windows.

./run-training-client.sh flow_compilewordsetandwatch.py

2023-01-26 16:56:20,924 INFO : Iteration #1
2023-01-26 16:56:20,924 INFO : Running flows in serial
2023-01-26 16:56:20,924 INFO : Obtaining auth token using basicAuth(...)
2023-01-26 16:56:21,308 INFO : Running file [flow_compilewordsetandwatch.py]
2023-01-26 16:56:21,308 INFO : Sending CompileWordsetAndWatch request
2023-01-26 16:56:21,309 INFO : Override the inline wordset with input file [places-wordset.json]
2023-01-26 16:56:21,309 INFO : Sending request: wordset: "{\n  \"PLACES\":[\n    {\n      \"literal\":\"La Jolla\",\n      \"spoken\":[\n        \"la hoya\",\n        \"la jolla\"\n      ]\n    },\n    {\n      \"literal\":\"Llanfairpwllgwyngyll\",\n      \"spoken\":[\n        \"lan vire pool guin gill\"\n      ]\n    },\n    {\n      \"literal\":\"Abington Pigotts\"\n    },\n    {\n      \"literal\":\"Steeple Morden\"\n    },\n    {\n      \"literal\":\"Hoyland Common\"\n    },\n    {\n      \"literal\":\"Cogenhoe\",\n      \"spoken\":[\n        \"cook no\"\n      ]\n    },\n    {\n      \"literal\":\"Fordoun\",\n      \"spoken\":[\n        \"forden\",\n        \"fordoun\"\n      ]\n    },\n    {\n      \"literal\":\"Llangollen\",\n      \"spoken\":[\n        \"lan goth lin\",\n        \"lan gollen\"\n      ]\n    },\n    {\n      \"literal\":\"Auchenblae\"\n    }\n  ]\n}\n"
companion_artifact_reference {
  uri: "urn:nuance-mix:tag:model/names-places/mix.asr?=language=eng-USA"
}
target_artifact_reference {
  uri: "urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"
}
metadata {
  key: "app_os"
  value: "CentOS"
}

2023-01-26 16:56:21,309 INFO : Sending metadata: []
2023-01-26 16:56:21,780 INFO : new server stream count 1
2023-01-26 16:56:21,780 INFO : Received response: job_status_update {
  job_id: "23178180-9dc3-11ed-8438-e116a8a4ec79"
  status: JOB_STATUS_QUEUED
}
request_status {
  status_code: OK
  http_trans_code: 200
}

2023-01-26 16:56:22,077 INFO : new server stream count 2
2023-01-26 16:56:22,077 INFO : Received response: job_status_update {
  job_id: "23178180-9dc3-11ed-8438-e116a8a4ec79"
  status: JOB_STATUS_COMPLETE
}
request_status {
  status_code: OK
  http_trans_code: 200
}

2023-01-26 16:56:22,078 INFO : First chunk latency: 0.7691743187606335 seconds
2023-01-26 16:56:22,078 INFO : Done running file [flow_compilewordsetandwatch.py]
2023-01-26 16:56:22,078 INFO : Iteration #1 complete
2023-01-26 16:56:22,078 INFO : Average first-chunk latency (over 1 train requests): 0.7691743187606335 seconds
Done

And on Windows:

run-training-client.bat flow_compilewordsetandwatch.py

2023-06-20 11:44:01,523 INFO : Iteration #1
2023-06-20 11:44:01,529 INFO : Running flows in serial
2023-06-20 11:44:01,529 INFO : Obtaining auth token using basicAuth(...)
2023-06-20 11:44:01,966 INFO : Running file [flow_compilewordsetandwatch.py]
2023-06-20 11:44:01,966 INFO : Sending CompileWordsetAndWatch request
2023-06-20 11:44:01,966 INFO : Override the inline wordset with input file [places-wordset.json]
2023-06-20 11:44:01,973 INFO : Sending request: wordset: "{\n  \"PLACES\":[\n    {\n      \"literal\":\"La Jolla\",\n      \"spoken\":[\n        \"la hoya\",\n        \"la jolla\"\n      ]\n    },\n    {\n      \"literal\":\"Llanfairpwllgwyngyll\",\n      \"spoken\":[\n        \"lan vire pool guin gill\"\n      ]\n    },\n    {\n      \"literal\":\"Abington Pigotts\"\n    },\n    {\n      \"literal\":\"Steeple Morden\"\n    },\n    {\n      \"literal\":\"Hoyland Common\"\n    },\n    {\n      \"literal\":\"Cogenhoe\",\n      \"spoken\":[\n        \"cook no\"\n      ]\n    },\n    {\n      \"literal\":\"Fordoun\",\n      \"spoken\":[\n        \"forden\",\n        \"fordoun\"\n      ]\n    },\n    {\n      \"literal\":\"Llangollen\",\n      \"spoken\":[\n        \"lan goth lin\",\n        \"lan gollen\"\n      ]\n    },\n    {\n      \"literal\":\"Auchenblae\"\n    }\n  ]\n}\n"
companion_artifact_reference {
  uri: "urn:nuance-mix:tag:model/names-places/mix.asr?=language=eng-USA"
}
target_artifact_reference {
  uri: "urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"
}
metadata {
  key: "app_os"
  value: "Windows"
}

2023-06-20 11:44:01,973 INFO : Sending metadata: []
2023-06-20 11:44:02,350 INFO : new server stream count 1
2023-06-20 11:44:02,351 INFO : Received response: job_status_update {
  job_id: "47a8b2a0-0f81-11ee-8afb-293ff5c2eff8"
  status: JOB_STATUS_QUEUED
}
request_status {
  status_code: OK
  http_trans_code: 200
}

2023-06-20 11:44:02,668 INFO : new server stream count 2
2023-06-20 11:44:02,684 INFO : Received response: job_status_update {
  job_id: "47a8b2a0-0f81-11ee-8afb-293ff5c2eff8"
  status: JOB_STATUS_COMPLETE
}
request_status {
  status_code: OK
  http_trans_code: 200
}

2023-06-20 11:44:02,684 INFO : First chunk latency: 0.7030000000377186 seconds
2023-06-20 11:44:02,684 INFO : Done running file [flow_compilewordsetandwatch.py]
2023-06-20 11:44:02,684 INFO : Iteration #1 complete
2023-06-20 11:44:02,684 INFO : Average first-chunk latency (over 1 train requests): 0.7030000000377186 seconds
Done

You can then reference the compiled wordset in your recognition requests (see ResourceReference) using the URN you provided, for example:

urn:nuance-mix:tag:wordset:lang/<context_tag>/places-compiled-ws/eng-USA/mix.asr
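
As a minimal sketch, assuming the Recognizer v1 stubs (resource_pb2) are generated in the nuance/asr/v1 directory shown earlier, the reference might look like this; check ResourceReference in the Recognizer API for the exact field and enum names:

from nuance.asr.v1.resource_pb2 import *

# Attach the compiled wordset to a recognition request by URN
# (field and enum names are assumptions based on the Recognizer API).
resource = RecognitionResource()
resource.external_reference.type = COMPILED_WORDSET
resource.external_reference.uri = "urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"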

Get information

To obtain information about a compiled wordset, use the flow_getwordsetmetadata.py input file, which calls the GetWordsetMetadata method. It returns metadata information but not the source JSON wordset.

Open flow_getwordsetmetadata.py and adjust the values for your wordset:

  • artifact_reference.uri: Change /<context_tag>/<wordset_name>/ to the context tag and name of an existing wordset, for example, /names-places/places-compiled-ws/.

from nuance.asr.v1beta1.training_pb2 import *

list_of_requests = []

request = GetWordsetMetadataRequest()
request.artifact_reference.uri = "urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"

# Add request to list
list_of_requests.append(request)

Run the client using the script file, passing it flow_getwordsetmetadata.py as input. The results are the same on Linux and Windows.

./run-training-client.sh flow_getwordsetmetadata.py

2023-01-26 17:04:54,160 INFO : Iteration #1
2023-01-26 17:04:54,161 INFO : Running flows in serial
2023-01-26 17:04:54,161 INFO : Obtaining auth token using basicAuth(...)
2023-01-26 17:04:54,492 INFO : Running file [flow_getwordsetmetadata.py]
2023-01-26 17:04:54,492 INFO : Sending GetWordsetMetadata request
2023-01-26 17:04:54,492 INFO : Sending request: artifact_reference {
  uri: "urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"
}

2023-01-26 17:04:54,492 INFO : Sending metadata: []
2023-01-26 17:04:54,819 INFO : Received response: metadata {
  key: "app_os"
  value: "CentOS"
}
metadata {
  key: "content-type"
  value: "application/x-nuance-wordset-pkg"
}
metadata {
  key: "x_nuance_companion_checksum_sha256"
  value: "fbb50be65b2000d4eb18da64dfd238118024309136d65e910b89f592095cd497"
}
metadata {
  key: "x_nuance_compiled_wordset_checksum_sha256"
  value: "3c75b884164618d564337dffb35429db8c2579c0ad555f139b80e7fab2193662"
}
metadata {
  key: "x_nuance_compiled_wordset_last_update"
  value: "2023-01-26T21:48:15.373Z"
}
metadata {
  key: "x_nuance_wordset_content_checksum_sha256"
  value: "2b6e2eff5bcbfbea26284c7b5576be39ac8157041bdb133e7ed02ccf1a346b1a"
}
request_status {
  status_code: OK
  http_trans_code: 200
}

2023-01-26 17:04:54,819 INFO : Done running file [flow_getwordsetmetadata.py]
2023-01-26 17:04:54,822 INFO : Iteration #1 complete
Done

Delete wordset

To delete a compiled wordset, use the flow_deletewordset.py input file, which calls the DeleteWordset method. It removes the wordset permanently from the Mix environment.

Open flow_deletewordset.py and adjust the values for your wordset:

  • artifact_reference.uri: Change /<context_tag>/<wordset_name>/ to the context tag and name of the wordset you wish to delete, for example, /names-places/places-compiled-ws/.

from nuance.asr.v1beta1.training_pb2 import *

list_of_requests = []

request = DeleteWordsetRequest()
request.artifact_reference.uri = "urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"

# Add request to list
list_of_requests.append(request)

Run the client using the run script, passing it flow_deletewordset.py as input. The results are the same on Linux and Windows.

./run-training-client.sh flow_deletewordset.py

2023-01-26 16:50:29,584 INFO : Iteration #1
2023-01-26 16:50:29,584 INFO : Running flows in serial
2023-01-26 16:50:29,584 INFO : Obtaining auth token using basicAuth(...)
2023-01-26 16:50:29,947 INFO : Running file [flow_deletewordset.py]
2023-01-26 16:50:29,947 INFO : Sending DeleteWordset request
2023-01-26 16:50:29,948 INFO : Sending request: artifact_reference {
  uri: "urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"
}

2023-01-26 16:50:29,948 INFO : Sending metadata: []
2023-01-26 16:50:30,291 INFO : Received response: request_status {
  status_code: OK
  http_trans_code: 200
}

2023-01-26 16:50:30,291 INFO : Done running file [flow_deletewordset.py]
2023-01-26 16:50:30,292 INFO : Iteration #1 complete
Done

Troubleshooting

These are some of the errors you may encounter using the sample training client.

Existing wordset: If you use the same wordset name in a compile request, you receive an error that the wordset already exists. You can either use a new name or delete the existing wordset before creating it again:

2021-04-05 17:37:41,457 INFO : Sending metadata: []
2021-04-05 17:37:41,977 INFO : Received response: request_status {
  status_code: ALREADY_EXISTS
  status_sub_code: 10
  http_trans_code: 200
  status_message {
    locale: "en-US"
    message: "Compiled wordset already available for artifact reference urn:nuance-mix:tag:wordset:lang/names-places/places-compiled-ws/eng-USA/mix.asr"
    message_resource_id: "10"
  }
}

JSON errors: If your source wordset uses incorrect JSON, you receive errors to help you correct it. In this example, the JSON is missing a quotation mark:

2021-04-05 16:34:55,874 INFO : Received response: request_status {
  status_code: BAD_REQUEST
  status_sub_code: 7
  http_trans_code: 400
  status_message {
    locale: "en-US"
    message: "Invalid wordset content Unexpected token c in JSON at position 5"
    message_resource_id: "7"
  }
}

And this JSON has a missing end brace:

2021-04-05 16:39:16,027 INFO : Received response: request_status {
  status_code: BAD_REQUEST
  status_sub_code: 7
  http_trans_code: 400
  status_message {
    locale: "en-US"
    message: "Invalid wordset content Unexpected end of JSON input"
    message_resource_id: "7"
  }
}

Not defined in model: You may ignore this error.

  status: JOB_STATUS_QUEUED
  messages {
    code: 15
    message: "The following entities are not defined in the model:  PLACES."
  }

Another authorization method

The shell script or batch file provided with the sample client passes your Mix credentials to the client. The client generates a token that authorizes it to use the Training service. This is the recommended method as it only generates a new token when the existing one is about to expire.

For testing purposes, you may instead generate a token and pass it to the client. Copy the following code into a shell script or batch file. On Linux, give the shell script execute permission with chmod +x.

#!/bin/bash

CLIENT_ID=<Mix client ID, starting with appID:>
SECRET=<Mix client secret>
#Change colons (:) to %3A in client ID
CLIENT_ID=${CLIENT_ID//:/%3A}

MY_TOKEN="`curl -s -u "$CLIENT_ID:$SECRET" \
"https://auth.crt.nuance.com/oauth2/token" \
-d "grant_type=client_credentials" -d "scope=asr.wordset" \
| python -c 'import sys, json; print(json.load(sys.stdin)["access_token"])'`"

python3 training-client.py --serverUrl asr.api.nuance.com:443 --secure \
--token $MY_TOKEN --files $1 \
--wsFile places-wordset.json

# $1 - The name and location of an input file, for example flow_compilewordsetandwatch.py

The equivalent Windows batch file:

@echo off
setlocal enabledelayedexpansion

set CLIENT_ID=<Mix client ID, starting with appID:>
set SECRET=<Mix client secret>
rem Change colons (:) to %3A in client ID
set CLIENT_ID=!CLIENT_ID::=%%3A!

set command=curl -s ^
-u %CLIENT_ID%:%SECRET% ^
-d "grant_type=client_credentials" -d "scope=asr.wordset" ^
https://auth.crt.nuance.com/oauth2/token

for /f "delims={}" %%a in ('%command%') do (
  for /f "tokens=1 delims=:, " %%b in ("%%a") do set key=%%b
  for /f "tokens=2 delims=:, " %%b in ("%%a") do set value=%%b
  goto done
)

:done

rem Remove quotes
set MY_TOKEN=!value:"=!

python training-client.py --serverUrl asr.api.nuance.com:443 --secure ^
--token %MY_TOKEN% --files %1 ^
--wsFile places-wordset.json

rem %1 - The name and location of an input file, for example flow_compilewordsetandwatch.py

The results are the same as when you pass your credentials to the client and let the client generate the token when required.
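
If you prefer to fetch the token in Python instead of parsing curl output, a minimal sketch using the requests module (already a prerequisite for the client) might look like this; the client ID and secret values are placeholders:

import requests
from urllib.parse import quote

client_id = quote("<Mix client ID, starting with appID:>", safe="")
secret = "<Mix client secret>"

resp = requests.post(
    "https://auth.crt.nuance.com/oauth2/token",
    auth=(client_id, secret),
    data={"grant_type": "client_credentials", "scope": "asr.wordset"},
)
resp.raise_for_status()
token = resp.json()["access_token"]  # pass this value to training-client.py --token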