Package speakeasy :: Module text_to_speech
[hide private]
[frames | no frames]

Source Code for Module speakeasy.text_to_speech

  1  #!/usr/bin/env python 
  2   
  3  import os 
  4  import time 
  5  import subprocess 
  6  import tempfile 
  7   
  8  from sound_player import SoundPlayer; 
class TextToSpeechProvider(object):
    '''
    Abstraction for interacting with text-to-speech engines on
    Ubuntu, Mac, and (unimplemented:) Windows. Detects Festival and
    Cepstral engines on Ubuntu. Main public facilities: Speak an
    utterance immediately, given a string, and generate a .wav file
    from the text-to-speech conversion.
    '''

    def __init__(self):
        '''
        Detect the text-to-speech engines of this machine and remember
        the default one.
        @raise ValueError: if no text-to-speech engine is found.
        @raise NotImplementedError: on Cygwin/Windows, which is not yet supported.
        '''
        # Maps lower-case engine name to engine instance:
        self.t2sEngines = {}
        self.defaultEngine = self.findAvailableTTSEngines()
        # Engine used by the latest say()/sayToFile() call. It is the
        # engine that stop() and busy() operate on:
        self.lastUsedEngineObj = None

    # ----------------------------------------------- Public Methods ---------------------------------

    def sayToFile(self, text, voiceName=None, t2sEngineName=None, destFileName=None):
        '''
        Create a sound file with the result of turning the
        string passed in parameter 'text' to sound. The
        given voice engine and voice are used. If no destination file
        is provided, a temporary file is created, and its filename is
        returned after the file is closed. The caller bears responsibility
        for removing that temporary file.

        @param text: String to convert into sound.
        @type text: string
        @param voiceName: Name of voice to use. Must be a voice supported by the given t2s engine.
        @type voiceName: string
        @param t2sEngineName: Name of text-to-speech engine: "festival", "cepstral", "mact2s"
        @type t2sEngineName: string
        @param destFileName: Path into which the resulting sound should be directed. If not
                             provided, a temp file is created.
        @type destFileName: string
        @return: Path of the sound file, as returned by the engine.
        @raise ValueError: if the given engine name does not correspond to any known
                           text-to-speech engine, or if the engine rejects the voice name.
        '''
        self.lastUsedEngineObj = self.getEngineObj(t2sEngineName)
        return self.lastUsedEngineObj.sayToFile(text, voiceName, destFileName)

    def say(self, text, voiceName=None, t2sEngineName=None, blockTillDone=False):
        '''
        Immediately speak the given string with the given voice on the given engine.
        If voice or engine are not provided, defaults are used.
        @param text: String to speak
        @type text: string
        @param voiceName: Designation of the voice to use.
        @type voiceName: string
        @param t2sEngineName: Name of text-to-speech engine to use.
        @type t2sEngineName: string
        @param blockTillDone: If true, method will sleep till speech done, then method returns.
        @type blockTillDone: boolean
        @raise ValueError: if unknown text-to-speech engine.
        @raise OSError: if failure in running the text-to-speech command in underlying shell
        '''
        self.lastUsedEngineObj = self.getEngineObj(t2sEngineName)
        self.lastUsedEngineObj.say(text, voiceName)
        # Caller wants to block till sound done?
        if blockTillDone:
            self.waitForSoundDone()

    def stop(self):
        '''
        Stop text-to-speech that is currently playing.
        '''
        # Stop might be called before any tts was played.
        # Therefore the test for None:
        if self.lastUsedEngineObj is not None:
            self.lastUsedEngineObj.stop()

    def busy(self):
        '''
        Return True if the most recently used text-to-speech engine
        reports that it is currently synthesizing. Return False
        otherwise, including when no engine has been used yet.
        '''
        if self.lastUsedEngineObj is None:
            return False
        return self.lastUsedEngineObj.busy()

    def waitForSoundDone(self):
        '''
        Block until speech finished playing. Polls busy() every 0.3 seconds.
        '''
        while self.busy():
            time.sleep(0.3)

    def availableTextToSpeechEngines(self):
        '''
        Returns an array of text-to-speech engine names that
        are available on the current machine. Example: ['festival', 'cepstral']
        @return: Array of text-to-speech engine names as appropriate for ROS t2s messages.
                 Order within the array is not necessarily the same between calls.
        @rtype: [string]
        '''
        # list() guarantees a real list, even where dict.keys() is a view:
        return list(self.t2sEngines)

    def availableVoices(self):
        '''
        Returns a dictionary of all available voices for each
        text-to-speech engine. Keys are the engine names. Example:
            1. Cepstral : ['David', 'Anna']
            2. Festival : ['voice_kal_diphone']
        The default voice for each engine is guaranteed to be the
        first in the voice lists. Order of the remaining voices is
        as reported by the engine.
        @return: Dictionary mapping text-to-speech engine names to lists of voice names.
        @rtype: {string : [string]}
        '''
        voiceListDict = {}
        for ttsEngineObj in self.t2sEngines.values():
            defaultVoice = ttsEngineObj.getDefaultVoice()
            # Default voice goes first; drop its duplicate from the rest:
            otherVoices = [voice for voice in ttsEngineObj.getVoiceList()
                           if voice != defaultVoice]
            voiceListDict[ttsEngineObj.getEngineName()] = [defaultVoice] + otherVoices
        return voiceListDict

    # ----------------------------------------------- Private Methods ---------------------------------

    def getEngineObj(self, engineName):
        '''
        From a text-to-speech engine name that may be None,
        return an engine object. Lookup is case-insensitive.
        @param engineName: Name of text-to-speech engine to use
        @type engineName: string
        @return: Subclass of TextToSpeechEngine
        @raise ValueError: if the name matches none of the detected engines.
        '''
        if engineName is None:
            return self.defaultEngine
        try:
            return self.t2sEngines[str(engineName).lower()]
        except KeyError:
            raise ValueError("Unknown text-to-speech engine: " + str(engineName))

    def findAvailableTTSEngines(self):
        '''
        Try to sense the underlying OS. Then identify the available
        text-to-speech engines. Return the default engine to be used.
        @return: Default engine instance.
        @rtype: TextToSpeechEngine subclass
        @raise ValueError: if no speech engine is found.
        @raise NotImplementedError: on Cygwin/Windows.
        '''
        defaultEngine = None
        sysName = os.uname()[0].lower()
        if 'linux' in sysName:
            defaultEngine = self.linuxTTSEngines()
        elif 'cygwin' in sysName:
            defaultEngine = self.windowsTTSEngines()
        # uname reports 'Darwin' on Mac OS X; 'mac' is kept for
        # backward compatibility:
        elif 'darwin' in sysName or 'mac' in sysName:
            defaultEngine = self.macTTSEngines()

        if len(self.t2sEngines) == 0:
            raise ValueError("No text-to-speech engine found.")
        return defaultEngine

    def linuxTTSEngines(self):
        '''
        Called if underlying machine is Linux. Explores which
        text-to-speech engines are available. Festival is built
        into Ubuntu. Cepstral is a for-pay engine.
        @return: Text-to-speech engine instance to use as default. None if no
                 text-to-speech-engine is available.
        @rtype: TextToSpeechEngine
        '''
        if TextToSpeechProvider.which("text2wave") is not None:
            try:
                self.t2sEngines["festival"] = Festival()
            except NotImplementedError:
                # text2wave is present, but the engine itself declared
                # that it is unusable. Treat it as not installed:
                pass

        if TextToSpeechProvider.which("swift") is not None:
            try:
                self.t2sEngines["cepstral"] = Cepstral()
            except NotImplementedError:
                pass

        # If Cepstral is available, make it the default engine:
        for preferredName in ("cepstral", "festival"):
            if preferredName in self.t2sEngines:
                return self.t2sEngines[preferredName]
        return None

    def macTTSEngines(self):
        '''
        Called if underlying machine is Mac. Explores which
        text-to-speech engines are available.
        @return: Text-to-speech engine instance to use as default.
        @rtype: TextToSpeechEngine
        '''
        self.t2sEngines["mact2s"] = MacTextToSpeech()
        return self.t2sEngines["mact2s"]

    def windowsTTSEngines(self):
        '''
        Placeholder for Windows support.
        @raise NotImplementedError: always.
        '''
        raise NotImplementedError("Windows text-to-speech not yet implemented.")

    @staticmethod
    def which(program):
        '''
        Implements the Unix 'which' shell command, extended to consider
        not just $PATH, but also $PYTHONPATH when searching for an
        executable of the given name (the program parameter). $PATH
        is given preference; it is searched first. Each candidate is
        also tried with every extension listed in $PATHEXT, if set.
        @param program: Name of the executable with or without path
        @type program: string
        @return: None if no executable found, else full path to executable.
        '''
        def is_exe(fpath):
            # isfile() rather than exists(): directories usually carry
            # the x bit as well, but are not executables.
            return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

        def ext_candidates(fpath):
            yield fpath
            for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
                if ext:
                    yield fpath + ext

        def search(directories):
            for directory in directories:
                exe_file = os.path.join(directory, program)
                for candidate in ext_candidates(exe_file):
                    if is_exe(candidate):
                        return candidate
            return None

        fpath, fname = os.path.split(program)
        if fpath:
            # Caller supplied a path: check just that location.
            if is_exe(program):
                return program
        else:
            # os.defpath is the fallback when $PATH is not set at all:
            found = search(os.environ.get("PATH", os.defpath).split(os.pathsep))
            if found is not None:
                return found

        # Try the Pythonpath:
        pPath = os.getenv("PYTHONPATH")
        if pPath is None:
            return None
        return search(pPath.split(os.pathsep))
# ---------------------------------- Text to Speech Engine Classes and Subclasses ---------------

class TextToSpeechEngine(object):
    '''
    Abstract class from which text-to-speech engine classes are derived.
    Subclasses are expected to set ttsEngineName and defaultVoice, and to
    override say(), sayToFile(), stop(), busy(), and getVoiceList().
    '''

    def __init__(self):
        self.fullPathToExecutable = None
        # Subclasses must override the following instance vars:
        self.defaultVoice = None
        # Initialized here so that the getters below return None,
        # rather than raising AttributeError, when a subclass does
        # not set them:
        self.ttsEngineName = None
        self.t2sDestFilename = None

    def getEngineName(self):
        '''
        @return: Name of this engine as set by the subclass, else None.
        '''
        return self.ttsEngineName

    def getDefaultVoice(self):
        '''
        @return: Name of this engine's default voice, or None if not yet set.
        '''
        return self.defaultVoice

    def getT2SDestFilename(self):
        '''
        @return: Whatever the subclass stored in t2sDestFilename, else None.
        '''
        return self.t2sDestFilename

    def getVoiceList(self):
        '''
        @return: List of voice names the engine supports.
        @rtype: [string]
        @raise NotImplementedError: unless overridden by the subclass.
        '''
        raise NotImplementedError("Subclasses must implement getVoiceList().")

    def say(self, text, voice=None):
        '''
        Speak the given text. Must be overridden by subclasses.
        @raise NotImplementedError: unless overridden by the subclass.
        '''
        raise NotImplementedError("Subclasses must implement say().")

    def sayToFile(self, text, voice=None, destFileName=None):
        '''
        Synthesize the given text into a sound file. Must be overridden by subclasses.
        @raise NotImplementedError: unless overridden by the subclass.
        '''
        raise NotImplementedError("Subclasses must implement sayToFile().")

    def stop(self):
        '''
        Stop speech in progress. Must be overridden by subclasses.
        @raise NotImplementedError: unless overridden by the subclass.
        '''
        raise NotImplementedError("Subclasses must implement stop().")

    def busy(self):
        '''
        Report whether the engine is speaking. Must be overridden by subclasses.
        @raise NotImplementedError: unless overridden by the subclass.
        '''
        raise NotImplementedError("Subclasses must implement busy().")

    def checkVoiceValid(self, voice, defaultVoice):
        '''
        Called from subclasses. Given the name of a voice, check whether it is either None,
        or a voice that is supported by the sound engine. If voice is None, the name of the
        engine's default voice is returned.
        @param voice: Name of voice, or None
        @type voice: {string | NoneType}
        @param defaultVoice: Value to return when voice is None. It is not validated.
        @type defaultVoice: string
        @return: The voice name to use.
        @raise ValueError: if voice is not None and not in the engine's voice list.
        '''
        if voice is None:
            return defaultVoice
        if voice not in self.getVoiceList():
            raise ValueError("Voice engine %s does not support a voice named %s." % (self.getEngineName(), str(voice)))
        return voice

    def bashQuotify(self, str):
        '''
        Bash does not tolerate single quotes within single-quoted strings.
        Backslashing the embedded single quote does *not* work. Thus:
        echo 'That's it' will lead to error as expected, but so will
        echo 'That\\'s it'. The solution is to replace all single quotes
        with '\\'' (all quotes are single quotes here). The result is only
        safe when the caller wraps it in single quotes.

        @param str: String in which to make single quotes safe. Ok not to
                    have any single quotes.
        @type str: string
        @return: Copy of the string with every single quote replaced.
        '''
        # Close the quoted string, emit an escaped quote, reopen the string:
        return str.replace("'", "'" + "\\" + "'" + "'")
331
class Festival(TextToSpeechEngine):
    '''
    Wrapper for the Linux Festival text-to-speech engine.
    '''

    def __init__(self):
        '''
        @raise NotImplementedError: if the 'festival' executable cannot be found.
        '''
        super(Festival, self).__init__()
        self.ttsEngineName = "festival"
        self.fullPathToExecutable = TextToSpeechProvider.which("festival")
        if self.fullPathToExecutable is None:
            raise NotImplementedError("Festival text-to-speech engine is not implemented.")
        self.voiceList = None
        self.getVoiceList()
        # Scratch file through which text is handed to text2wave. It is
        # reused across sayToFile() calls, and is removed automatically
        # when the file object is closed or garbage collected:
        self.txtfile = tempfile.NamedTemporaryFile(mode='w+', prefix='speakeasy', suffix='.txt')
        self.txtfilename = self.txtfile.name

    def say(self, text, voice=None):
        '''
        Speak the text in the background; returns without waiting.
        @param text: Text to speak.
        @type text: string
        @param voice: Ignored. Too complicated to set a voice for now.
        '''
        # Single-quote the text for the shell so that quotes, '$', and
        # backticks in it are not interpreted. printf is used instead of
        # echo, because echo may interpret backslashes and options:
        quotedText = self.bashQuotify(str(text))
        commandLine = "printf '%s\\n' '" + quotedText + "' | padsp festival --tts &"
        os.system(commandLine)

    def sayToFile(self, text, voice=None, destFileName=None):
        '''
        Synthesize the text into a sound file using text2wave. Returns
        only after text2wave has finished.
        @param text: Text to convert.
        @type text: string
        @param voice: Festival voice name, or None for the default voice.
        @type voice: string
        @param destFileName: Output path. If None, a temp .wav file is created,
                             which the caller must remove.
        @type destFileName: string
        @return: Path of the sound file.
        @raise ValueError: if the voice is not supported.
        @raise OSError: if text2wave cannot be run, or produced no output.
        '''
        voice = self.checkVoiceValid(voice, self.defaultVoice)
        if destFileName is None:
            (destFd, destFileName) = tempfile.mkstemp(prefix='speakeasy', suffix='.wav')
            # text2wave writes by path; the descriptor is not needed:
            os.close(destFd)

        failureMsg = "Sound synthesis failed. Is the Festival engine installed? Is a festival voice installed? Try running 'rosdep satisfy speakeasy|sh'. Refer to http://pr.willowgarage.com/wiki/sound_play/Troubleshooting"

        # Replace the scratch file's content with the new text:
        self.initTextFile(text)
        self.txtfile.write(str(text))
        self.txtfile.flush()

        # Run in the foreground and without a shell: the size check
        # below is only meaningful once text2wave is done, and no
        # quoting of voice or file names is needed this way.
        try:
            subprocess.call(["text2wave",
                             "-eval", "(" + str(voice) + ")",
                             self.txtfilename,
                             "-o", str(destFileName)])
        except OSError:
            raise OSError(failureMsg)
        if not os.path.exists(destFileName) or os.stat(destFileName).st_size == 0:
            raise OSError(failureMsg)
        return destFileName

    def stop(self):
        '''
        Stop speech by killing all processes named 'audsp'.
        '''
        # NOTE(review): audsp is presumably Festival's audio spooler
        # process -- confirm against the Festival documentation.
        os.system("killall " + str("audsp"))

    def busy(self):
        '''
        @return: True if the process list ('ps xa') contains a line mentioning 'festival'.
        @rtype: boolean
        '''
        psOutput = os.popen("ps xa")
        try:
            processLines = psOutput.readlines()
        finally:
            psOutput.close()
        for line in processLines:
            if line.find("festival") > -1:
                return True
        return False

    def getVoiceList(self):
        '''
        @return: List of supported voice names. Currently fixed to the kal diphone voice.
        @rtype: [string]
        '''
        #TODO: run festival, and issue Scheme command (voice.list)
        #      to get the real list of voices.

        if self.voiceList is not None:
            return self.voiceList

        self.voiceList = ['voice_kal_diphone']
        self.defaultVoice = "voice_kal_diphone"
        return self.voiceList

    def initTextFile(self, text):
        '''
        Empty the scratch text file so that new text can be written to it.
        @param text: Unused.
        '''
        # Rewind first: truncate() cuts at the current file position,
        # which is the end of the file after an earlier write.
        self.txtfile.seek(0)
        self.txtfile.truncate()
403
class Cepstral(TextToSpeechEngine):
    '''
    Wrapper for Cepstral text-to-speech engine.
    '''

    # Voice used as default whenever it is installed:
    PREFERRED_VOICE = "David"

    def __init__(self):
        '''
        @raise NotImplementedError: if the 'swift' executable cannot be found.
        '''
        super(Cepstral, self).__init__()

        self.fullPathToExecutable = TextToSpeechProvider.which("swift")
        if self.fullPathToExecutable is None:
            raise NotImplementedError("Cepstral text-to-speech engine is not implemented.")

        # NOTE: despite its name this attribute holds the temp file
        # object, not a path. The object must stay referenced, because
        # the file is deleted when the object is closed or collected.
        self.t2sDestFilename = tempfile.NamedTemporaryFile(prefix='speakeasy', suffix='.wav')
        self.voiceList = None
        self.getVoiceList()
        self.ttsEngineName = "cepstral"

    def say(self, text, voice=None):
        '''
        Speak the text in the background; returns without waiting.
        @param text: Text to speak.
        @type text: string
        @param voice: Cepstral voice name, or None for the default voice.
        @type voice: string
        @raise ValueError: if the voice is not supported.
        '''
        # Ensure that embedded single quotes are properly
        # escaped for the Bash shell to deal with them:
        text = self.bashQuotify(str(text))

        voice = self.checkVoiceValid(voice, self.defaultVoice)
        commandLine = "padsp swift -n " + str(voice) + " '" + text + "' &"
        os.system(commandLine)

    def stop(self):
        '''
        Stop speech by killing all processes named 'swift.bin'.
        '''
        os.system("killall --quiet " + str("swift.bin"))

    def sayToFile(self, text, voice=None, destFileName=None):
        '''
        Synthesize the text into a sound file using swift. Returns
        only after swift has finished.
        @param text: Text to convert.
        @type text: string
        @param voice: Cepstral voice name, or None for the default voice.
        @type voice: string
        @param destFileName: Output path. If None, a temp .wav file is created,
                             which the caller must remove.
        @type destFileName: string
        @return: Path of the sound file.
        @raise ValueError: if the voice is not supported.
        @raise OSError: if swift cannot be run, or produced no output.
        '''
        voice = self.checkVoiceValid(voice, self.defaultVoice)
        if destFileName is None:
            (destFd, destFileName) = tempfile.mkstemp(prefix='speakeasy', suffix='.wav')
            # swift writes by path; the descriptor is not needed:
            os.close(destFd)

        failureMsg = "Sound synthesis failed. Is Cepstral's swift installed? Is a Cepstral voice installed? Try running 'rosdep satisfy speakeasy|sh'. Refer to http://pr.willowgarage.com/wiki/sound_play/Troubleshooting"

        # Run in the foreground and without a shell: the size check
        # below is only meaningful once swift is done, and the text
        # needs no shell quoting when passed as an argument.
        try:
            subprocess.call(["padsp", "swift",
                             "-n", str(voice),
                             str(text),
                             "-o", str(destFileName)])
        except OSError:
            raise OSError(failureMsg)
        if not os.path.exists(destFileName) or os.stat(destFileName).st_size == 0:
            raise OSError(failureMsg)
        return destFileName

    def busy(self):
        '''
        @return: True if the process list ('ps xa') contains a line mentioning 'swift'.
        @rtype: boolean
        '''
        psOutput = os.popen("ps xa")
        try:
            processLines = psOutput.readlines()
        finally:
            psOutput.close()
        for line in processLines:
            if line.find("swift") > -1:
                return True
        return False

    def getVoiceList(self):
        '''
        Ask swift for its voices ('swift --voices'). The result is cached,
        and the default voice is set as a side effect.
        @return: List of voice names.
        @rtype: [string]
        @raise NotImplementedError: if the 'swift' executable cannot be found.
        '''
        if self.voiceList is not None:
            return self.voiceList

        absSwiftPath = TextToSpeechProvider.which("swift")
        if absSwiftPath is None:
            raise NotImplementedError("Cannot find the Cepstral executable 'swift'. Make sure it is in $PATH or $PYTHONPATH of the shell where this Python program started.")
        # universal_newlines makes the output a text string on every
        # Python version:
        p = subprocess.Popen([absSwiftPath, "--voices"],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        tbl, errors = p.communicate()
        voiceNames = Cepstral._parseVoiceTable(tbl)

        # Remember the list for any subsequent calls:
        self.voiceList = voiceNames
        # Prefer the customary default, but never default to a voice
        # that swift did not report when others are available:
        if Cepstral.PREFERRED_VOICE in voiceNames or not voiceNames:
            self.defaultVoice = Cepstral.PREFERRED_VOICE
        else:
            self.defaultVoice = voiceNames[0]
        return voiceNames

    @staticmethod
    def _parseVoiceTable(tbl):
        '''
        Extract voice names from the table printed by 'swift --voices'.
        Everything up to the first line that starts with '-' is taken
        to be header. After that, the first word of every line that is
        neither blank nor starts with '-' is a voice name.
        @param tbl: Output of 'swift --voices'.
        @type tbl: string
        @return: Voice names in table order. Empty if no '-' line is present.
        @rtype: [string]
        '''
        voiceNames = []
        pastHeader = False
        for line in tbl.splitlines():
            if line.startswith("-"):
                pastHeader = True
                continue
            if not pastHeader:
                continue
            fields = line.split()
            if fields:
                voiceNames.append(str(fields[0]))
        return voiceNames
489
class MacTextToSpeech(TextToSpeechEngine):
    '''
    Wrapper for the Mac text to speech engine.
    The command line t2s command on the Mac works like this::
        say [-v <voice>] [-f <inputFile>] [-o <aiffOutputFile>]
    '''

    def __init__(self):
        '''
        @raise NotImplementedError: if the 'say' executable cannot be found.
        '''
        super(MacTextToSpeech, self).__init__()

        self.fullPathToExecutable = TextToSpeechProvider.which("say")
        if self.fullPathToExecutable is None:
            raise NotImplementedError("Mac text-to-speech engine is not implemented/found.")

        # NOTE: despite its name this attribute holds the temp file
        # object, not a path. The object must stay referenced, because
        # the file is deleted when the object is closed or collected.
        self.t2sDestFilename = tempfile.NamedTemporaryFile(prefix='speakeasy', suffix='.aiff')
        self.voiceList = None
        self.getVoiceList()
        self.ttsEngineName = "mact2s"

    def say(self, text, voice=None):
        '''
        Speak the text in the background; returns without waiting.
        @param text: Text to speak.
        @type text: string
        @param voice: Mac voice name, or None for the default voice.
        @type voice: string
        @raise ValueError: if the voice is not supported.
        '''
        # Ensure that embedded single quotes are properly
        # escaped for the Bash shell to deal with them. The escaping
        # is only valid inside single quotes, so those are used below:
        text = self.bashQuotify(str(text))

        voice = self.checkVoiceValid(voice, self.defaultVoice)
        commandLine = "say -v " + str(voice) + " '" + text + "' &"
        os.system(commandLine)

    def sayToFile(self, text, voice=None, destFileName=None):
        '''
        Synthesize the text into a sound file using 'say -o'. Returns
        only after 'say' has finished.
        @param text: Text to convert.
        @type text: string
        @param voice: Mac voice name, or None for the default voice.
        @type voice: string
        @param destFileName: Output path. If None, a temp .aiff file is created,
                             which the caller must remove.
        @type destFileName: string
        @return: Path of the sound file.
        @raise ValueError: if the voice is not supported.
        @raise OSError: if 'say' cannot be run, or produced no output.
        '''
        voice = self.checkVoiceValid(voice, self.defaultVoice)
        if destFileName is None:
            # .aiff matches the output format named in the class comment:
            (destFd, destFileName) = tempfile.mkstemp(prefix='speakeasy', suffix='.aiff')
            # 'say' writes by path; the descriptor is not needed:
            os.close(destFd)

        failureMsg = "Sound synthesis failed. Is the Mac 'say' command installed? Is the requested voice installed?"

        # Run in the foreground and without a shell: the size check
        # below is only meaningful once 'say' is done, and the text
        # needs no shell quoting when passed as an argument.
        try:
            subprocess.call(["say",
                             "-v", str(voice),
                             "-o", str(destFileName),
                             str(text)])
        except OSError:
            raise OSError(failureMsg)
        if not os.path.exists(destFileName) or os.stat(destFileName).st_size == 0:
            raise OSError(failureMsg)
        return destFileName

    def stop(self):
        '''
        Stop speech by killing all processes named 'say'.
        '''
        # The short option is used because the BSD killall on the Mac
        # has no long options:
        os.system("killall -q " + str("say"))

    def busy(self):
        '''
        @return: True if the process list ('ps xa') contains a line mentioning 'say'.
        @rtype: boolean
        '''
        psOutput = os.popen("ps xa")
        try:
            processLines = psOutput.readlines()
        finally:
            psOutput.close()
        for line in processLines:
            if line.find("say") > -1:
                return True
        return False

    def getVoiceList(self):
        '''
        @return: List of supported voice names. Currently fixed to 'Alex'.
        @rtype: [string]
        '''
        #TODO: find all Mac voices automatically
        if self.voiceList is not None:
            return self.voiceList

        self.voiceList = ["Alex"]
        self.defaultVoice = "Alex"
        return self.voiceList
if __name__ == "__main__":

    # Manual smoke test. The calls that actually produce sound are
    # commented out; enable them individually as needed.
    separator = "---------------"

    tte = TextToSpeechProvider()
    print("Test defaulting t2s engine and voice")
    # tte.say("This is a test.")
    print("Done testing defaulting t2s engine and voice")
    print(separator)

    print("Test default t2s engine, ask for particular voice")
    # tte.say("This is a test.", voiceName='David');
    print("Done testing default t2s engine, ask for particular voice")
    print(separator)

    print("Test deliberate voice name-unknown error.")
    # try:
    #     tte.say("This is a test.", voiceName='Alex');
    # except ValueError:
    #     pass # Expected.
    #
    print("Done testing deliberate voice name-unknown error.")
    print(separator)

    print("Test ask for specific t2s engine")
    # tte.say("This is a test", t2sEngineName="festival");
    print("Done testing ask for specific t2s engine")
    print(separator)

    print("Test tolerance to wrong case in speech engine name:")
    # tte.say("This is a test", t2sEngineName="Festival");
    print("Done testing tolerance to wrong case in speech engine name:")
    print(separator)

    soundPlayer = SoundPlayer()

    print("Test say-to-file Cepstral")
    # fileName = tte.sayToFile("Testing Cepstral say to file.");
    # print "Cepstral printed to: " + str(fileName);
    # soundPlayer.play(fileName, blockTillDone=True);
    # os.remove(fileName);
    print("Done testing say-to-file Cepstral")
    print(separator)

    print("Test say-to-file Festival")
    # fileName = tte.sayToFile("Testing Festival say to file.", t2sEngineName="Festival");
    # print "Festival printed to: " + str(fileName);
    # soundPlayer.play(fileName, blockTillDone=True);
    # os.remove(fileName);
    print("Done testing say-to-file Festival")
    print(separator)

    print("Test getting list of available t2s engines...")
    print(str(tte.availableTextToSpeechEngines()))
    print("Done testing getting list of available t2s engines.")
    print(separator)

    print("Test getting dict of available voices...")
    print(str(tte.availableVoices()))
    print("Done testing getting dict of available voices.")
    print(separator)

    print("Done")