1
2
3 import os
4 import time
5 import subprocess
6 import tempfile
7
8 from sound_player import SoundPlayer;
11 '''
12 Abstraction for interacting with text-to-speech engines on
13 Ubuntu, Mac, and (unimplemented:) Windows. Detects Festival and
14 Cepstral engines on Ubuntu. Main public facilities: Speak an
15 utterance immediately, given a string, and generate a .wav file
16 from the text-to-speech conversion.
17 '''
18
def __init__(self):
    '''
    Initialize the provider: create the (initially empty) registry of
    text-to-speech engines, then detect the engines installed on this
    machine and remember which one serves as the default.

    @raise ValueError: propagated from findAvailableTTSEngines() if no
        text-to-speech engine is found.
    '''
    # Must exist before detection runs: the per-OS detection methods
    # register each engine they find in this dict, keyed by engine name.
    self.t2sEngines = {}
    self.defaultEngine = self.findAvailableTTSEngines()
    # Engine used by the most recent say()/sayToFile() call; None until then.
    self.lastUsedEngineObj = None
23
24
25
def sayToFile(self, text, voiceName=None, t2sEngineName=None, destFileName=None):
    '''
    Synthesize the string in 'text' into a sound file, using the given
    text-to-speech engine and voice. When no destination file is given,
    the engine creates a temporary file and its name is returned; the
    caller is then responsible for removing that temporary file.

    @param text: String to convert into sound.
    @type text: string
    @param voiceName: Name of voice to use. Must be a voice supported by the given t2s engine.
    @type voiceName: string
    @param t2sEngineName: Name of text-to-speech engine: "festival", "cepstral", "mact2s"
    @type t2sEngineName: string
    @param destFileName: Path into which the resulting sound should be directed. If not
        provided, a temp file is created.
    @type destFileName: string
    @return: Whatever the engine's sayToFile() returns (the sound file's path).
    @raise NotImplementedError: if voice is not supported by the given engine.
    @raise ValueError: if given engine name does not correspond to any known text-to-speech engine.
    '''
    engine = self.getEngineObj(t2sEngineName)
    # Remember the engine so that a later busy()/stop style query knows whom to ask.
    self.lastUsedEngineObj = engine
    return engine.sayToFile(text, voiceName, destFileName)
49
def say(self, text, voiceName=None, t2sEngineName=None, blockTillDone=False):
    '''
    Speak the given string right away, using the given voice on the given
    engine. Defaults are used for whichever of voice and engine is omitted.

    @param text: String to speak
    @type text: string
    @param voiceName: Designation of the voice to use.
    @type voiceName: string
    @param t2sEngineName: Name of text-to-speech engine to use.
    @type t2sEngineName: string
    @param blockTillDone: If true, method will sleep till speech done, then method returns.
    @type blockTillDone: boolean
    @raise ValueError: if unknown text-to-speech engine.
    @raise OSError: if failure in running the text-to-speech command in underlying shell
    '''
    engine = self.getEngineObj(t2sEngineName)
    # Remember the engine so busy()/waitForSoundDone() can query it.
    self.lastUsedEngineObj = engine
    engine.say(text, voiceName)

    if not blockTillDone:
        return
    self.waitForSoundDone()
71
72
74 '''
75 Stop text-to-speech that is currently playing.
76 '''
77
78
79 if self.lastUsedEngineObj is not None:
80 self.lastUsedEngineObj.stop();
81
def busy(self):
    '''
    Report whether speech is currently being synthesized. Only the engine
    used by the most recent say()/sayToFile() call is consulted.

    @return: False if no engine has been used yet, else the result of
        that engine's busy() method.
    @rtype: boolean
    '''
    engine = self.lastUsedEngineObj
    if engine is None:
        # Nothing was ever spoken through this provider.
        return False
    return engine.busy()
91
def waitForSoundDone(self):
    '''
    Block until speech has finished playing. Polls busy() and sleeps
    0.3 seconds between polls.
    '''
    while self.busy():
        time.sleep(0.3)
98
def availableTextToSpeechEngines(self):
    '''
    Returns an array of text-to-speech engine names that
    are available on the current machine. Example: ['festival', 'cepstral']

    @return: Array of text-to-speech engine names as appropriate for ROS t2s messages.
        Order within the array is not necessarily the same between calls.
    @rtype: [string]
    '''
    # list(...) guarantees a real list (the documented return type) even
    # where dict.keys() yields a view object instead of a list.
    return list(self.t2sEngines)
109
def availableVoices(self):
    '''
    Collect the voices offered by every registered text-to-speech
    engine. Keys of the result are the engine names. Example:
        1. Cepstral : ['David', 'Anna']
        2. Festival : ['voice_kal_diphone']
    Each engine's default voice is guaranteed to come first in its
    list. Order of the remaining voices is arbitrary.

    @return: Dictionary mapping text-to-speech engine names to lists of voice names.
    @rtype: {string : [string]}
    '''
    voicesByEngine = {}
    for engine in self.t2sEngines.values():
        defaultVoice = engine.getDefaultVoice()
        # Default voice leads; every other occurrence of it is dropped.
        otherVoices = [voice for voice in engine.getVoiceList() if not (voice == defaultVoice)]
        voicesByEngine[engine.getEngineName()] = [defaultVoice] + otherVoices
    return voicesByEngine
133
134
135
def getEngineObj(self, engineName):
    '''
    Map a text-to-speech engine name to the corresponding engine
    object. A name of None selects the default engine. Lookup is
    case-insensitive.

    @param engineName: Name of text-to-speech engine to use
    @type engineName: string
    @return: Subclass of TextToSpeechEngine
    @raise ValueError: if no engine is registered under the given name.
    '''
    if engineName is None:
        return self.defaultEngine

    # Engines are registered under lower-case names.
    registryKey = str(engineName).lower()
    if registryKey not in self.t2sEngines:
        raise ValueError("Unknown text-to-speech engine: " + str(engineName))
    return self.t2sEngines[registryKey]
151
def findAvailableTTSEngines(self):
    '''
    Try to sense the underlying OS. Then identify the available
    text-to-speech engines. Return the default engine to be used.

    @return: Default engine instance.
    @rtype: TextToSpeechEngine subclass
    @raise ValueError: if no speech engine is found.
    @raise NotImplementedError: propagated from windowsTTSEngines() when
        running on Windows/Cygwin.
    '''
    # Local import keeps this method self-contained. platform.system() is
    # used instead of os.uname() because os.uname() does not exist on
    # native Windows.
    import platform

    osName = platform.system().lower()
    defaultEngine = None
    if 'linux' in osName:
        defaultEngine = self.linuxTTSEngines()
    elif 'cygwin' in osName or 'windows' in osName:
        defaultEngine = self.windowsTTSEngines()
    elif 'darwin' in osName or 'mac' in osName:
        # macOS reports its system name as "Darwin"; testing only for
        # 'mac' would never detect it.
        defaultEngine = self.macTTSEngines()

    if len(self.t2sEngines) == 0:
        raise ValueError("No text-to-speech engine found.")
    return defaultEngine
171
def linuxTTSEngines(self):
    '''
    Called if underlying machine is Linux. Probes for the Festival
    engine (via its 'text2wave' executable) and the Cepstral engine
    (via its 'swift' executable), and registers each one found.

    @return: Text-to-speech engine instance to use as default: Cepstral
        if registered, else Festival. None if no text-to-speech engine
        is available.
    @rtype: TextToSpeechEngine
    '''
    if TextToSpeechProvider.which("text2wave") is not None:
        self.t2sEngines["festival"] = Festival()

    if TextToSpeechProvider.which("swift") is not None:
        self.t2sEngines["cepstral"] = Cepstral()

    # Preference order for the default engine: Cepstral before Festival.
    for preferredName in ("cepstral", "festival"):
        if preferredName in self.t2sEngines:
            return self.t2sEngines[preferredName]
    return None
201
202
def macTTSEngines(self):
    '''
    Called if underlying machine is Mac. Registers the Mac
    text-to-speech engine under the name "mact2s".

    @return: The Mac text-to-speech engine instance, which doubles as
        the default engine.
    @rtype: TextToSpeechEngine
    '''
    macEngine = MacTextToSpeech()
    self.t2sEngines["mact2s"] = macEngine
    return macEngine
213
def windowsTTSEngines(self):
    '''
    Called if underlying machine is Windows. Not implemented.

    @raise NotImplementedError: always.
    '''
    raise NotImplementedError("Windows text-to-speech not yet implemented.")
216
217 @staticmethod
def which(program):
    '''
    Implements the Unix 'which' shell command, extended to consider
    not just $PATH, but also $PYTHONPATH when searching for an
    executable of the given name (the program parameter). $PATH
    is given preference; it is searched first. If program contains
    a directory part, $PATH is not searched: the path is tested as
    given, then $PYTHONPATH is tried.

    @param program: Name of the executable with or without path
    @type program: string
    @return: None if no executable found, else full path to executable.
    '''
    def is_exe(fpath):
        # isfile (rather than exists) so that a directory, which usually
        # carries the execute bit too, is never reported as a program.
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    def ext_candidates(fpath):
        # The bare name first, then the name with each $PATHEXT extension.
        yield fpath
        for ext in os.environ.get("PATHEXT", "").split(os.pathsep):
            if ext:
                yield fpath + ext

    def search(envVarName):
        # Look for program in each directory listed in the given
        # environment variable. An unset or empty variable yields None.
        dirList = os.environ.get(envVarName)
        if not dirList:
            return None
        for directory in dirList.split(os.pathsep):
            for candidate in ext_candidates(os.path.join(directory, program)):
                if is_exe(candidate):
                    return candidate
        return None

    dirPart, _namePart = os.path.split(program)
    if dirPart:
        if is_exe(program):
            return program
    else:
        found = search("PATH")
        if found is not None:
            return found

    return search("PYTHONPATH")
278
279
280
281 -class TextToSpeechEngine(object):
282 '''
283 Abstract class from which text-to-speech engine classes are derived.
284 '''
285
def __init__(self):
    '''
    Set every attribute that the getters of this class read to None, so
    that a getter called before a subclass has filled the attribute in
    returns None instead of raising AttributeError.
    '''
    self.fullPathToExecutable = None

    self.defaultVoice = None
    # Subclasses overwrite these after calling this constructor.
    self.ttsEngineName = None
    self.t2sDestFilename = None
290
def getEngineName(self):
    '''
    @return: Name of this text-to-speech engine (the ttsEngineName attribute).
    '''
    engineName = self.ttsEngineName
    return engineName
293
def getDefaultVoice(self):
    '''
    @return: Name of this engine's default voice (the defaultVoice attribute).
    '''
    voiceName = self.defaultVoice
    return voiceName
296
298 return self.t2sDestFilename;
299
def checkVoiceValid(self, voice, defaultVoice):
    '''
    Called from subclasses. Validates a voice name: None stands for
    "use the default" and yields defaultVoice; any other value must be
    contained in this engine's voice list.

    @param voice: Name of voice, or None
    @type voice: {string | NoneType}
    @param defaultVoice: Value to return when voice is None.
    @type defaultVoice: string
    @return: defaultVoice if voice is None, else voice itself.
    @raise ValueError: if voice is not in the engine's voice list.
    '''
    if voice is None:
        return defaultVoice
    if voice not in self.getVoiceList():
        raise ValueError("Voice engine %s does not support a voice named %s." % (self.getEngineName(), str(voice)))
    return voice
316
def bashQuotify(self, str):
    '''
    Make a string safe for embedding in a single-quoted Bash string.
    Bash offers no escape for a single quote inside single quotes:
    both echo 'That's it' and echo 'That\'s it' fail. The way out is
    to close the quoted string, emit a backslash-escaped quote, and
    reopen the quoted string, i.e. to turn every ' into '\''.

    @param str: String in which to make single quotes safe. Ok not to
        have any single quotes.
    @type str: string
    @return: Copy of the string with every single quote replaced by '\''.
    '''
    # Splitting on the quote and re-joining with the replacement is
    # the same as replacing every quote.
    return "'\\''".join(str.split("'"))
331
334 '''
335 Wrapper for the Linux Festival text-to-speech engine.
336 '''
337
def __init__(self):
    '''
    Locate the 'festival' executable, initialize the voice list, and
    create the temporary text file this engine instance keeps open.

    @raise NotImplementedError: if the 'festival' executable is found
        neither in $PATH nor in $PYTHONPATH.
    '''
    super(Festival, self).__init__()
    self.ttsEngineName = "festival"
    self.fullPathToExecutable = TextToSpeechProvider.which("festival")
    if self.fullPathToExecutable is None:
        raise NotImplementedError("Festival text-to-speech engine is not implemented.")
    # None marks "not yet determined"; getVoiceList() fills in both
    # the voice list and the default voice.
    self.voiceList = None
    self.getVoiceList()
    # NamedTemporaryFile removes the file when the file object is closed.
    self.txtfile = tempfile.NamedTemporaryFile(prefix='speakeasy', suffix='.txt')
    self.txtfilename = self.txtfile.name
348
def say(self, text, voice=None):
    '''
    Speak the given text through Festival. The command is started in
    the background, so this method returns without waiting for the
    speech to finish.

    @param text: Text to speak.
    @type text: string
    @param voice: Accepted for interface compatibility with the other
        engines; not used by this method.
    @type voice: {string | NoneType}
    '''
    # Single-quote the text for the shell. Inside double quotes the
    # shell would still expand $, backticks and embedded double quotes,
    # which breaks the command or runs unintended code.
    quotedText = self.bashQuotify(str(text))
    commandLine = "echo '" + quotedText + "' | padsp festival --tts &"
    os.system(commandLine)
356
def sayToFile(self, text, voice=None, destFileName=None):
    '''
    Synthesize the given text into a sound file, using Festival's
    text2wave program. The call returns only after text2wave has
    finished, so the returned file is complete.

    @param text: Text to convert to sound.
    @type text: string
    @param voice: Name of a Festival voice, or None for the default voice.
    @type voice: {string | NoneType}
    @param destFileName: Path of the sound file to write. If None, a
        temporary .wav file is created; the caller must remove it.
    @type destFileName: {string | NoneType}
    @return: Path of the sound file.
    @raise ValueError: if the voice is not supported (from checkVoiceValid()).
    @raise OSError: if text2wave cannot be run or produces no output.
    '''
    voice = self.checkVoiceValid(voice, self.defaultVoice)
    createdTempDest = destFileName is None
    if createdTempDest:
        (destFd, destFileName) = tempfile.mkstemp(prefix='speakeasy', suffix='.wav')
        # Only the name is needed; text2wave opens the file itself.
        os.close(destFd)

    failureMsg = "Sound synthesis failed. Is the Festival engine installed? Is a festival voice installed? Try running 'rosdep satisfy speakeasy|sh'. Refer to http://pr.willowgarage.com/wiki/sound_play/Troubleshooting"

    # A fresh text file per call: no leftovers from earlier calls, and
    # nothing that a previous call may already have closed.
    (txtFd, txtFileName) = tempfile.mkstemp(prefix='speakeasy', suffix='.txt')
    try:
        txtFile = os.fdopen(txtFd, 'w')
        try:
            txtFile.write(str(text))
        finally:
            txtFile.close()
        try:
            # Argument list, no shell: file names need no quoting. Run
            # in the foreground so the size check below sees the
            # finished file.
            subprocess.call(["text2wave", "-eval", "(" + str(voice) + ")",
                             txtFileName, "-o", str(destFileName)])
            synthesized = os.path.isfile(destFileName) and os.path.getsize(destFileName) > 0
        except OSError:
            synthesized = False
    finally:
        os.remove(txtFileName)

    if not synthesized:
        if createdTempDest and os.path.exists(destFileName):
            # The caller never learns this name, so clean up here.
            os.remove(destFileName)
        raise OSError(failureMsg)
    return destFileName
378
def stop(self):
    '''
    Stop speech that is currently playing by running
    'killall audsp' in a shell.
    '''
    os.system("killall " + str("audsp"))
381
def busy(self):
    '''
    Report whether Festival appears to be running: scans the output
    of 'ps xa' for a line containing "festival".

    @return: True if such a line is found, else False.
    @rtype: boolean
    '''
    psOutput = os.popen("ps xa")
    try:
        for line in psOutput:
            if "festival" in line:
                return True
        return False
    finally:
        # Release the pipe even on early return.
        psOutput.close()
387
389
390
391
def getVoiceList(self):
    '''
    Return the list of Festival voice names. On first call the list
    and the default voice are both set to the single voice
    'voice_kal_diphone'; later calls return the cached list.

    @return: List of voice names.
    @rtype: [string]
    '''
    if self.voiceList is None:
        self.voiceList = ['voice_kal_diphone']
        self.defaultVoice = "voice_kal_diphone"
    return self.voiceList
398
def initTextFile(self, text):
    '''
    Empty this engine's temporary text file and rewind it.

    @param text: Not used by this method.
    @type text: string
    '''
    # truncate() cuts at the current file position. Without rewinding
    # first, a file positioned at its end would keep all its content.
    self.txtfile.seek(0)
    self.txtfile.truncate()
403
404
405
406 -class Cepstral(TextToSpeechEngine):
407 '''
408 Wrapper for Cepstral text-to-speech engine.
409 '''
410
def __init__(self):
    '''
    Locate Cepstral's 'swift' executable, create a temporary .wav
    file object, and obtain the list of installed voices.

    @raise NotImplementedError: if the 'swift' executable is found
        neither in $PATH nor in $PYTHONPATH.
    '''
    super(Cepstral, self).__init__()

    self.fullPathToExecutable = TextToSpeechProvider.which("swift")
    if self.fullPathToExecutable is None:
        raise NotImplementedError("Cepstral text-to-speech engine is not implemented.")

    # NOTE: despite the attribute's name this is the temporary file
    # object, not its file name.
    self.t2sDestFilename = tempfile.NamedTemporaryFile(prefix='speakeasy', suffix='.wav')
    # None marks "not yet determined"; getVoiceList() fills in both
    # the voice list and the default voice.
    self.voiceList = None
    self.getVoiceList()
    self.ttsEngineName = "cepstral"
422
def say(self, text, voice=None):
    '''
    Speak the given text through Cepstral's swift program. The
    command is started in the background, so this method returns
    without waiting for the speech to finish.

    @param text: Text to speak.
    @type text: string
    @param voice: Name of a Cepstral voice, or None for the default voice.
    @type voice: {string | NoneType}
    @raise ValueError: if the voice is not supported (from checkVoiceValid()).
    '''
    # Validate first, so that a bad voice name never reaches the shell.
    voice = self.checkVoiceValid(voice, self.defaultVoice)
    # The text goes into a single-quoted shell string; bashQuotify()
    # makes embedded single quotes safe for that.
    quotedText = self.bashQuotify(str(text))
    commandLine = "padsp swift -n " + str(voice) + " '" + quotedText + "' &"
    os.system(commandLine)
433
def stop(self):
    '''
    Stop speech that is currently playing by running
    'killall --quiet swift.bin' in a shell.
    '''
    os.system("killall --quiet " + str("swift.bin"))
436
def sayToFile(self, text, voice=None, destFileName=None):
    '''
    Synthesize the given text into a sound file, using Cepstral's
    swift program. The call returns only after swift has finished,
    so the returned file is complete.

    @param text: Text to convert to sound.
    @type text: string
    @param voice: Name of a Cepstral voice, or None for the default voice.
    @type voice: {string | NoneType}
    @param destFileName: Path of the sound file to write. If None, a
        temporary .wav file is created; the caller must remove it.
    @type destFileName: {string | NoneType}
    @return: Path of the sound file.
    @raise ValueError: if the voice is not supported (from checkVoiceValid()).
    @raise OSError: if swift cannot be run or produces no output.
    '''
    voice = self.checkVoiceValid(voice, self.defaultVoice)
    createdTempDest = destFileName is None
    if createdTempDest:
        (destFd, destFileName) = tempfile.mkstemp(prefix='speakeasy', suffix='.wav')
        # Only the name is needed; swift opens the file itself.
        os.close(destFd)

    failureMsg = "Sound synthesis failed. Is Cepstral's swift installed? Is a Cepstral voice installed? Try running 'rosdep satisfy speakeasy|sh'. Refer to http://pr.willowgarage.com/wiki/sound_play/Troubleshooting"
    try:
        # Argument list, no shell: the text needs no quoting at all.
        # Run in the foreground so the size check sees the finished file.
        subprocess.call(["padsp", "swift", "-n", str(voice), str(text),
                         "-o", str(destFileName)])
        synthesized = os.path.isfile(destFileName) and os.path.getsize(destFileName) > 0
    except OSError:
        synthesized = False

    if not synthesized:
        if createdTempDest and os.path.exists(destFileName):
            # The caller never learns this name, so clean up here.
            os.remove(destFileName)
        raise OSError(failureMsg)
    return destFileName
456
def busy(self):
    '''
    Report whether Cepstral's swift appears to be running: scans the
    output of 'ps xa' for a line containing "swift".

    @return: True if such a line is found, else False.
    @rtype: boolean
    '''
    psOutput = os.popen("ps xa")
    try:
        for line in psOutput:
            if "swift" in line:
                return True
        return False
    finally:
        # Release the pipe even on early return.
        psOutput.close()
462
464
def getVoiceList(self):
    '''
    Return the list of installed Cepstral voice names. On first call
    the list is obtained by running 'swift --voices' and parsing its
    output table; the result is cached, and the default voice is set
    to "David". Later calls return the cached list.

    @return: List of voice names.
    @rtype: [string]
    @raise NotImplementedError: if the 'swift' executable cannot be found.
    '''
    if self.voiceList is not None:
        return self.voiceList

    absSwiftPath = TextToSpeechProvider.which("swift")
    if absSwiftPath is None:
        raise NotImplementedError("Cannot find the Cepstral executable 'swift'. Make sure it is in $PATH or $PYTHONPATH of the shell where this Python program started.")
    # universal_newlines=True makes communicate() return text rather
    # than bytes, so the string tests below work on any Python version.
    p = subprocess.Popen([absSwiftPath, "--voices"],
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         universal_newlines=True)
    tbl, _errors = p.communicate()

    # Everything up to the first line that starts with '-' is header.
    # After it, each non-blank line that does not itself start with
    # '-' describes one voice, whose name is the line's first word.
    voiceNames = []
    pastHeader = False
    for line in tbl.splitlines():
        if line.startswith("-"):
            pastHeader = True
        elif pastHeader and line.strip():
            voiceNames.append(str(line.split()[0]))

    self.voiceList = voiceNames
    self.defaultVoice = "David"
    return voiceNames
489
490 -class MacTextToSpeech(TextToSpeechEngine):
491 '''
492 Wrapper for the Mac text to speech engine.
493 The command line t2s command on the Mac works like this::
494 say [-v <voice>] [-f <inputFile>] [-o <aiffOutputFile>]
495 '''
496
def __init__(self):
    '''
    Locate the Mac 'say' executable, create a temporary .aiff file
    object, and initialize the voice list.

    @raise NotImplementedError: if the 'say' executable is found
        neither in $PATH nor in $PYTHONPATH.
    '''
    super(MacTextToSpeech, self).__init__()

    sayPath = TextToSpeechProvider.which("say")
    self.fullPathToExecutable = sayPath
    if sayPath is None:
        raise NotImplementedError("Mac text-to-speech engine is not implemented/found.")

    # NOTE: despite the attribute's name this is the temporary file
    # object, not its file name.
    self.t2sDestFilename = tempfile.NamedTemporaryFile(prefix='speakeasy', suffix='.aiff')
    # None marks "not yet determined"; getVoiceList() fills in both
    # the voice list and the default voice.
    self.voiceList = None
    self.getVoiceList()
    self.ttsEngineName = "mact2s"
508
def say(self, text, voice=None):
    '''
    Speak the given text through the Mac 'say' command. The command
    is started in the background, so this method returns without
    waiting for the speech to finish.

    @param text: Text to speak.
    @type text: string
    @param voice: Name of a Mac voice, or None for the default voice.
    @type voice: {string | NoneType}
    @raise ValueError: if the voice is not supported (from checkVoiceValid()).
    '''
    # Validate first, so that a bad voice name never reaches the shell.
    voice = self.checkVoiceValid(voice, self.defaultVoice)
    # bashQuotify() makes the text safe for a *single*-quoted shell
    # string, so single quotes must be used around it. Inside double
    # quotes the escape sequence would be spoken literally, and $,
    # backticks and double quotes would still be interpreted.
    quotedText = self.bashQuotify(str(text))
    commandLine = "say -v " + str(voice) + " '" + quotedText + "' &"
    os.system(commandLine)
518
def sayToFile(self, text, voice=None, destFileName=None):
    '''
    Synthesize the given text into a sound file, using the Mac 'say'
    command. The call returns only after 'say' has finished, so the
    returned file is complete.

    @param text: Text to convert to sound.
    @type text: string
    @param voice: Name of a Mac voice, or None for the default voice.
    @type voice: {string | NoneType}
    @param destFileName: Path of the sound file to write. If None, a
        temporary .aiff file is created; the caller must remove it.
    @type destFileName: {string | NoneType}
    @return: Path of the sound file.
    @raise ValueError: if the voice is not supported (from checkVoiceValid()).
    @raise OSError: if 'say' cannot be run or produces no output.
    '''
    voice = self.checkVoiceValid(voice, self.defaultVoice)
    createdTempDest = destFileName is None
    if createdTempDest:
        # .aiff matches the output format named in this class's usage
        # line: say [-v <voice>] [-f <inputFile>] [-o <aiffOutputFile>]
        (destFd, destFileName) = tempfile.mkstemp(prefix='speakeasy', suffix='.aiff')
        # Only the name is needed; 'say' opens the file itself.
        os.close(destFd)

    failureMsg = "Sound synthesis failed. Is the Mac 'say' command available? Is the requested voice installed?"
    try:
        # Argument list, no shell: the text needs no quoting, and each
        # option is guaranteed to be a separate argument. Run in the
        # foreground so the size check sees the finished file.
        subprocess.call(["say", "-v", str(voice), "-o", str(destFileName), str(text)])
        synthesized = os.path.isfile(destFileName) and os.path.getsize(destFileName) > 0
    except OSError:
        synthesized = False

    if not synthesized:
        if createdTempDest and os.path.exists(destFileName):
            # The caller never learns this name, so clean up here.
            os.remove(destFileName)
        raise OSError(failureMsg)
    return destFileName
538
def stop(self):
    '''
    Stop speech that is currently playing by running
    'killall -q say' in a shell.
    '''
    # -q rather than --quiet: the short option is the one the BSD
    # killall shipped with macOS accepts — TODO confirm on target OS version.
    os.system("killall -q " + str("say"))
541
def busy(self):
    '''
    Report whether the Mac 'say' command appears to be running: scans
    the output of 'ps xa' for a line containing "say".

    @return: True if such a line is found, else False.
    @rtype: boolean
    '''
    psOutput = os.popen("ps xa")
    try:
        for line in psOutput:
            # NOTE(review): plain substring match, so any process whose
            # ps line contains "say" counts as busy.
            if "say" in line:
                return True
        return False
    finally:
        # Release the pipe even on early return.
        psOutput.close()
547
def getVoiceList(self):
    '''
    Return the list of Mac voice names. On first call the list and the
    default voice are both set to the single voice "Alex"; later calls
    return the cached list.

    @return: List of voice names.
    @rtype: [string]
    '''
    if self.voiceList is None:
        self.voiceList = ["Alex"]
        self.defaultVoice = "Alex"
    return self.voiceList
557
if __name__ == "__main__":
    # Manual smoke test. Only the banner lines and the two query calls
    # below are present; the sections in between print their banners only.
    # print(...) with a single argument behaves the same on Python 2 and 3.

    tte = TextToSpeechProvider()
    print("Test defaulting t2s engine and voice")

    print("Done testing defaulting t2s engine and voice")
    print("---------------")

    print("Test default t2s engine, ask for particular voice")

    print("Done testing default t2s engine, ask for particular voice")
    print("---------------")

    print("Test deliberate voice name-unknown error.")

    print("Done testing deliberate voice name-unknown error.")
    print("---------------")

    print("Test ask for specific t2s engine")

    print("Done testing ask for specific t2s engine")
    print("---------------")

    print("Test tolerance to wrong case in speech engine name:")

    print("Done testing tolerance to wrong case in speech engine name:")
    print("---------------")

    # Not used below; kept because constructing it may have side effects.
    soundPlayer = SoundPlayer()

    print("Test say-to-file Cepstral")

    print("Done testing say-to-file Cepstral")
    print("---------------")

    print("Test say-to-file Festival")

    print("Done testing say-to-file Festival")
    print("---------------")

    print("Test getting list of available t2s engines...")
    print(str(tte.availableTextToSpeechEngines()))
    print("Done testing getting list of available t2s engines.")
    print("---------------")

    print("Test getting dict of available voices...")
    print(str(tte.availableVoices()))
    print("Done testing getting dict of available voices.")
    print("---------------")

    print("Done")
620