1
2
3 import unittest;
4
5 import re;
15
19
# Delimiters that open and close a SpeakEasy markup inside running text.
openMark = '['
closeMark = ']'

# Opening token for each markup type: the open delimiter followed by a
# one-letter type tag.
marks = {
    Markup.SILENCE: openMark + 'S',
    Markup.RATE: openMark + 'R',
    Markup.PITCH: openMark + 'P',
    Markup.VOLUME: openMark + 'V',
    Markup.EMPHASIS: openMark + 'E',
}

# SSML emphasis level names, keyed by the numeric value carried by an
# emphasis markup.
emphasisVals = {
    0: 'none',
    1: 'moderate',
    2: 'strong',
}

# Unit suffix appended to a markup's value when it is rendered as SSML.
units = {
    Markup.SILENCE: 'ms',
    Markup.RATE: '',
    Markup.PITCH: '%',
    Markup.VOLUME: '%',
    Markup.EMPHASIS: '',
}

# Leading part of the SSML element for each markup type; the quoted value
# is appended directly after it.
ssmlOpener = {
    Markup.SILENCE: '<break time=',
    Markup.RATE: '<prosody rate=',
    Markup.PITCH: '<prosody pitch=',
    Markup.VOLUME: '<prosody volume=',
    Markup.EMPHASIS: '<emphasis level=',
}

# Text that terminates the SSML element for each markup type.
# NOTE: the unusual capitalization of this name is kept on purpose so that
# any existing reference to it keeps working.
ssmlCLoser = {
    Markup.SILENCE: ' />',
    Markup.RATE: '</prosody>',
    Markup.PITCH: '</prosody>',
    Markup.VOLUME: '</prosody>',
    Markup.EMPHASIS: '</emphasis>',
}

# Length of the longest markup opening token (open delimiter plus type tag).
# This must be measured over the tokens themselves: iterating marks.items()
# yields (key, token) pairs, whose len() is always 2 no matter how long the
# tokens are.
markupOpeningLen = max(map(len, marks.values()))

# Runs of punctuation, whitespace and angle brackets; used to split text
# into words.
splitter = re.compile(r'[,;\s!?:.<>]+')
# One ASCII letter.
letterChecker = re.compile(r'[a-zA-Z]')
# An optional sign followed by one decimal digit, i.e. the start of a number.
digitChecker = re.compile(r'[-+]{0,1}[0-9]')
64
@staticmethod
def addMarkup(theStr, markupType, startPos, length=None, numWords=None, value=0):
    '''
    Enclose part of a string in a SpeakEasy speech markup and return the result.
    The markup consists of the opening token for the given type
    (MarkupManagement.marks), the value, the marked text, and the closing
    mark. The text before, inside and after the markup is stripped of
    surrounding whitespace, and the three parts are joined by single spaces.
    @param theStr: phrase containing the substring to be marked.
    @type theStr: String
    @param markupType: indicator for what type of markup is intended; must be a key of MarkupManagement.marks
    @type markupType: Markup constant
    @param startPos: first string index to be enclosed in the mark
    @type startPos: int
    @param length: number of chars to be enclosed in the mark. (If used, do not use numWords)
    @type length: int
    @param numWords: number of words to be enclosed in the mark; only consulted when length is None
    @type numWords: int
    @param value: the magnitude of the mark.
    @type value: int
    @return: new string containing the markup; theStr itself if it is empty.
    @rtype: String
    @raise ValueError: if both length and numWords are None.
    '''

    if len(theStr) == 0:
        return theStr

    if length is None:
        if numWords is None:
            raise ValueError('Either length or numWords must be provided. Both are None.')
        length = MarkupManagement.getLenFromNumWords(theStr, startPos, numWords)

    # The text after the markup starts exactly where the marked span ends.
    # Deriving that offset from the *stripped* span (plus one for an assumed
    # blank) loses a character whenever the span ends inside a word, and
    # repeats characters whenever the span began with whitespace.
    endPos = startPos + length
    beforeMarkup = theStr[:startPos].strip()
    markedStr = theStr[startPos:endPos].strip()
    afterMarkup = theStr[endPos:].strip()

    pieces = []
    if beforeMarkup:
        pieces.append(beforeMarkup)
    pieces.append(MarkupManagement.marks[markupType] +
                  str(value) +
                  markedStr +
                  MarkupManagement.closeMark)
    if afterMarkup:
        pieces.append(afterMarkup)
    return ' '.join(pieces)
104
@staticmethod
def removeMarkup(theStr, startPos):
    '''
    Remove one SpeakEasy markup from a string, keeping the marked-up text.
    If startPos lies inside a markup (an opening marker at or before startPos
    with no closing marker in between), that markup is removed. Otherwise the
    first markup whose opening marker is at or after startPos is removed.
    The opening token, an optionally signed value that follows it, and the
    closing marker are dropped. If the closing marker is missing, only the
    opening token and the value are dropped.
    @param theStr: string containing markup to remove
    @type theStr: String
    @param startPos: position inside the marked-up text, on the opening marker, or before the markup.
    @type startPos: int
    @return: a new string with the markup removed; theStr unchanged if no markup is found.
    @rtype: String
    '''
    openMark = MarkupManagement.openMark
    closeMark = MarkupManagement.closeMark

    # Look backward first, so that a position inside the marked text works.
    openPos = -1
    if startPos >= 0:
        candidate = theStr.rfind(openMark, 0, startPos + 1)
        # A closing marker between the candidate and startPos means that
        # markup has already ended and does not enclose startPos.
        if candidate >= 0 and theStr.find(closeMark, candidate, startPos) < 0:
            openPos = candidate
    if openPos < 0:
        openPos = theStr.find(openMark, startPos)
    if openPos < 0:
        # No markup present: nothing to remove.
        return theStr

    # All positions below are absolute indices into theStr.
    textStart = openPos + MarkupManagement.markupOpeningLen
    valueMatch = re.compile(r'[-+]?\d+').match(theStr, textStart)
    if valueMatch is not None:
        textStart = valueMatch.end()

    # Only a closing marker that follows the opening can belong to it.
    closePos = theStr.find(closeMark, textStart)
    if closePos < 0:
        # Unterminated markup: keep everything after the opening and value.
        return theStr[:openPos] + theStr[textStart:]

    return theStr[:openPos] + theStr[textStart:closePos] + theStr[closePos + len(closeMark):]
135
@staticmethod
def getValue(theStr, startPos):
    '''
    Return the magnitude of the markup found from a position in a string.
    The markup is located with pointerToEnclosingMarkup, its opening is
    checked with isProperMarkupOpening, and the optionally signed integer
    at the value position is parsed.
    @param theStr: string containing the markup under consideration.
    @type theStr: String
    @param startPos: index into the string; may be the opening char of the mark itself.
    @type startPos: int
    @return: magnitude value of the markup, or None if pointerToEnclosingMarkup finds no opening mark.
    @rtype: int
    '''
    markupStart = MarkupManagement.pointerToEnclosingMarkup(theStr, startPos)
    if markupStart is None:
        return None

    # Errors raised by the opening check are passed on to the caller.
    numberStart = MarkupManagement.isProperMarkupOpening(theStr, markupStart)

    # Read the whole number, of which numberStart marks the first character.
    return int(re.match(r'[-+]{0,1}\d+', theStr[numberStart:]).group(0))
159
160
@staticmethod
def changeValue(theStr, startPos, newValue):
    '''
    Replace the magnitude of an existing markup with a new value.
    The markup is located with pointerToEnclosingMarkup and its opening is
    checked with isProperMarkupOpening; the optionally signed integer found
    at the value position is then swapped for str(newValue).
    @param theStr: string containing the markup under consideration.
    @type theStr: String
    @param startPos: index into the marked-up text; may be the opening marker itself.
    @type startPos: int
    @param newValue: new value for the markup
    @type newValue: int
    @return: a new string with the respective value modified, or None if pointerToEnclosingMarkup finds no opening mark.
    @rtype: String.
    '''
    markupStart = MarkupManagement.pointerToEnclosingMarkup(theStr, startPos)
    if markupStart is None:
        return None

    # Errors raised by the opening check are passed on to the caller.
    numberStart = MarkupManagement.isProperMarkupOpening(theStr, markupStart)

    # The old number's text length tells where the untouched tail begins.
    oldNumber = re.match(r'[-+]{0,1}\d+', theStr[numberStart:]).group(0)
    numberEnd = numberStart + len(oldNumber)
    return theStr[:numberStart] + str(newValue) + theStr[numberEnd:]
187
@staticmethod
def pointerToEnclosingMarkup(theStr, cursorPos):
    '''
    Given a string and a cursor position into it, return the position of
    the nearest opening markup char at or before the cursor.
    The search only looks for the opening mark; it does not check whether
    the markup found has already been closed before cursorPos.
    @param theStr: string to examine.
    @type theStr: String
    @param cursorPos: starting position of the backward search.
    @type cursorPos: int
    @return: Cursor position resPos such that theStr[resPos] == MarkupManagement.openMark. None if no openMark is found at or before cursorPos.
    @rtype: int
    @raise ValueError: if passed-in cursorPos is out of range.
    '''
    # Reject bad positions explicitly: plain indexing would raise IndexError
    # instead of the documented ValueError, and a negative position would
    # silently wrap around to the end of the string.
    if cursorPos < 0 or cursorPos >= len(theStr):
        raise ValueError("Cursor position %d is out of range for string '%s'." % (cursorPos, theStr))

    openPos = theStr.rfind(MarkupManagement.openMark, 0, cursorPos + 1)
    if openPos < 0:
        return None
    return openPos
208
@staticmethod
def getLenFromNumWords(str, startPos, numWords):
    '''
    Given a string, a start position within the string, and a number of words,
    return the number of chars from startPos through the end of the
    numWords-th word that follows. Non-letter chars in front of the first
    word are included in the count. If fewer than numWords words follow
    startPos, the count runs through the last word that is present.
    @param str: string to examine
    @type str: String
    @param startPos: start position for counting letters
    @type startPos: int
    @param numWords: number of words to include in the count
    @type numWords: int
    @return: number of chars between startPos and the end of the numWords-th word.
    @rtype: int
    '''
    # NOTE: the first parameter keeps its original name, which shadows the
    # builtin str inside this function.
    wordsPlusRest = str[startPos:]

    # Splitting yields empty strings at the edges when the text starts or
    # ends with separators; those are not words.
    words = [token for token in MarkupManagement.splitter.split(wordsPlusRest) if token]

    # Each word is matched literally. Without escaping, a word containing
    # regex metacharacters (parentheses, '*', a '[E10word]' markup, ...)
    # would be interpreted as pattern syntax and match wrongly or not at all.
    searchPat = ''.join('[^a-zA-Z]*' + re.escape(word) for word in words[:numWords])
    return re.match(searchPat, wordsPlusRest).end()
238
@staticmethod
def isProperMarkupOpening(theStr, cursorPos):
    '''
    Check that theStr holds a legal speech markup opening at cursorPos and
    locate the value that follows it. Blanks and tabs between the opening
    and the value are skipped.
    @param theStr: string to check
    @type theStr: String
    @param cursorPos: position of the markup opening
    @type cursorPos: int
    @return: Index of the first char of the markup's value (an optional sign followed by a digit).
    @rtype: int
    @raise ValueError: if theStr is shorter than a markup opening, if the text at cursorPos is
                       not one of MarkupManagement.marks, or if no number follows the opening.
    '''
    openingLen = MarkupManagement.markupOpeningLen
    if len(theStr) < openingLen:
        raise ValueError("Given string ('%s') is shorter than minimum markup opening length, which is %d." % (theStr,
                                                                                                              openingLen))

    opening = theStr[cursorPos:cursorPos + openingLen]
    if opening not in MarkupManagement.marks.values():
        raise ValueError("Markup opening must be one of %s. But it was '%s'." % (str(MarkupManagement.marks.values()),
                                                                                 opening))

    # Step over blanks and tabs that separate the opening from its value.
    scanPos = cursorPos + openingLen
    strLen = len(theStr)
    while scanPos < strLen and theStr[scanPos] in (' ', '\t'):
        scanPos += 1

    # Whatever comes next must be the start of a number.
    if scanPos >= strLen or MarkupManagement.digitChecker.match(theStr[scanPos:]) is None:
        raise ValueError("Bad prosidy markup: markup opening found, but no number follows after index %d in '%s'." % (cursorPos + openingLen,
                                                                                                                     theStr))
    return scanPos
276
277 @staticmethod
280
@staticmethod
def findFirstNonDigit(theStr):
    '''
    Return the index of the first char in theStr that is not a digit.
    Each char is tested on its own against MarkupManagement.digitChecker,
    so a '+' or '-' counts as a non-digit.
    @param theStr: string to examine
    @type theStr: String
    @return: index of the first non-digit char; None if theStr is empty or consists of digits only.
    @rtype: int
    '''
    startsNumber = MarkupManagement.digitChecker.match
    return next((pos for pos, oneChar in enumerate(theStr) if startsNumber(oneChar) is None), None)
288
@staticmethod
def convertStringToSSML(theStr):
    '''
    Given a string with SpeakEasy markups, replace all SpeakEasy markups with official W3C SSML.
    Markups are converted one at a time, always starting with the first
    opening mark left in the string. For each one the SSML opening for its
    type is written with the quoted value and unit: rate values are divided
    by 100, emphasis values are mapped through MarkupManagement.emphasisVals,
    and all other values are used as they are. The text after the value is
    handed to MarkupManagement.restToSSML together with the markup type.
    NOTE(review): restToSSML is not visible here; presumably it supplies the
    closing part of the SSML element - confirm.
    @param theStr: string containing the markup under consideration.
    @type theStr: String
    @return: new string with only SSML markup; theStr itself if it contains no markup.
    @rtype: String.
    @raise ValueError: if an opening mark is not a legal markup opening or has no value
                       (raised by isProperMarkupOpening).
    '''
    valuePattern = re.compile(r'[-+]{0,1}\d+')

    markPos = theStr.find(MarkupManagement.openMark)
    while markPos >= 0:
        # Validate the opening and find where its value starts. Working from
        # the value's real position and written length keeps the remainder
        # correct for forms such as '+10', '010' or a blank before the
        # value, where len(str(number)) differs from the text consumed.
        valueStart = MarkupManagement.isProperMarkupOpening(theStr, markPos)
        valueText = valuePattern.match(theStr, valueStart).group(0)
        valNum = int(valueText)

        # The char right after the opening mark identifies the markup type.
        markupType = theStr[markPos + 1]
        if markupType == Markup.RATE:
            val = valNum / 100.
        elif markupType == Markup.EMPHASIS:
            val = MarkupManagement.emphasisVals[valNum]
        else:
            val = valNum

        ssml = MarkupManagement.ssmlOpener[markupType] + \
               "'" + str(val) + MarkupManagement.units[markupType] + "'"
        if markupType != Markup.SILENCE:
            # Every type except silence gets its opening tag closed here.
            ssml += '>'

        remainder = theStr[valueStart + len(valueText):]
        theStr = theStr[:markPos] + ssml + MarkupManagement.restToSSML(markupType, remainder)
        markPos = theStr.find(MarkupManagement.openMark)

    # Returning the working string also covers input without any markup,
    # for which no converted string is ever built.
    return theStr
332
333 @staticmethod
339
343
345 self.testStr = "This little light";
346
348 theLen = MarkupManagement.getLenFromNumWords('this is me.', 0, 1);
349 self.assertEqual(theLen, 4, 'Failed find len one word from start. Expected %d, got %d.' % (4, theLen));
350
351 theLen = MarkupManagement.getLenFromNumWords('this is me.', 0, 2);
352 self.assertEqual(theLen, 7, 'Failed find len two words from start. Expected %d, got %d.' % (7, theLen));
353
354 theLen = MarkupManagement.getLenFromNumWords('this is me.', 5, 1);
355 self.assertEqual(theLen, 2, 'Failed find len one word from second word. Expected %d, got %d.' % (2, theLen));
356
357 theLen = MarkupManagement.getLenFromNumWords('this is me.', 5, 2);
358 self.assertEqual(theLen, 5, 'Failed find len two words from second word. Expected %d, got %d.' % (5, theLen));
359
360 theLen = MarkupManagement.getLenFromNumWords('this is me.', 5, 3);
361 self.assertEqual(theLen, 5, 'Failed find len two words from second word. Expected %d, got %d.' % (5, theLen));
362
363 theLen = MarkupManagement.getLenFromNumWords(' this is me.', 0, 1);
364 self.assertEqual(theLen, 6, 'Failed find len one word from start of str with spaces. Expected %d, got %d.' % (6, theLen));
365
366 theLen = MarkupManagement.getLenFromNumWords(' this is me.', 6, 1);
367 self.assertEqual(theLen, 5, 'Failed find len one word from end of 2nd word in string with spaces. Expected %d, got %d.' % (5, theLen));
368
369 theLen = MarkupManagement.getLenFromNumWords(' this is me.', 6, 2);
370 self.assertEqual(theLen, 10, 'Failed find len 2 words from end of 2nd word in string with spaces. Expected %d, got %d.' % (10, theLen));
371
372
375
377 newStr = MarkupManagement.addMarkup(self.testStr, Markup.EMPHASIS, 0, numWords=1, value=10);
378 self.assertEqual(newStr,
379 '%sE10This%s little light' % (MarkupManagement.openMark,
380 MarkupManagement.closeMark),
381 'Failed adding emphasis to first word. Result was "%s".' % newStr);
382
383 newStr = MarkupManagement.addMarkup(self.testStr, Markup.EMPHASIS, 5, numWords=1, value=10);
384 self.assertEqual(newStr,
385 'This %sE10little%s light' % (MarkupManagement.openMark,
386 MarkupManagement.closeMark),
387 'Failed adding emphasis to second word. Result was "%s".' % newStr);
388
389 newStr = MarkupManagement.addMarkup(self.testStr, Markup.EMPHASIS, 5, numWords=2, value=10);
390 self.assertEqual(newStr,
391 'This %sE10little light%s' % (MarkupManagement.openMark,
392 MarkupManagement.closeMark),
393 'Failed adding emphasis to second and third word. Result was "%s".' % newStr);
394
395 newStr = MarkupManagement.addMarkup(self.testStr, Markup.EMPHASIS, 11, numWords=1, value=10);
396 self.assertEqual(newStr,
397 'This little %sE10light%s' % (MarkupManagement.openMark,
398 MarkupManagement.closeMark),
399 'Failed adding emphasis to last word. Result was "%s".' % newStr);
400
401 newStr = MarkupManagement.addMarkup(self.testStr, Markup.EMPHASIS, 0, numWords=3, value=10);
402 self.assertEqual(newStr,
403 '%sE10This little light%s' % (MarkupManagement.openMark,
404 MarkupManagement.closeMark),
405 'Failed adding emphasis to all words. Result was "%s".' % newStr);
406
409
412
413
415 newStr = MarkupManagement.removeMarkup('%sE10This%s little light' % (MarkupManagement.openMark,
416 MarkupManagement.closeMark),
417 0);
418 self.assertEqual(newStr, self.testStr, 'Failed to remove markup from first word. Got "%s"' % newStr);
419
420 newStr = MarkupManagement.removeMarkup('%sE10This little%s light' % (MarkupManagement.openMark,
421 MarkupManagement.closeMark),
422 0);
423 self.assertEqual(newStr, self.testStr, 'Failed to remove markup from first and second word. Got "%s"' % newStr);
424
425 newStr = MarkupManagement.removeMarkup('%sE10This little light%s' % (MarkupManagement.openMark,
426 MarkupManagement.closeMark),
427 0);
428 self.assertEqual(newStr, self.testStr, 'Failed to remove markup from first, second and third word. Got "%s"' % newStr);
429
430 newStr = MarkupManagement.removeMarkup('This %sE10little%s light' % (MarkupManagement.openMark,
431 MarkupManagement.closeMark),
432 0);
433 self.assertEqual(newStr, self.testStr, 'Failed to remove markup from second word. Got "%s"' % newStr);
434
435 newStr = MarkupManagement.removeMarkup('This little light', 0);
436 self.assertEqual(newStr, self.testStr, 'Failed to remove markup when no markup present. Got "%s"' % newStr);
437
438 newStr = MarkupManagement.removeMarkup('%sE10This little light' % (MarkupManagement.openMark),
439 0);
440 self.assertEqual(newStr, self.testStr, 'Failed to remove markup from first word when closing mark missing. Got "%s"' % newStr);
441
443
444 tstStr = '%sE10This%s little light' % (MarkupManagement.openMark,
445 MarkupManagement.closeMark);
446 newStr = MarkupManagement.changeValue(tstStr, 0, 20);
447 self.assertEqual(newStr,
448 '%sE20This%s little light' % (MarkupManagement.openMark,
449 MarkupManagement.closeMark),
450 'Failed to change value with markup first in string. Got "%s".' % newStr);
451
452 tstStr = 'This %sE10little%s light' % (MarkupManagement.openMark,
453 MarkupManagement.closeMark);
454 newStr = MarkupManagement.changeValue(tstStr, 9, 30);
455 self.assertEqual(newStr,
456 'This %sE30little%s light' % (MarkupManagement.openMark,
457 MarkupManagement.closeMark),
458 'Failed to change value with markup second in string. Got "%s".' % newStr);
459
460 tstStr = '%sE40This little light%s' % (MarkupManagement.openMark,
461 MarkupManagement.closeMark);
462 newStr = MarkupManagement.changeValue(tstStr, 9, 50);
463 self.assertEqual(newStr,
464 '%sE50This little light%s' % (MarkupManagement.openMark,
465 MarkupManagement.closeMark),
466 'Failed to change value with markup second in string. Got "%s".' % newStr);
467
469
470
471 tstStr = '%sS10%sThis little light' % (MarkupManagement.openMark,
472 MarkupManagement.closeMark);
473 newStr = MarkupManagement.convertStringToSSML(tstStr);
474 self.assertEqual(newStr, "<break time='10ms' />This little light", "Failed Silence at start of sentence. Got '%s'" % newStr);
475
476 tstStr = 'This little %sS10%s light' % (MarkupManagement.openMark,
477 MarkupManagement.closeMark);
478 newStr = MarkupManagement.convertStringToSSML(tstStr);
479 self.assertEqual(newStr, "This little <break time='10ms' /> light", "Failed Silence in middle of sentence. Got '%s'" % newStr);
480
481
482 tstStr = '%sR10 This%s little light' % (MarkupManagement.openMark,
483 MarkupManagement.closeMark);
484 newStr = MarkupManagement.convertStringToSSML(tstStr);
485 self.assertEqual(newStr, "<prosody rate='0.1'> This</prosody> little light");
486
487 tstStr = 'This little %sR10 light%s' % (MarkupManagement.openMark,
488 MarkupManagement.closeMark);
489 newStr = MarkupManagement.convertStringToSSML(tstStr);
490 self.assertEqual(newStr, "This little <prosody rate='0.1'> light</prosody>");
491
492
493 tstStr = '%sP10 This%s little light' % (MarkupManagement.openMark,
494 MarkupManagement.closeMark);
495 newStr = MarkupManagement.convertStringToSSML(tstStr);
496 self.assertEqual(newStr, "<prosody pitch='10%'> This</prosody> little light");
497
498 tstStr = 'This %sP10little light%s' % (MarkupManagement.openMark,
499 MarkupManagement.closeMark);
500 newStr = MarkupManagement.convertStringToSSML(tstStr);
501 self.assertEqual(newStr, "This <prosody pitch='10%'>little light</prosody>");
502
503
504 tstStr = '%sV10 This%s little light' % (MarkupManagement.openMark,
505 MarkupManagement.closeMark);
506 newStr = MarkupManagement.convertStringToSSML(tstStr);
507 self.assertEqual(newStr, "<prosody volume='10%'> This</prosody> little light");
508
509 tstStr = '%sV-10 This%s little light' % (MarkupManagement.openMark,
510 MarkupManagement.closeMark);
511 newStr = MarkupManagement.convertStringToSSML(tstStr);
512 self.assertEqual(newStr, "<prosody volume='-10%'> This</prosody> little light");
513
514
515 tstStr = '%sE0 This%s little light' % (MarkupManagement.openMark,
516 MarkupManagement.closeMark);
517 newStr = MarkupManagement.convertStringToSSML(tstStr);
518 self.assertEqual(newStr, "<emphasis level='none'> This</emphasis> little light");
519
520 tstStr = '%sE0This little light%s' % (MarkupManagement.openMark,
521 MarkupManagement.closeMark);
522 newStr = MarkupManagement.convertStringToSSML(tstStr);
523 self.assertEqual(newStr, "<emphasis level='none'>This little light</emphasis>");
524
542
543
546
547
548
# Run this module's unit tests when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
551