Commit 51782715 authored by liugh5
Browse files

update

parent 8b4e9acd
from .XmlObj import XmlObj
import xml.etree.ElementTree as ET
from xml.dom import minidom
class Script(XmlObj):
    """Ordered collection of script items that can be dumped to XML or metafile lines."""

    def __init__(self, phoneset, posset):
        # Items are appended by the caller; the sets are only stored here.
        self.m_items = []
        self.m_posset = posset
        self.m_phoneset = phoneset

    def Save(self, outputXMLPath):
        """Write every item under a ``script`` root element to ``outputXMLPath``.

        The tree is serialized, re-parsed with minidom for pretty printing
        and written as UTF-8 bytes.
        """
        script_node = ET.Element("script")
        script_node.set("uttcount", str(len(self.m_items)))
        script_node.set("xmlns", "http://schemas.alibaba-inc.com/tts")
        for entry in self.m_items:
            entry.Save(script_node)

        raw_xml = ET.tostring(script_node)
        pretty_xml = minidom.parseString(raw_xml).toprettyxml(
            indent=" ", encoding="utf-8"
        )
        with open(outputXMLPath, "wb") as out_file:
            out_file.write(pretty_xml)

    def SaveMetafile(self):
        """Return one metafile line per item, in item order."""
        return [entry.SaveMetafile() for entry in self.m_items]
import xml.etree.ElementTree as ET
from .XmlObj import XmlObj
class ScriptItem(XmlObj):
    """One utterance: an id, its text and the script sentences it contains."""

    def __init__(self, phoneset, posset):
        """Store the phone/POS sets and start with an empty utterance.

        Raises:
            ValueError: if ``phoneset`` or ``posset`` is None.
        """
        if phoneset is None or posset is None:
            # ValueError derives from Exception, so existing
            # ``except Exception`` handlers keep working.
            raise ValueError("ScriptItem.__init__: phoneset or posset is None")
        self.m_phoneset = phoneset
        self.m_posset = posset
        self.m_id = None
        self.m_text = ""
        self.m_scriptSentence_list = []
        self.m_status = None

    def Load(self):
        """Placeholder; loading is not implemented."""
        pass

    def Save(self, parent_node):
        """Append an ``utterance`` element (id, text, sentences) to ``parent_node``."""
        utterance_node = ET.SubElement(parent_node, "utterance")
        # NOTE: m_id must have been set to a string before the tree is serialized.
        utterance_node.set("id", self.m_id)
        text_node = ET.SubElement(utterance_node, "text")
        text_node.text = self.m_text
        for sentence in self.m_scriptSentence_list:
            sentence.Save(utterance_node)

    def SaveMetafile(self):
        """Return ``"<id>\\t"`` followed by the concatenated sentence metafile text."""
        prefix = self.m_id + "\t"
        # join avoids rebuilding the string on every sentence.
        return prefix + "".join(
            sentence.SaveMetafile() for sentence in self.m_scriptSentence_list
        )
from .XmlObj import XmlObj
import xml.etree.ElementTree as ET
# TODO(jin): Not referenced, temporarily commented
class WrittenSentence(XmlObj):
    """Written words ("hosts") interleaved with written marks ("accompanies").

    ``m_align_list[i]`` records how many marks had been added when word ``i``
    was added, which is enough to rebuild the original interleaving.
    """

    def __init__(self, posset):
        self.m_written_word_list = []
        self.m_written_mark_list = []
        self.m_posset = posset
        self.m_align_list = []
        self.m_alignCursor = 0
        self.m_accompanyIndex = 0
        self.m_sequence = ""
        self.m_text = ""

    def AddHost(self, writtenWord):
        """Append a word and remember the current mark count as its alignment."""
        self.m_written_word_list.append(writtenWord)
        self.m_align_list.append(self.m_alignCursor)

    def LoadHost(self):
        pass

    def SaveHost(self):
        pass

    def AddAccompany(self, writtenMark):
        """Append a mark and advance the alignment cursor."""
        self.m_written_mark_list.append(writtenMark)
        self.m_alignCursor += 1
        self.m_accompanyIndex += 1

    def SaveAccompany(self):
        pass

    def LoadAccompany(self):
        pass

    def GetAccompanySpan(self, host_index):
        """Return the half-open ``(begin, end)`` mark range tied to a written word.

        ``host_index == -1`` selects the marks that precede the first word.
        When no word has been added yet, every mark counts as leading, so the
        call no longer fails with IndexError on a word-less sentence.
        """
        if host_index == -1:
            if not self.m_align_list:
                return (0, len(self.m_written_mark_list))
            return (0, self.m_align_list[0])
        accompany_begin = self.m_align_list[host_index]
        if host_index + 1 < len(self.m_written_word_list):
            accompany_end = self.m_align_list[host_index + 1]
        else:
            # Last word: its marks run to the end of the mark list.
            accompany_end = len(self.m_written_mark_list)
        return (accompany_begin, accompany_end)

    # TODO: iterable
    def GetElements(self):
        """Return words and marks merged back into their original order."""
        accompany_begin, accompany_end = self.GetAccompanySpan(-1)
        res_lst = list(self.m_written_mark_list[accompany_begin:accompany_end])
        for j, word in enumerate(self.m_written_word_list):
            accompany_begin, accompany_end = self.GetAccompanySpan(j)
            res_lst.append(word)
            res_lst.extend(self.m_written_mark_list[accompany_begin:accompany_end])
        return res_lst

    def BuildSequence(self):
        """Store the space-separated string form of all elements in ``m_sequence``."""
        self.m_sequence = " ".join([str(ele) for ele in self.GetElements()])

    def BuildText(self):
        """Store the concatenated string form of all elements in ``m_text``."""
        self.m_text = "".join([str(ele) for ele in self.GetElements()])
class SpokenSentence(XmlObj):
    """Spoken words ("hosts") interleaved with spoken marks ("accompanies").

    ``m_align_list[i]`` records how many marks had been added when word ``i``
    was added, which is enough to rebuild the original interleaving.
    """

    def __init__(self, phoneset):
        self.m_spoken_word_list = []
        self.m_spoken_mark_list = []
        self.m_phoneset = phoneset
        self.m_align_list = []
        self.m_alignCursor = 0
        self.m_accompanyIndex = 0
        self.m_sequence = ""
        self.m_text = ""

    def __len__(self):
        """Number of spoken words (marks are not counted)."""
        return len(self.m_spoken_word_list)

    def AddHost(self, spokenWord):
        """Append a word and remember the current mark count as its alignment."""
        self.m_spoken_word_list.append(spokenWord)
        self.m_align_list.append(self.m_alignCursor)

    def SaveHost(self):
        pass

    def LoadHost(self):
        pass

    def AddAccompany(self, spokenMark):
        """Append a mark and advance the alignment cursor."""
        self.m_spoken_mark_list.append(spokenMark)
        self.m_alignCursor += 1
        self.m_accompanyIndex += 1

    def SaveAccompany(self):
        pass

    def GetAccompanySpan(self, host_index):
        """Return the half-open ``(begin, end)`` mark range tied to a spoken word.

        ``host_index == -1`` selects the marks that precede the first word.
        When no word has been added yet, every mark counts as leading, so the
        call no longer fails with IndexError on a word-less sentence.
        """
        if host_index == -1:
            if not self.m_align_list:
                return (0, len(self.m_spoken_mark_list))
            return (0, self.m_align_list[0])
        accompany_begin = self.m_align_list[host_index]
        if host_index + 1 < len(self.m_spoken_word_list):
            accompany_end = self.m_align_list[host_index + 1]
        else:
            # Last word: its marks run to the end of the mark list.
            accompany_end = len(self.m_spoken_mark_list)
        return (accompany_begin, accompany_end)

    # TODO: iterable
    def GetElements(self):
        """Return words and marks merged back into their original order."""
        accompany_begin, accompany_end = self.GetAccompanySpan(-1)
        res_lst = list(self.m_spoken_mark_list[accompany_begin:accompany_end])
        for j, word in enumerate(self.m_spoken_word_list):
            accompany_begin, accompany_end = self.GetAccompanySpan(j)
            res_lst.append(word)
            res_lst.extend(self.m_spoken_mark_list[accompany_begin:accompany_end])
        return res_lst

    def LoadAccompany(self):
        pass

    def BuildSequence(self):
        """Store the space-separated string form of all elements in ``m_sequence``."""
        self.m_sequence = " ".join([str(ele) for ele in self.GetElements()])

    def BuildText(self):
        """Store the concatenated string form of all elements in ``m_text``."""
        self.m_text = "".join([str(ele) for ele in self.GetElements()])

    def Save(self, parent_node):
        """Append a ``spoken`` element holding the sequence text and every word."""
        spoken_node = ET.SubElement(parent_node, "spoken")
        spoken_node.set("wordcount", str(len(self.m_spoken_word_list)))
        text_node = ET.SubElement(spoken_node, "text")
        text_node.text = self.m_sequence
        # TODO: spoken mark might be used
        for word in self.m_spoken_word_list:
            word.Save(spoken_node)

    def SaveMetafile(self):
        """Return the words' metafile strings joined by single spaces."""
        return " ".join(word.SaveMetafile() for word in self.m_spoken_word_list)
class ScriptSentence(XmlObj):
    """Pairs a written sentence with its spoken counterpart."""

    def __init__(self, phoneset, posset):
        self.m_phoneset = phoneset
        self.m_posset = posset
        self.m_writtenSentence = WrittenSentence(posset)
        self.m_spokenSentence = SpokenSentence(phoneset)
        self.m_text = ""

    def Save(self, parent_node):
        """Save the spoken sentence under ``parent_node`` unless it has no words."""
        spoken = self.m_spokenSentence
        if len(spoken) == 0:
            return
        spoken.Save(parent_node)

    def SaveMetafile(self):
        """Return the spoken sentence's metafile text, or ``""`` when it has no words."""
        spoken = self.m_spokenSentence
        return spoken.SaveMetafile() if len(spoken) > 0 else ""
import xml.etree.ElementTree as ET
from .XmlObj import XmlObj
from .core_types import Language
from .Syllable import SyllableList
# TODO(Jin): Not referenced, temporarily commented
class WrittenWord(XmlObj):
    """A written word: a name plus a POS slot, both unset on creation."""

    def __init__(self):
        self.m_POS = None
        self.m_name = None

    def __str__(self):
        # The name must have been assigned a string before str() is used.
        return self.m_name

    def Load(self):
        """Placeholder; does nothing."""
        return None

    def Save(self):
        """Placeholder; does nothing."""
        return None
class WrittenMark(XmlObj):
    """A written mark; its punctuation is unset on creation."""

    def __init__(self):
        self.m_punctuation = None

    def __str__(self):
        # The punctuation must have been assigned a string before str() is used.
        return self.m_punctuation

    def Load(self):
        """Placeholder; does nothing."""
        return None

    def Save(self):
        """Placeholder; does nothing."""
        return None
class SpokenWord(XmlObj):
    """A spoken word: name, syllables, break text and POS."""

    def __init__(self):
        self.m_name = None
        self.m_language = None
        self.m_syllable_list = []
        self.m_breakText = "1"
        self.m_POS = "0"

    def __str__(self):
        return self.m_name

    def Load(self):
        """Placeholder; does nothing."""
        pass

    def Save(self, parent_node):
        """Append a ``word`` element describing this word to ``parent_node``.

        Children are written in the order name, lang (optional), syllable,
        break, POS.
        """
        word_node = ET.SubElement(parent_node, "word")
        ET.SubElement(word_node, "name").text = self.m_name

        syllables = self.m_syllable_list
        if len(syllables) > 0:
            head_language = syllables[0].m_language
            # The language of the first syllable is emitted unless it is Neutral.
            if head_language != Language.Neutral:
                ET.SubElement(word_node, "lang").text = head_language.name

        SyllableList(syllables).Save(word_node)
        ET.SubElement(word_node, "break").text = self.m_breakText
        ET.SubElement(word_node, "POS").text = self.m_POS

    def SaveMetafile(self):
        """Return the space-joined metafile text of all syllables plus the break tag."""
        syllables = self.m_syllable_list
        total_phones = sum(syllable.PhoneCount() for syllable in syllables)
        is_single_syllable = len(syllables) == 1
        last_position = len(syllables) - 1

        pieces = []
        for position, syllable in enumerate(syllables):
            if total_phones == 1:
                word_pos = "word_both"
            elif position == 0:
                word_pos = "word_begin"
            elif position == last_position:
                word_pos = "word_end"
            else:
                word_pos = "word_middle"
            pieces.append(
                syllable.SaveMetafile(
                    word_pos, single_syllable_word=is_single_syllable
                )
            )

        # A break tag is appended unless the break text is "0" or None.
        if self.m_breakText is not None and self.m_breakText != "0":
            pieces.append(
                "{{#{}$tone_none$s_none$word_none}}".format(self.m_breakText)
            )
        return " ".join(pieces)
class SpokenMark(XmlObj):
    """A spoken mark carrying a break level (unset on creation)."""

    def __init__(self):
        self.m_breakLevel = None

    def BreakLevel2Text(self):
        """Return ``"#"`` followed by the string form of the break level's ``value``."""
        level_text = str(self.m_breakLevel.value)
        return "#" + level_text

    def __str__(self):
        return self.BreakLevel2Text()

    def Load(self):
        """Placeholder; does nothing."""
        return None

    def Save(self):
        """Placeholder; does nothing."""
        return None
import xml.etree.ElementTree as ET
from .XmlObj import XmlObj
class Syllable(XmlObj):
    """A syllable: a list of phones plus tone, language and break level."""

    def __init__(self):
        self.m_phone_list = []
        self.m_tone = None
        self.m_language = None
        self.m_breaklevel = None

    def PronunciationText(self):
        """Return the phones' string forms joined by single spaces."""
        return " ".join(str(phone) for phone in self.m_phone_list)

    def PhoneCount(self):
        """Return the number of phones in this syllable."""
        return len(self.m_phone_list)

    def ToneText(self):
        """Return the string form of the tone's ``value``."""
        return str(self.m_tone.value)

    def Save(self):
        """Placeholder; does nothing."""
        pass

    def Load(self):
        """Placeholder; does nothing."""
        pass

    def GetPhoneMeta(
        self, phone_name, word_pos, syll_pos, tone_text, single_syllable_word=False
    ):
        """Format one phone as ``{name$tone<t>$<syll_pos>$<word_pos>}``.

        ``word_pos`` is adjusted first so that only the edge phones of a word
        keep the ``word_begin`` / ``word_end`` label.
        """
        if word_pos == "word_begin":
            if syll_pos == "s_end" and single_syllable_word:
                # Special case: word with single syllable, the last phone's
                # word_pos should be "word_end"
                word_pos = "word_end"
            elif syll_pos not in ("s_begin", "s_both"):
                # FIXME: keep accord with Engine logic
                word_pos = "word_middle"
        elif word_pos == "word_end":
            if syll_pos not in ("s_end", "s_both"):
                word_pos = "word_middle"
        return "{{{}$tone{}${}${}}}".format(phone_name, tone_text, syll_pos, word_pos)

    def SaveMetafile(self, word_pos, single_syllable_word=False):
        """Return the space-joined phone metadata for every phone of the syllable."""
        phone_total = len(self.m_phone_list)
        final_position = phone_total - 1

        pieces = []
        for position, phone in enumerate(self.m_phone_list):
            if phone_total == 1:
                syll_pos = "s_both"
            elif position == 0:
                syll_pos = "s_begin"
            elif position == final_position:
                syll_pos = "s_end"
            else:
                syll_pos = "s_middle"
            phone_meta = self.GetPhoneMeta(
                phone,
                word_pos,
                syll_pos,
                self.ToneText(),
                single_syllable_word=single_syllable_word,
            )
            pieces.append(phone_meta)
        return " ".join(pieces)
class SyllableList(XmlObj):
    """Thin wrapper around a list of syllables that knows how to save itself."""

    def __init__(self, syllables):
        self.m_syllable_list = syllables

    def __len__(self):
        return len(self.m_syllable_list)

    def __getitem__(self, index):
        """Support ``syllable_list[i]`` (and, through it, iteration)."""
        return self.m_syllable_list[index]

    def __index__(self, index):
        # Legacy accessor kept for callers that invoke it explicitly.
        # ``__index__`` is the integer-conversion hook and is never called
        # with an argument by the interpreter; subscripting goes through
        # ``__getitem__`` above.
        return self[index]

    def PronunciationText(self):
        """Return each syllable's pronunciation text joined by ``" - "``."""
        return " - ".join(
            [syllable.PronunciationText() for syllable in self.m_syllable_list]
        )

    def ToneText(self):
        """Return the concatenated tone text of every syllable."""
        return "".join([syllable.ToneText() for syllable in self.m_syllable_list])

    def Save(self, parent_node):
        """Append a ``syllable`` element (count, phone text, tone text) to ``parent_node``."""
        syllable_node = ET.SubElement(parent_node, "syllable")
        syllable_node.set("syllcount", str(len(self.m_syllable_list)))
        phone_node = ET.SubElement(syllable_node, "phone")
        phone_node.text = self.PronunciationText()
        tone_node = ET.SubElement(syllable_node, "tone")
        tone_node.text = self.ToneText()

    def Load(self):
        """Placeholder; does nothing."""
        pass
import re
import logging
from .utils import NgBreakPattern
from .Syllable import Syllable
from .core_types import Language, Tone, PhoneCVType
class DefaultSyllableFormatter:
    """Fallback formatter: logs a warning and reports success without adding syllables."""

    def __init__(self):
        """Nothing to set up."""

    def Format(self, phoneset, pronText, syllable_list):
        """Warn that this is a dry run for ``pronText`` and return True."""
        logging.warning("Using DefaultSyllableFormatter dry run: %s", pronText)
        return True
# Compiled from NgBreakPattern (imported from .utils; the pattern itself is not
# visible here). The NormalizePron methods read its named group "break".
RegexNg2en = re.compile(NgBreakPattern)
# A digit 1-5 immediately followed by "5"; the NormalizePron methods replace
# each such pair with a single "5".
RegexQingSheng = re.compile(r"([1-5]5)")
# One or more lowercase ASCII letters (group "Pron") followed by one digit 1-6
# (group "Tone").
RegexPron = re.compile(r"(?P<Pron>[a-z]+)(?P<Tone>[1-6])")
class ZhCNSyllableFormatter:
    """Builds ZhCN syllables by looking up a normalized pronunciation in a map."""

    def __init__(self, sy2ph_map):
        self.m_sy2ph_map = sy2ph_map

    def NormalizePron(self, pronText):
        """Return ``pronText`` with tone digits normalized.

        "6" becomes "2", RegexQingSheng matches collapse to "5", and when
        RegexNg2en matches the whole result is replaced by "en" plus the
        matched "break" group.
        """
        # Replace Qing Sheng
        normalized = RegexQingSheng.sub("5", pronText.replace("6", "2"))
        # FIXME(Jin): ng case overrides newPron
        ng_match = RegexNg2en.search(normalized)
        if ng_match is None:
            return normalized
        return "en" + ng_match.group("break")

    def Format(self, phoneset, pronText, syllable_list):
        """Append one syllable for ``pronText`` to ``syllable_list``.

        Returns True on success; logs an error and returns False when an
        argument is None, the normalized text is missing from the map, or the
        mapped value does not split into exactly three phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("ZhCNSyllableFormatter.Format: invalid input")
            return False

        pronText = self.NormalizePron(pronText)
        if pronText not in self.m_sy2ph_map:
            logging.error(
                "ZhCNSyllableFormatter.Format: syllable to phone map missing key: %s",
                pronText,
            )
            return False

        phone_list = self.m_sy2ph_map[pronText].split(" ")
        if len(phone_list) != 3:
            logging.error(
                "ZhCNSyllableFormatter.Format: invalid pronText: %s", pronText
            )
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        # FIXME(Jin): assume tone is the last char
        syll.m_tone = Tone.parse(pronText[-1])
        syll.m_language = Language.ZhCN
        syllable_list.append(syll)
        return True
class PinYinSyllableFormatter:
    """Builds PinYin syllables from a pronunciation of the form letters + tone digit."""

    def __init__(self, sy2ph_map):
        self.m_sy2ph_map = sy2ph_map

    def NormalizePron(self, pronText):
        """Return ``pronText`` with tone digits normalized.

        "6" becomes "2", RegexQingSheng matches collapse to "5", and when
        RegexNg2en matches the whole result is replaced by "en" plus the
        matched "break" group.
        """
        normalized = RegexQingSheng.sub("5", pronText.replace("6", "2"))
        # FIXME(Jin): ng case overrides newPron
        ng_match = RegexNg2en.search(normalized)
        if ng_match is None:
            return normalized
        return "en" + ng_match.group("break")

    def Format(self, phoneset, pronText, syllable_list):
        """Append one syllable for ``pronText`` to ``syllable_list``.

        Returns True on success; logs an error and returns False when an
        argument is None, RegexPron finds no match, the pronunciation is
        missing from the map, or the mapped value is not one or two phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("PinYinSyllableFormatter.Format: invalid input")
            return False

        pronText = self.NormalizePron(pronText)
        parsed = RegexPron.search(pronText)
        if not parsed:
            logging.error(
                "PinYinSyllableFormatter.Format: pronunciation is not valid: %s",
                pronText,
            )
            return False
        pron, tone = parsed.group("Pron"), parsed.group("Tone")

        if pron not in self.m_sy2ph_map:
            logging.error(
                "PinYinSyllableFormatter.Format: syllable to phone map missing key: %s",
                pron,
            )
            return False

        phone_list = self.m_sy2ph_map[pron].split(" ")
        if len(phone_list) not in (1, 2):
            logging.error("PinYinSyllableFormatter.Format: invalid phone: %s", pron)
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        syll.m_tone = Tone.parse(tone)
        syll.m_language = Language.PinYin
        syllable_list.append(syll)
        return True
class ZhHKSyllableFormatter:
    """Builds ZhHK syllables from a pronunciation of the form letters + tone digit."""

    def __init__(self, sy2ph_map):
        self.m_sy2ph_map = sy2ph_map

    def Format(self, phoneset, pronText, syllable_list):
        """Append one syllable for ``pronText`` to ``syllable_list``.

        Returns True on success; logs an error and returns False when an
        argument is None, RegexPron finds no match, the pronunciation is
        missing from the map, or the mapped value is not one or two phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("ZhHKSyllableFormatter.Format: invalid input")
            return False

        parsed = RegexPron.search(pronText)
        if not parsed:
            logging.error(
                "ZhHKSyllableFormatter.Format: pronunciation is not valid: %s", pronText
            )
            return False
        pron, tone = parsed.group("Pron"), parsed.group("Tone")

        if pron not in self.m_sy2ph_map:
            logging.error(
                "ZhHKSyllableFormatter.Format: syllable to phone map missing key: %s",
                pron,
            )
            return False

        phone_list = self.m_sy2ph_map[pron].split(" ")
        if len(phone_list) not in (1, 2):
            logging.error("ZhHKSyllableFormatter.Format: invalid phone: %s", pron)
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        syll.m_tone = Tone.parse(tone)
        syll.m_language = Language.ZhHK
        syllable_list.append(syll)
        return True
class WuuShanghaiSyllableFormatter:
    """Builds WuuShanghai syllables from a pronunciation of the form letters + tone digit."""

    def __init__(self, sy2ph_map):
        self.m_sy2ph_map = sy2ph_map

    def Format(self, phoneset, pronText, syllable_list):
        """Append one syllable for ``pronText`` to ``syllable_list``.

        Returns True on success; logs an error and returns False when an
        argument is None, RegexPron finds no match, the pronunciation is
        missing from the map, or the mapped value is not one or two phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("WuuShanghaiSyllableFormatter.Format: invalid input")
            return False

        parsed = RegexPron.search(pronText)
        if not parsed:
            logging.error(
                "WuuShanghaiSyllableFormatter.Format: pronunciation is not valid: %s",
                pronText,
            )
            return False
        pron, tone = parsed.group("Pron"), parsed.group("Tone")

        if pron not in self.m_sy2ph_map:
            logging.error(
                "WuuShanghaiSyllableFormatter.Format: syllable to phone map missing key: %s",
                pron,
            )
            return False

        phone_list = self.m_sy2ph_map[pron].split(" ")
        if len(phone_list) not in (1, 2):
            logging.error(
                "WuuShanghaiSyllableFormatter.Format: invalid phone: %s", pron
            )
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        syll.m_tone = Tone.parse(tone)
        syll.m_language = Language.WuuShanghai
        syllable_list.append(syll)
        return True
class SichuanSyllableFormatter:
    """Builds Sichuan syllables from a pronunciation of the form letters + tone digit."""

    def __init__(self, sy2ph_map):
        self.m_sy2ph_map = sy2ph_map

    def Format(self, phoneset, pronText, syllable_list):
        """Append one syllable for ``pronText`` to ``syllable_list``.

        Returns True on success; logs an error and returns False when an
        argument is None, RegexPron finds no match, the pronunciation is
        missing from the map, or the mapped value is not one or two phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("SichuanSyllableFormatter.Format: invalid input")
            return False

        parsed = RegexPron.search(pronText)
        if not parsed:
            logging.error(
                "SichuanSyllableFormatter.Format: pronunciation is not valid: %s",
                pronText,
            )
            return False
        pron, tone = parsed.group("Pron"), parsed.group("Tone")

        if pron not in self.m_sy2ph_map:
            logging.error(
                "SichuanSyllableFormatter.Format: syllable to phone map missing key: %s",
                pron,
            )
            return False

        phone_list = self.m_sy2ph_map[pron].split(" ")
        if len(phone_list) not in (1, 2):
            logging.error(
                "SichuanSyllableFormatter.Format: invalid phone: %s", pron
            )
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        syll.m_tone = Tone.parse(tone)
        syll.m_language = Language.Sichuan
        syllable_list.append(syll)
        return True
class EnXXSyllableFormatter:
    """Builds syllables from a "."-separated, whitespace-delimited phone string."""

    def __init__(self, language):
        self.m_f2t_map = None
        self.m_language = language

    def NormalizePron(self, pronText):
        """Apply the fixed sequence of textual substitutions to ``pronText``.

        "#" becomes ".", a "3" after "0"/"1"/"2" is dropped, any remaining
        "3" is removed, and finally every "2" becomes "0".
        """
        substitutions = (
            ("#", "."),
            ("03", "0"),
            ("13", "1"),
            ("23", "2"),
            ("3", ""),
            ("2", "0"),
        )
        normalized = pronText
        # Order matters: each substitution sees the result of the previous one.
        for old, new in substitutions:
            normalized = normalized.replace(old, new)
        return normalized

    def Format(self, phoneset, pronText, syllable_list):
        """Append one syllable per "."-separated chunk of ``pronText``.

        Returns True on success. Logs an error and returns False when an
        argument is None or a phone name is missing from
        ``phoneset.m_name_map``; syllables completed before the failure stay
        in ``syllable_list``.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("EnXXSyllableFormatter.Format: invalid input")
            return False

        pronText = self.NormalizePron(pronText)
        for chunk in [ele.strip() for ele in pronText.split(".")]:
            syll = Syllable()
            syll.m_language = self.m_language
            syll.m_tone = Tone.parse("0")

            phones = re.split(r"[\s]+", chunk)
            last_position = len(phones) - 1
            for j, raw_phone in enumerate(phones):
                phoneName = raw_phone.lower()
                toneName = "0"
                # A phone containing 0/1/2 carries its tone as the last character.
                if any(digit in phoneName for digit in "012"):
                    toneName, phoneName = phoneName[-1], phoneName[:-1]

                # Optional name remapping: one source phone may expand to several.
                mapped_names = None
                if self.m_f2t_map is not None:
                    mapped_names = self.m_f2t_map.get(phoneName, None)
                if mapped_names is None:
                    mapped_names = [phoneName]

                for new_phoneName in mapped_names:
                    phone_obj = phoneset.m_name_map.get(new_phoneName, None)
                    if phone_obj is None:
                        logging.error(
                            "EnXXSyllableFormatter.Format: phone %s not found",
                            new_phoneName,
                        )
                        return False
                    # NOTE: phone_obj comes straight from the phoneset map, so
                    # these attribute writes are visible to every other user.
                    phone_obj.m_name = new_phoneName
                    syll.m_phone_list.append(phone_obj)
                    if phone_obj.m_cv_type == PhoneCVType.Vowel:
                        syll.m_tone = Tone.parse(toneName)
                    if j == last_position:
                        phone_obj.m_bnd = True
            syllable_list.append(syll)
        return True
class XmlObj:
    """Base class whose load/save hooks all do nothing and return None."""

    def __init__(self):
        """Nothing to initialize."""

    def Load(self):
        """No-op hook."""
        return None

    def Save(self):
        """No-op hook."""
        return None

    def LoadData(self):
        """No-op hook."""
        return None

    def SaveData(self):
        """No-op hook."""
        return None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment