Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
sambert-hifigan_pytorch
Commits
ee10550a
Commit
ee10550a
authored
Feb 06, 2024
by
liugh5
Browse files
Initial commit
parents
Pipeline
#790
canceled with stages
Changes
197
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1622 additions
and
0 deletions
+1622
-0
kantts/preprocess/script_convertor/TextScriptConvertor.py
kantts/preprocess/script_convertor/TextScriptConvertor.py
+608
-0
kantts/preprocess/script_convertor/__init__.py
kantts/preprocess/script_convertor/__init__.py
+0
-0
kantts/preprocess/script_convertor/__pycache__/TextScriptConvertor.cpython-38.pyc
..._convertor/__pycache__/TextScriptConvertor.cpython-38.pyc
+0
-0
kantts/preprocess/script_convertor/__pycache__/__init__.cpython-38.pyc
...cess/script_convertor/__pycache__/__init__.cpython-38.pyc
+0
-0
kantts/preprocess/script_convertor/core/Phone.py
kantts/preprocess/script_convertor/core/Phone.py
+44
-0
kantts/preprocess/script_convertor/core/PhoneSet.py
kantts/preprocess/script_convertor/core/PhoneSet.py
+52
-0
kantts/preprocess/script_convertor/core/Pos.py
kantts/preprocess/script_convertor/core/Pos.py
+40
-0
kantts/preprocess/script_convertor/core/PosSet.py
kantts/preprocess/script_convertor/core/PosSet.py
+72
-0
kantts/preprocess/script_convertor/core/Script.py
kantts/preprocess/script_convertor/core/Script.py
+33
-0
kantts/preprocess/script_convertor/core/ScriptItem.py
kantts/preprocess/script_convertor/core/ScriptItem.py
+37
-0
kantts/preprocess/script_convertor/core/ScriptSentence.py
kantts/preprocess/script_convertor/core/ScriptSentence.py
+186
-0
kantts/preprocess/script_convertor/core/ScriptWord.py
kantts/preprocess/script_convertor/core/ScriptWord.py
+119
-0
kantts/preprocess/script_convertor/core/Syllable.py
kantts/preprocess/script_convertor/core/Syllable.py
+104
-0
kantts/preprocess/script_convertor/core/SyllableFormatter.py
kantts/preprocess/script_convertor/core/SyllableFormatter.py
+312
-0
kantts/preprocess/script_convertor/core/XmlObj.py
kantts/preprocess/script_convertor/core/XmlObj.py
+15
-0
kantts/preprocess/script_convertor/core/__init__.py
kantts/preprocess/script_convertor/core/__init__.py
+0
-0
kantts/preprocess/script_convertor/core/__pycache__/Phone.cpython-38.pyc
...ss/script_convertor/core/__pycache__/Phone.cpython-38.pyc
+0
-0
kantts/preprocess/script_convertor/core/__pycache__/PhoneSet.cpython-38.pyc
...script_convertor/core/__pycache__/PhoneSet.cpython-38.pyc
+0
-0
kantts/preprocess/script_convertor/core/__pycache__/Pos.cpython-38.pyc
...cess/script_convertor/core/__pycache__/Pos.cpython-38.pyc
+0
-0
kantts/preprocess/script_convertor/core/__pycache__/PosSet.cpython-38.pyc
...s/script_convertor/core/__pycache__/PosSet.cpython-38.pyc
+0
-0
No files found.
kantts/preprocess/script_convertor/TextScriptConvertor.py
0 → 100644
View file @
ee10550a
import
logging
import
os
from
tqdm
import
tqdm
from
.core.Script
import
Script
from
.core.ScriptItem
import
ScriptItem
from
.core.ScriptSentence
import
ScriptSentence
from
.core.SyllableFormatter
import
(
ZhCNSyllableFormatter
,
ZhHKSyllableFormatter
,
PinYinSyllableFormatter
,
WuuShanghaiSyllableFormatter
,
SichuanSyllableFormatter
,
EnXXSyllableFormatter
,
)
from
.core.ScriptWord
import
SpokenWord
,
SpokenMark
,
WrittenWord
,
WrittenMark
from
.core.PhoneSet
import
PhoneSet
from
.core.PosSet
import
PosSet
from
.core.core_types
import
Language
,
BreakLevel
from
.core.utils
import
(
RegexID
,
RegexSentence
,
RegexForeignLang
,
RegexNeutralTone
,
format_prosody
,
)
import
argparse
import
re
from
bitstring
import
BitArray
# Configure the root logger when this module is imported: every record carries
# a timestamp, the level, and the source file/line that emitted it, and
# everything from DEBUG upwards is shown.
logging.basicConfig(
    format="%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s",
    datefmt="%Y-%m-%d:%H:%M:%S",
    level=logging.DEBUG,
)
class
TextScriptConvertor
:
def __init__(
    self,
    phoneset_path,
    posset_path,
    target_lang,
    foreign_lang,
    f2t_map_path,
    s2p_map_path,
    m_emo_tag_path,
    m_speaker,
):
    """Load all resources needed to convert a text script.

    Args:
        phoneset_path: Path handed to ``PhoneSet``.
        posset_path: Path handed to ``PosSet``.
        target_lang: Target language name, parsed with ``Language.parse``.
        foreign_lang: Foreign language name, parsed with ``Language.parse``.
        f2t_map_path: Tab-separated map file read by ``LoadF2TMap``.
        s2p_map_path: Tab-separated map file read by ``LoadS2PMap``.
        m_emo_tag_path: Optional emotion tag file; ``None`` means no tags.
        m_speaker: Speaker label appended to every metafile token.
    """
    # The two maps must exist before the loaders below fill them in.
    self.m_f2p_map = {}
    self.m_s2p_map = {}

    self.m_phoneset = PhoneSet(phoneset_path)
    self.m_posset = PosSet(posset_path)
    self.m_target_lang = Language.parse(target_lang)
    self.m_foreign_lang = Language.parse(foreign_lang)
    self.m_emo_tag_path = m_emo_tag_path
    self.m_speaker = m_speaker

    self.LoadF2TMap(f2t_map_path)
    self.LoadS2PMap(s2p_map_path)

    # Without a tag file every utterance falls back to the default emotion
    # chosen in process().
    self.m_emo_dict = (
        {} if m_emo_tag_path is None else self.parse_emo_tag(m_emo_tag_path)
    )

    # The formatters read the maps loaded above, so they are built last.
    build_formatter = self.InitSyllableFormatter
    self.m_target_lang_syllable_formatter = build_formatter(self.m_target_lang)
    self.m_foreign_lang_syllable_formatter = build_formatter(self.m_foreign_lang)
def parse_emo_tag(self, emo_tag_path):
    """Read an emotion tag file into a dict.

    Each valid line holds exactly two whitespace-separated fields: a key and
    its emotion tag. Lines with any other field count are logged as errors
    and skipped. A repeated key is logged as a warning and the later value
    wins.

    Args:
        emo_tag_path: Path of the tag file.

    Returns:
        dict mapping the first field of each valid line to the second.
    """
    with open(emo_tag_path, "r") as tag_file:
        raw_lines = list(tag_file)

    tags = {}
    for raw in raw_lines:
        stripped = raw.strip()
        fields = stripped.split()
        if len(fields) == 2:
            tag_key, tag_value = fields
            if tag_key in tags:
                logging.warning(
                    "TextScriptConvertor.parse_emo_tag: duplicate key: %s", tag_key
                )
            tags[tag_key] = tag_value
        else:
            logging.error(
                "TextScriptConvertor.parse_emo_tag: invalid line: %s", stripped
            )
    return tags
def parse_sentence(self, sentence, line_num):
    """Parse one annotated text line into a ``ScriptItem``.

    The line must begin with an utterance ID matched by ``RegexID`` (named
    group ``ID``). The rest of the line is tokenised with ``RegexSentence``,
    whose named groups drive the parse:

    * ``Word``: adds a written word and starts a pending spoken word.
    * ``Break`` / ``BreakLevel``: commits the pending spoken word with that
      break text, and adds a spoken mark unless the level is L1.
    * ``PhraseTone``: currently ignored.
    * ``POS`` / ``POSClass``: tags the pending spoken word.
    * ``Mark``: adds a punctuation mark to the written sentence.

    Args:
        sentence: The raw text line (ID followed by annotated text).
        line_num: Line index, used only in log messages.

    Returns:
        The populated ``ScriptItem``, or ``None`` (after logging an error)
        when the ID is missing or a token matches none of the groups above.
    """
    script_item = ScriptItem(self.m_phoneset, self.m_posset)
    script_sentence = ScriptSentence(self.m_phoneset, self.m_posset)
    script_item.m_scriptSentence_list.append(script_sentence)

    written_sentence = script_sentence.m_writtenSentence
    spoken_sentence = script_sentence.m_spokenSentence

    position = 0
    sentence = sentence.strip()

    # Get ID
    match = re.search(RegexID, sentence)
    if match is None:
        # NOTE(review): the continuation line is flush-left because the
        # whitespace inside the original literal is not recoverable here.
        logging.error(
            "TextScriptConvertor.parse_sentence:invalid line: %s,\
line ID is needed",
            line_num,
        )
        return None
    else:
        sentence_id = match.group("ID")
        script_item.m_id = sentence_id
        position += match.end()

    # A spoken word stays "pending" until a break token (or the end of the
    # line) commits it to the spoken sentence.
    prevSpokenWord = SpokenWord()
    prevWord = False
    lastBreak = False

    # re.finditer only yields match objects, so no None check is needed.
    for m in re.finditer(RegexSentence, sentence[position:]):
        if m.group("Word") is not None:
            wordName = m.group("Word")
            written_word = WrittenWord()
            written_word.m_name = wordName
            written_sentence.AddHost(written_word)

            spoken_word = SpokenWord()
            spoken_word.m_name = wordName
            prevSpokenWord = spoken_word
            prevWord = True
            lastBreak = False
        elif m.group("Break") is not None:
            breakText = m.group("BreakLevel")
            if len(breakText) == 0:
                breakLevel = BreakLevel.L1
            else:
                breakLevel = BreakLevel.parse(breakText)
            if prevWord:
                prevSpokenWord.m_breakText = breakText
                spoken_sentence.AddHost(prevSpokenWord)

            if breakLevel != BreakLevel.L1:
                spokenMark = SpokenMark()
                spokenMark.m_breakLevel = breakLevel
                spoken_sentence.AddAccompany(spokenMark)

            lastBreak = True
        elif m.group("PhraseTone") is not None:
            # TODO: PhraseTonePattern
            pass
        elif m.group("POS") is not None:
            POSClass = m.group("POSClass")
            if prevWord:
                # SpokenWord stores and saves its tag as ``m_POS``; writing
                # to ``m_pos`` created an unused attribute and lost the tag.
                prevSpokenWord.m_POS = POSClass
            prevWord = False
        elif m.group("Mark") is not None:
            markText = m.group("Mark")
            writtenMark = WrittenMark()
            writtenMark.m_punctuation = markText
            written_sentence.AddAccompany(writtenMark)
        else:
            logging.error(
                "TextScriptConvertor.parse_sentence:\
invalid line: %s, matched pattern is unrecognized",
                line_num,
            )
            return None

    # The line did not end with a break: commit the pending word with break
    # text "4".
    if not lastBreak:
        prevSpokenWord.m_breakText = "4"
        spoken_sentence.AddHost(prevSpokenWord)

    # If no mark was added after the last spoken word, close the sentence
    # with an L4 break mark.
    spoken_word_cnt = len(spoken_sentence.m_spoken_word_list)
    spoken_mark_cnt = len(spoken_sentence.m_spoken_mark_list)
    if (
        spoken_word_cnt > 0
        and spoken_sentence.m_align_list[spoken_word_cnt - 1] == spoken_mark_cnt
    ):
        spokenMark = SpokenMark()
        spokenMark.m_breakLevel = BreakLevel.L4
        spoken_sentence.AddAccompany(spokenMark)

    written_sentence.BuildSequence()
    spoken_sentence.BuildSequence()
    written_sentence.BuildText()
    spoken_sentence.BuildText()

    script_sentence.m_text = written_sentence.m_text
    script_item.m_text = written_sentence.m_text

    return script_item
def FormatSyllable(self, pron, syllable_list):
    """Format one pronunciation with the matching syllable formatter.

    The foreign-language formatter is used when one is configured and
    ``RegexForeignLang`` matches ``pron``; otherwise the target-language
    formatter is used.

    Args:
        pron: Pronunciation string of a single syllable or word.
        syllable_list: List passed through to the formatter's ``Format``.

    Returns:
        Whatever the chosen formatter's ``Format`` returns; the caller in
        ``parse_pronunciation`` treats it as a success flag.
    """
    looks_foreign = RegexForeignLang.search(pron) is not None
    foreign_formatter = self.m_foreign_lang_syllable_formatter

    if looks_foreign and foreign_formatter is not None:
        chosen = foreign_formatter
    else:
        chosen = self.m_target_lang_syllable_formatter
    return chosen.Format(self.m_phoneset, pron, syllable_list)
def GetWordProns(self, pronText):
    """Split a pronunciation line into per-token pronunciations.

    The line is split on ``/``. A chunk matched by ``RegexForeignLang`` is
    kept whole (stripped); any other chunk is stripped and split again on
    single spaces.

    Args:
        pronText: The full pronunciation line.

    Returns:
        Flat list of pronunciation strings in their original order.
    """
    word_prons = []
    for chunk in pronText.split("/"):
        if re.search(RegexForeignLang, chunk):
            word_prons.append(chunk.strip())
            continue
        word_prons.extend(chunk.strip().split(" "))
    return word_prons
def IsErHuaYin(self, pron):
    """Tell whether ``pron`` is an erhua (r-suffixed) syllable.

    Matches of ``RegexNeutralTone`` are first rewritten to ``"5"``, then the
    last character is dropped (presumably the tone digit, not confirmed
    here). The result counts as erhua when it ends in ``"r"`` and is not the
    plain syllable ``"er"``.

    Args:
        pron: Pronunciation string of one syllable.

    Returns:
        True for an erhua syllable, False otherwise. Pronunciations that are
        empty or a single character return False instead of raising
        IndexError.
    """
    normalized = RegexNeutralTone.sub("5", pron)
    base = normalized[:-1]
    # endswith() is safe on an empty string, unlike base[-1].
    return base.endswith("r") and base != "er"
def parse_pronunciation(self, script_item, pronunciation, line_num):
    """Attach the syllables of a pronunciation line to the spoken words.

    The pronunciation line is split with ``GetWordProns`` and walked in
    parallel with the spoken words of the item's first script sentence.
    English words consume one pronunciation each. Words in the Chinese
    language group consume one pronunciation per character of the word name,
    with an adjustment for erhua syllables.

    Args:
        script_item: Item produced by ``parse_sentence``; only its first
            script sentence is used.
        pronunciation: The pronunciation line belonging to that sentence.
        line_num: Line index, used only in log messages.

    Returns:
        True when every pronunciation and every spoken word was consumed.
        False, after logging an error, on any format error or count mismatch.
    """
    spoken_sentence = script_item.m_scriptSentence_list[0].m_spokenSentence

    wordProns = self.GetWordProns(pronunciation)

    # wordIndex walks the spoken words, pronIndex walks the pronunciations.
    wordIndex = 0
    pronIndex = 0
    succeed = True

    while pronIndex < len(wordProns):
        language = Language.Neutral
        syllable_list = []

        pron = wordProns[pronIndex].strip()

        succeed = self.FormatSyllable(pron, syllable_list)
        if not succeed:
            logging.error(
                "TextScriptConvertor.parse_pronunciation:\
invalid line: %s, error pronunciation: %s,\
syllable format error",
                line_num,
                pron,
            )
            return False
        # The language of the first formatted syllable decides how many
        # pronunciations the current word consumes.
        language = syllable_list[0].m_language

        if wordIndex < len(spoken_sentence.m_spoken_word_list):
            if language in [Language.EnGB, Language.EnUS]:
                # English: one pronunciation per word.
                spoken_sentence.m_spoken_word_list[wordIndex].m_syllable_list.extend(
                    syllable_list
                )
                wordIndex += 1
                pronIndex += 1
            elif language in [
                Language.ZhCN,
                Language.PinYin,
                Language.ZhHK,
                Language.WuuShanghai,
                Language.Sichuan,
            ]:
                # Chinese group: one pronunciation per character of the name.
                charCount = len(spoken_sentence.m_spoken_word_list[wordIndex].m_name)
                # An erhua syllable covers the "儿" character as well, so
                # "儿" is removed from the name and one fewer pronunciation
                # is expected.
                if (
                    language in [Language.ZhCN, Language.PinYin, Language.Sichuan]
                    and self.IsErHuaYin(pron)
                    and "儿" in spoken_sentence.m_spoken_word_list[wordIndex].m_name
                ):
                    spoken_sentence.m_spoken_word_list[
                        wordIndex
                    ].m_name = spoken_sentence.m_spoken_word_list[
                        wordIndex
                    ].m_name.replace(
                        "儿", ""
                    )
                    charCount -= 1
                if charCount == 1:
                    spoken_sentence.m_spoken_word_list[
                        wordIndex
                    ].m_syllable_list.extend(syllable_list)
                    wordIndex += 1
                    pronIndex += 1
                else:
                    # FIXME(Jin): Just skip the first char then match the rest char.
                    # The first syllable was formatted above; consume one
                    # more pronunciation for each remaining character and
                    # append it to the same syllable_list.
                    i = 1
                    while i >= 1 and i < charCount:
                        pronIndex += 1
                        if pronIndex < len(wordProns):
                            pron = wordProns[pronIndex].strip()
                            succeed = self.FormatSyllable(pron, syllable_list)
                            if not succeed:
                                logging.error(
                                    "TextScriptConvertor.parse_pronunciation: invalid line: %s,\
error pronunciation: %s, syllable format error",
                                    line_num,
                                    pron,
                                )
                                return False
                            # Same erhua adjustment as above, for a later
                            # syllable of the word.
                            if (
                                language
                                in [
                                    Language.ZhCN,
                                    Language.PinYin,
                                    Language.Sichuan,
                                ]
                                and self.IsErHuaYin(pron)
                                and "儿"
                                in spoken_sentence.m_spoken_word_list[wordIndex].m_name
                            ):
                                spoken_sentence.m_spoken_word_list[
                                    wordIndex
                                ].m_name = spoken_sentence.m_spoken_word_list[
                                    wordIndex
                                ].m_name.replace(
                                    "儿", ""
                                )
                                charCount -= 1
                        else:
                            # Ran out of pronunciations in the middle of a word.
                            logging.error(
                                "TextScriptConvertor.parse_pronunciation: invalid line: %s,\
error pronunciation: %s, Word count mismatch with Pron count",
                                line_num,
                                pron,
                            )
                            return False
                        i += 1
                    spoken_sentence.m_spoken_word_list[
                        wordIndex
                    ].m_syllable_list.extend(syllable_list)
                    wordIndex += 1
                    pronIndex += 1
            else:
                logging.error(
                    "TextScriptConvertor.parse_pronunciation: invalid line: %s,\
unsupported language: %s",
                    line_num,
                    language.name,
                )
                return False
        else:
            # More pronunciations than spoken words.
            logging.error(
                "TextScriptConvertor.parse_pronunciation: invalid line: %s,\
error pronunciation: %s, word index is out of range",
                line_num,
                pron,
            )
            return False
    # Both cursors must have reached the end of their sequences.
    if pronIndex != len(wordProns):
        logging.error(
            "TextScriptConvertor.parse_pronunciation: invalid line: %s,\
error pronunciation: %s, pron count mismatch with word count",
            line_num,
            pron,
        )
        return False
    if wordIndex != len(spoken_sentence.m_spoken_word_list):
        logging.error(
            "TextScriptConvertor.parse_pronunciation: invalid line: %s,\
error pronunciation: %s, word count mismatch with word index",
            line_num,
            pron,
        )
        return False
    return True
def LoadF2TMap(self, file_path):
    """Fill ``self.m_f2p_map`` from a tab-separated file.

    Each valid line is ``key<TAB>value``; the value is split on single
    spaces and stored as a list. Lines without exactly two tab-separated
    fields are logged and skipped. A repeated key is logged and overwritten.

    Args:
        file_path: Path of the map file.
    """
    with open(file_path, "r") as map_file:
        for raw in map_file.readlines():
            entry = raw.strip()
            fields = entry.split("\t")
            if len(fields) != 2:
                logging.error(
                    "TextScriptConvertor.LoadF2TMap: invalid line: %s", entry
                )
                continue
            name, joined_values = fields
            if name in self.m_f2p_map:
                logging.error(
                    "TextScriptConvertor.LoadF2TMap: duplicate key: %s", name
                )
            self.m_f2p_map[name] = joined_values.split(" ")
def LoadS2PMap(self, file_path):
    """Fill ``self.m_s2p_map`` from a tab-separated file.

    Each valid line is ``key<TAB>value`` and the value is stored as-is.
    Lines without exactly two tab-separated fields are logged and skipped.
    A repeated key is logged and overwritten.

    Args:
        file_path: Path of the map file.
    """
    with open(file_path, "r") as map_file:
        for raw in map_file.readlines():
            entry = raw.strip()
            fields = entry.split("\t")
            if len(fields) != 2:
                logging.error(
                    "TextScriptConvertor.LoadS2PMap: invalid line: %s", entry
                )
                continue
            name, mapped = fields
            if name in self.m_s2p_map:
                logging.error(
                    "TextScriptConvertor.LoadS2PMap: duplicate key: %s", name
                )
            self.m_s2p_map[name] = mapped
def InitSyllableFormatter(self, targetLang):
    """Build the syllable formatter for ``targetLang``.

    The five Chinese-group languages need a non-empty ``self.m_s2p_map`` and
    get their formatter constructed from it. The two English variants get an
    ``EnXXSyllableFormatter`` whose ``m_f2t_map`` is set only when
    ``self.m_f2p_map`` is non-empty.

    Args:
        targetLang: A ``Language`` value.

    Returns:
        The formatter, or ``None`` (after logging an error) when the language
        is unsupported or the required map is empty.
    """
    # (language, formatter class, message logged when the s2p map is empty)
    s2p_based = (
        (
            Language.ZhCN,
            ZhCNSyllableFormatter,
            "TextScriptConvertor.InitSyllableFormatter: ZhCN syllable to phone map is empty",
        ),
        (
            Language.PinYin,
            PinYinSyllableFormatter,
            "TextScriptConvertor.InitSyllableFormatter: PinYin syllable to phone map is empty",
        ),
        (
            Language.ZhHK,
            ZhHKSyllableFormatter,
            "TextScriptConvertor.InitSyllableFormatter: ZhHK syllable to phone map is empty",
        ),
        (
            Language.WuuShanghai,
            WuuShanghaiSyllableFormatter,
            "TextScriptConvertor.InitSyllableFormatter: WuuShanghai syllable to phone map is empty",
        ),
        (
            Language.Sichuan,
            SichuanSyllableFormatter,
            "TextScriptConvertor.InitSyllableFormatter: Sichuan syllable to phone map is empty",
        ),
    )
    for lang, formatter_cls, empty_map_msg in s2p_based:
        if targetLang == lang:
            if len(self.m_s2p_map) == 0:
                logging.error(empty_map_msg)
                return None
            return formatter_cls(self.m_s2p_map)

    for english in (Language.EnGB, Language.EnUS):
        if targetLang == english:
            formatter = EnXXSyllableFormatter(english)
            if len(self.m_f2p_map) != 0:
                formatter.m_f2t_map = self.m_f2p_map
            return formatter

    logging.error(
        "TextScriptConvertor.InitSyllableFormatter: unsupported language: %s",
        targetLang,
    )
    return None
def process(self, textScriptPath, outputXMLPath, outputMetafile):
    """Convert a text script into an XML script and a tagged metafile.

    The lines returned by ``format_prosody`` alternate between a sentence
    line (even index) and its pronunciation line (odd index). A sentence is
    kept only when both lines parse successfully.

    Args:
        textScriptPath: Input text script, handed to ``format_prosody``.
        outputXMLPath: Destination of the XML script.
        outputMetafile: Destination of the metafile.
    """
    script = Script(self.m_phoneset, self.m_posset)
    formatted_lines = format_prosody(textScriptPath)

    for line_num, line in enumerate(tqdm(formatted_lines)):
        if line_num % 2 == 0:
            # Sentence line: starts a new item (None when parsing fails).
            item = self.parse_sentence(line.strip(), line_num)
        elif item is not None:
            # Pronunciation line belonging to the sentence parsed just before.
            if self.parse_pronunciation(item, line.strip(), line_num):
                script.m_items.append(item)

    script.Save(outputXMLPath)
    logging.info("TextScriptConvertor.process:\nSave script to: %s", outputXMLPath)

    meta_lines = script.SaveMetafile()
    speaker = self.m_speaker

    tagged_lines = []
    for meta_line in meta_lines:
        utt_id, syllable_text = meta_line.split("\t")
        emotion = self.m_emo_dict.get(utt_id, "emotion_neutral")
        # Replace the last character of every token with
        # "$<emotion>$<speaker>}".
        suffix = "$" + emotion + "$" + speaker + "}"
        tagged_tokens = [token[:-1] + suffix for token in syllable_text.split(" ")]
        tagged_lines.append(utt_id + "\t" + " ".join(tagged_tokens))

    with open(outputMetafile, "w") as meta_file:
        meta_file.writelines(tagged + "\n" for tagged in tagged_lines)

    logging.info(
        "TextScriptConvertor.process:\nSave metafile to: %s", outputMetafile
    )
@staticmethod
def turn_text_into_bytes(plain_text_path, output_meta_file_path, speaker):
    """Write a byte-level metafile for a plain text file.

    Every non-blank input line must be ``sentence_id<TAB>sentence``. The
    sentence is encoded as UTF-8 and each byte becomes one token of the form
    ``{<byte value>$emotion_neutral$<speaker>}``. If the last byte is not
    33, 46 or 63 (ASCII ``!``, ``.``, ``?``), a closing token for byte 46 is
    appended.

    Both files are handled as UTF-8 so the result does not depend on the
    platform's default encoding. Blank lines are skipped.

    Args:
        plain_text_path: Input text file.
        output_meta_file_path: Destination metafile.
        speaker: Speaker label embedded in every token.

    Raises:
        ValueError: If a non-blank line does not contain exactly one tab
            separating two non-empty fields.
    """
    sentence_end_bytes = (33, 46, 63)  # "!", ".", "?"
    token_template = "{{{}$emotion_neutral${}}}"

    meta_lines = []
    with open(plain_text_path, "r", encoding="utf-8") as in_file:
        for text_line in in_file:
            stripped = text_line.strip()
            if not stripped:
                continue
            sentence_id, sentence = stripped.split("\t")

            # Iterating a bytes object yields ints in 0..255, so no range
            # check or hex round-trip is needed.
            encoded = sentence.encode("utf-8")
            sequence = [token_template.format(value, speaker) for value in encoded]
            if not encoded or encoded[-1] not in sentence_end_bytes:
                sequence.append(token_template.format(46, speaker))

            meta_lines.append("{}\t{}\n".format(sentence_id, " ".join(sequence)))

    with open(output_meta_file_path, "w", encoding="utf-8") as out_file:
        out_file.writelines(meta_lines)
def main(args):
    """Run the conversion described by the parsed command-line ``args``.

    When ``args.speaker`` is not given, the speaker label defaults to the
    grandparent directory path of ``args.text_script_path``.

    Args:
        args: Namespace carrying the options defined in the ``__main__``
            block of this file.
    """
    # NOTE(review): logging.basicConfig does nothing once the root logger has
    # handlers, and this module already configures it at import time, so this
    # level is probably never applied.
    logging.basicConfig(level=logging.INFO)
    logging.info("TextScriptConvertor.main: start")

    speaker = args.speaker
    if speaker is None:
        script_dir = os.path.dirname(args.text_script_path)
        speaker = os.path.dirname(script_dir)

    convertor = TextScriptConvertor(
        args.phoneset_path,
        args.posset_path,
        args.language,
        args.foreignLang,
        args.f2t_map_path,
        args.s2p_map_path,
        args.emo_tag_path,
        speaker,
    )
    convertor.process(
        args.text_script_path, args.output_xml_path, args.output_metafile
    )

    logging.info("TextScriptConvertor.main: end")
# TODO(jin): add emotional style; add speaker info;
if __name__ == "__main__":
    # Command-line entry point: nine required string options followed by two
    # optional ones, registered in the same order as before.
    parser = argparse.ArgumentParser(description="TextScriptConvertor")

    required_options = (
        ("--language", "target language"),
        ("--foreignLang", "foreign language"),
        ("--phoneset_path", "phoneset path"),
        ("--posset_path", "posset path"),
        ("--f2t_map_path", "f2t map path"),
        ("--s2p_map_path", "s2p map path"),
        ("--text_script_path", "input text script path"),
        ("--output_xml_path", "output xml path"),
        ("--output_metafile", "output metafile path"),
    )
    for flag, description in required_options:
        parser.add_argument(flag, type=str, required=True, help=description)

    optional_options = (
        ("--emo_tag_path", "emotion tag path"),
        ("--speaker", "speaker"),
    )
    for flag, description in optional_options:
        parser.add_argument(flag, type=str, default=None, help=description)

    args = parser.parse_args()
    main(args)
kantts/preprocess/script_convertor/__init__.py
0 → 100644
View file @
ee10550a
kantts/preprocess/script_convertor/__pycache__/TextScriptConvertor.cpython-38.pyc
0 → 100644
View file @
ee10550a
File added
kantts/preprocess/script_convertor/__pycache__/__init__.cpython-38.pyc
0 → 100644
View file @
ee10550a
File added
kantts/preprocess/script_convertor/core/Phone.py
0 → 100644
View file @
ee10550a
from
.XmlObj
import
XmlObj
from
.core_types
import
PhoneCVType
,
PhoneIFType
,
PhoneUVType
,
PhoneAPType
,
PhoneAMType
class Phone(XmlObj):
    """One phone of a phone set, loaded from an XML ``phone`` node."""

    def __init__(self):
        self.m_id = None
        self.m_name = None
        # Every classification starts as NULL until Load() fills it in.
        self.m_cv_type = PhoneCVType.NULL
        self.m_if_type = PhoneIFType.NULL
        self.m_uv_type = PhoneUVType.NULL
        self.m_ap_type = PhoneAPType.NULL
        self.m_am_type = PhoneAMType.NULL
        self.m_bnd = False

    def __str__(self):
        return self.m_name

    def Save(self):
        pass

    def Load(self, phone_node):
        """Populate this phone from ``phone_node``.

        Reads the child elements ``id``, ``name``, ``cv``, ``if``, ``uv``,
        ``ap`` and ``am`` in the TTS XML namespace, in that order.

        Args:
            phone_node: XML element holding the phone definition.
        """
        ns = "{http://schemas.alibaba-inc.com/tts}"

        def child_text(tag):
            # Text of the namespaced child element named ``tag``.
            return phone_node.find(ns + tag).text

        self.m_id = int(child_text("id"))
        self.m_name = child_text("name")
        self.m_cv_type = PhoneCVType.parse(child_text("cv"))
        self.m_if_type = PhoneIFType.parse(child_text("if"))
        self.m_uv_type = PhoneUVType.parse(child_text("uv"))
        self.m_ap_type = PhoneAPType.parse(child_text("ap"))
        self.m_am_type = PhoneAMType.parse(child_text("am"))
kantts/preprocess/script_convertor/core/PhoneSet.py
0 → 100644
View file @
ee10550a
import
xml.etree.ElementTree
as
ET
import
logging
from
.XmlObj
import
XmlObj
from
.Phone
import
Phone
class PhoneSet(XmlObj):
    """All phones of a phone set XML file, indexed by id and by name."""

    def __init__(self, phoneset_path):
        self.m_phone_list = []
        self.m_id_map = {}
        self.m_name_map = {}
        self.Load(phoneset_path)

    def Load(self, file_path):
        """Parse ``file_path`` and register every ``phone`` child of the root.

        A duplicate id or name is logged as an error and the later phone
        replaces the earlier one in the corresponding map.

        Args:
            file_path: Path of the phone set XML file.
        """
        # alibaba tts xml namespace
        phone_tag = "{http://schemas.alibaba-inc.com/tts}" + "phone"
        root = ET.parse(file_path).getroot()

        for node in root.findall(phone_tag):
            phone = Phone()
            phone.Load(node)
            self.m_phone_list.append(phone)

            indexes = (
                (self.m_id_map, phone.m_id, "PhoneSet.Load: duplicate id: %d"),
                (
                    self.m_name_map,
                    phone.m_name,
                    "PhoneSet.Load duplicate name name: %s",
                ),
            )
            for index, key, duplicate_msg in indexes:
                if key in index:
                    logging.error(duplicate_msg, key)
                index[key] = phone

    def Save(self):
        pass
# if __name__ == "__main__":
# import os
# import sys
#
# phoneset = PhoneSet()
# phoneset.Load(sys.argv[1])
#
# for phone in phoneset.m_phone_list:
# print(phone)
# print(phone.m_id)
# print(phone.m_name)
# print(phone.m_cv_type)
# print(phone.m_if_type)
# print(phone.m_uv_type)
# print(phone.m_ap_type)
# print(phone.m_am_type)
# print(phone.m_bnd)
kantts/preprocess/script_convertor/core/Pos.py
0 → 100644
View file @
ee10550a
from
.XmlObj
import
XmlObj
class Pos(XmlObj):
    """One part-of-speech entry, optionally holding nested sub-entries."""

    def __init__(self):
        self.m_id = None
        self.m_name = None
        self.m_desc = None
        self.m_level = 1
        self.m_parent = None
        self.m_sub_pos_list = []

    def __str__(self):
        return self.m_name

    def Save(self):
        pass

    def Load(self, pos_node):
        """Populate this entry, and its sub-entries, from ``pos_node``.

        Reads the ``id``, ``name`` and ``desc`` children. If a ``sub`` child
        exists, each ``pos`` element inside it is loaded recursively and
        linked to this entry as its parent.

        Args:
            pos_node: XML element holding the POS definition.
        """
        ns = "{http://schemas.alibaba-inc.com/tts}"

        self.m_id = int(pos_node.find(ns + "id").text)
        self.m_name = pos_node.find(ns + "name").text
        self.m_desc = pos_node.find(ns + "desc").text

        sub_node = pos_node.find(ns + "sub")
        if sub_node is None:
            return

        for child_node in sub_node.findall(ns + "pos"):
            child = Pos()
            # NOTE(review): the child is loaded before its own level is set,
            # so its descendants derive their level from the default of 1.
            child.Load(child_node)
            child.m_parent = self
            child.m_level = self.m_level + 1
            self.m_sub_pos_list.append(child)
        return
kantts/preprocess/script_convertor/core/PosSet.py
0 → 100644
View file @
ee10550a
import
xml.etree.ElementTree
as
ET
import
logging
from
.XmlObj
import
XmlObj
from
.Pos
import
Pos
class PosSet(XmlObj):
    """All POS entries of a POS set XML file, indexed by id and by name."""

    def __init__(self, posset_path):
        self.m_pos_list = []
        self.m_id_map = {}
        self.m_name_map = {}
        self.Load(posset_path)

    def Load(self, file_path):
        """Parse ``file_path`` and register each POS and its direct sub-POS.

        Only the entries in a POS's own ``m_sub_pos_list`` are registered
        after it; deeper descendants are not added here. A duplicate id or
        name is logged as an error and the later entry wins.

        Args:
            file_path: Path of the POS set XML file.
        """
        # alibaba tts xml namespace
        ns = "{http://schemas.alibaba-inc.com/tts}"
        root = ET.parse(file_path).getroot()

        for pos_node in root.findall(ns + "pos"):
            pos = Pos()
            pos.Load(pos_node)

            # The top-level entry first, then its direct children in order.
            for entry in [pos, *pos.m_sub_pos_list]:
                self.m_pos_list.append(entry)
                if entry.m_id in self.m_id_map:
                    logging.error("PosSet.Load: duplicate id: %d", entry.m_id)
                self.m_id_map[entry.m_id] = entry

                if entry.m_name in self.m_name_map:
                    logging.error(
                        "PosSet.Load duplicate name name: %s", entry.m_name
                    )
                self.m_name_map[entry.m_name] = entry

    def Save(self):
        pass
# if __name__ == "__main__":
# import os
# import sys
#
# posset = PosSet()
# posset.Load(sys.argv[1])
#
# for pos in posset.m_pos_list:
# print(pos)
# print(pos.m_id)
# print(pos.m_name)
# print(pos.m_desc)
# print(pos.m_level)
# print(pos.m_parent)
# if pos.m_sub_pos_list:
# print("sub pos list:")
# for sub_pos in pos.m_sub_pos_list:
# print(sub_pos)
# print(sub_pos.m_id)
# print(sub_pos.m_name)
# print(sub_pos.m_desc)
# print(sub_pos.m_level)
# print(sub_pos.m_parent)
# print("sub pos list end")
kantts/preprocess/script_convertor/core/Script.py
0 → 100644
View file @
ee10550a
from
.XmlObj
import
XmlObj
import
xml.etree.ElementTree
as
ET
from
xml.dom
import
minidom
class Script(XmlObj):
    """Collection of script items that can be written as XML or metafile lines."""

    def __init__(self, phoneset, posset):
        self.m_phoneset = phoneset
        self.m_posset = posset
        self.m_items = []

    def Save(self, outputXMLPath):
        """Write all items to ``outputXMLPath`` as pretty-printed UTF-8 XML.

        Args:
            outputXMLPath: Destination file, opened in binary mode.
        """
        root = ET.Element(
            "script",
            {
                "uttcount": str(len(self.m_items)),
                "xmlns": "http://schemas.alibaba-inc.com/tts",
            },
        )
        for item in self.m_items:
            item.Save(root)

        # Round-trip through minidom only to get indented output.
        document = minidom.parseString(ET.tostring(root))
        pretty_bytes = document.toprettyxml(indent=" ", encoding="utf-8")
        with open(outputXMLPath, "wb") as xml_file:
            xml_file.write(pretty_bytes)

    def SaveMetafile(self):
        """Return one metafile line per item, in item order."""
        return [item.SaveMetafile() for item in self.m_items]
kantts/preprocess/script_convertor/core/ScriptItem.py
0 → 100644
View file @
ee10550a
import
xml.etree.ElementTree
as
ET
from
.XmlObj
import
XmlObj
class ScriptItem(XmlObj):
    """One utterance: an id, its text and the script sentences it contains."""

    def __init__(self, phoneset, posset):
        if phoneset is None or posset is None:
            raise Exception("ScriptItem.__init__: phoneset or posset is None")

        self.m_phoneset = phoneset
        self.m_posset = posset

        self.m_id = None
        self.m_text = ""
        self.m_scriptSentence_list = []
        self.m_status = None

    def Load(self):
        pass

    def Save(self, parent_node):
        """Append this item to ``parent_node`` as an ``utterance`` element.

        The element carries the item id as its ``id`` attribute, a ``text``
        child with the item text, and whatever each sentence's ``Save`` adds.

        Args:
            parent_node: XML element that receives the new ``utterance``.
        """
        utterance_node = ET.SubElement(parent_node, "utterance", {"id": self.m_id})
        ET.SubElement(utterance_node, "text").text = self.m_text

        for sentence in self.m_scriptSentence_list:
            sentence.Save(utterance_node)

    def SaveMetafile(self):
        """Return ``<id><TAB>`` followed by every sentence's metafile text."""
        sentence_parts = [
            sentence.SaveMetafile() for sentence in self.m_scriptSentence_list
        ]
        return self.m_id + "\t" + "".join(sentence_parts)
kantts/preprocess/script_convertor/core/ScriptSentence.py
0 → 100644
View file @
ee10550a
from
.XmlObj
import
XmlObj
import
xml.etree.ElementTree
as
ET
# TODO(jin): Not referenced, temporarily commented
class WrittenSentence(XmlObj):
    """Written words ("hosts") interleaved with punctuation marks ("accompanies").

    ``m_align_list[i]`` records how many marks had been added when word ``i``
    was added, which is the index of the first mark that follows word ``i``.
    """

    def __init__(self, posset):
        self.m_written_word_list = []
        self.m_written_mark_list = []
        self.m_posset = posset
        self.m_align_list = []
        self.m_alignCursor = 0
        self.m_accompanyIndex = 0
        self.m_sequence = ""
        self.m_text = ""

    def AddHost(self, writtenWord):
        """Append a word and remember how many marks precede it."""
        self.m_written_word_list.append(writtenWord)
        self.m_align_list.append(self.m_alignCursor)

    def LoadHost(self):
        pass

    def SaveHost(self):
        pass

    def AddAccompany(self, writtenMark):
        """Append a mark; it belongs to the most recently added word."""
        self.m_written_mark_list.append(writtenMark)
        self.m_alignCursor += 1
        self.m_accompanyIndex += 1

    def SaveAccompany(self):
        pass

    def LoadAccompany(self):
        pass

    def GetAccompanySpan(self, host_index):
        """Return the ``(begin, end)`` mark indices attached to a word.

        Args:
            host_index: Index of the word, or ``-1`` for the marks that come
                before the first word.

        Returns:
            Half-open index range into ``m_written_mark_list``. With
            ``host_index == -1`` and no words at all, the range covers every
            mark instead of raising IndexError.
        """
        if host_index == -1:
            if not self.m_align_list:
                # No word yet: every mark precedes the (missing) first word.
                return (0, len(self.m_written_mark_list))
            return (0, self.m_align_list[0])

        accompany_begin = self.m_align_list[host_index]
        if host_index + 1 < len(self.m_written_word_list):
            accompany_end = self.m_align_list[host_index + 1]
        else:
            # The last word owns all remaining marks.
            accompany_end = len(self.m_written_mark_list)
        return (accompany_begin, accompany_end)

    # TODO: iterable
    def GetElements(self):
        """Return words and marks merged in their original order."""
        begin, end = self.GetAccompanySpan(-1)
        elements = list(self.m_written_mark_list[begin:end])
        for index, word in enumerate(self.m_written_word_list):
            begin, end = self.GetAccompanySpan(index)
            elements.append(word)
            elements.extend(self.m_written_mark_list[begin:end])
        return elements

    def BuildSequence(self):
        """Store the space-joined string form of all elements in ``m_sequence``."""
        self.m_sequence = " ".join([str(ele) for ele in self.GetElements()])

    def BuildText(self):
        """Store the concatenated string form of all elements in ``m_text``."""
        self.m_text = "".join([str(ele) for ele in self.GetElements()])
class SpokenSentence(XmlObj):
    """Spoken words ("hosts") interleaved with spoken marks ("accompanies").

    ``m_align_list[i]`` records how many marks had been added when word ``i``
    was added, which is the index of the first mark that follows word ``i``.
    """

    def __init__(self, phoneset):
        self.m_spoken_word_list = []
        self.m_spoken_mark_list = []
        self.m_phoneset = phoneset
        self.m_align_list = []
        self.m_alignCursor = 0
        self.m_accompanyIndex = 0
        self.m_sequence = ""
        self.m_text = ""

    def __len__(self):
        return len(self.m_spoken_word_list)

    def AddHost(self, spokenWord):
        """Append a word and remember how many marks precede it."""
        self.m_spoken_word_list.append(spokenWord)
        self.m_align_list.append(self.m_alignCursor)

    def SaveHost(self):
        pass

    def LoadHost(self):
        pass

    def AddAccompany(self, spokenMark):
        """Append a mark; it belongs to the most recently added word."""
        self.m_spoken_mark_list.append(spokenMark)
        self.m_alignCursor += 1
        self.m_accompanyIndex += 1

    def SaveAccompany(self):
        pass

    def GetAccompanySpan(self, host_index):
        """Return the ``(begin, end)`` mark indices attached to a word.

        Args:
            host_index: Index of the word, or ``-1`` for the marks that come
                before the first word.

        Returns:
            Half-open index range into ``m_spoken_mark_list``. With
            ``host_index == -1`` and no words at all, the range covers every
            mark instead of raising IndexError.
        """
        if host_index == -1:
            if not self.m_align_list:
                # No word yet: every mark precedes the (missing) first word.
                return (0, len(self.m_spoken_mark_list))
            return (0, self.m_align_list[0])

        accompany_begin = self.m_align_list[host_index]
        if host_index + 1 < len(self.m_spoken_word_list):
            accompany_end = self.m_align_list[host_index + 1]
        else:
            # The last word owns all remaining marks.
            accompany_end = len(self.m_spoken_mark_list)
        return (accompany_begin, accompany_end)

    # TODO: iterable
    def GetElements(self):
        """Return words and marks merged in their original order."""
        begin, end = self.GetAccompanySpan(-1)
        elements = list(self.m_spoken_mark_list[begin:end])
        for index, word in enumerate(self.m_spoken_word_list):
            begin, end = self.GetAccompanySpan(index)
            elements.append(word)
            elements.extend(self.m_spoken_mark_list[begin:end])
        return elements

    def LoadAccompany(self):
        pass

    def BuildSequence(self):
        """Store the space-joined string form of all elements in ``m_sequence``."""
        self.m_sequence = " ".join([str(ele) for ele in self.GetElements()])

    def BuildText(self):
        """Store the concatenated string form of all elements in ``m_text``."""
        self.m_text = "".join([str(ele) for ele in self.GetElements()])

    def Save(self, parent_node):
        """Append a ``spoken`` element describing this sentence to ``parent_node``.

        The element gets a ``wordcount`` attribute, a ``text`` child holding
        ``m_sequence``, and whatever each word's ``Save`` adds.
        """
        spoken_node = ET.SubElement(parent_node, "spoken")
        spoken_node.set("wordcount", str(len(self.m_spoken_word_list)))

        text_node = ET.SubElement(spoken_node, "text")
        text_node.text = self.m_sequence

        # TODO: spoken mark might be used
        for word in self.m_spoken_word_list:
            word.Save(spoken_node)

    def SaveMetafile(self):
        """Return the space-joined metafile text of all spoken words."""
        meta_line_list = [word.SaveMetafile() for word in self.m_spoken_word_list]
        return " ".join(meta_line_list)
class ScriptSentence(XmlObj):
    """Pairs the written and the spoken form of one sentence."""

    def __init__(self, phoneset, posset):
        self.m_phoneset = phoneset
        self.m_posset = posset
        self.m_writtenSentence = WrittenSentence(posset)
        self.m_spokenSentence = SpokenSentence(phoneset)
        self.m_text = ""

    def Save(self, parent_node):
        """Save the spoken sentence under ``parent_node`` if it has any word."""
        spoken = self.m_spokenSentence
        if len(spoken) > 0:
            spoken.Save(parent_node)

    def SaveMetafile(self):
        """Return the spoken sentence's metafile text, or ``""`` if it has no word."""
        spoken = self.m_spokenSentence
        if len(spoken) > 0:
            return spoken.SaveMetafile()
        return ""
kantts/preprocess/script_convertor/core/ScriptWord.py
0 → 100644
View file @
ee10550a
import
xml.etree.ElementTree
as
ET
from
.XmlObj
import
XmlObj
from
.core_types
import
Language
from
.Syllable
import
SyllableList
# TODO(Jin): Not referenced, temporarily commented
class WrittenWord(XmlObj):
    """Written-form word carrying ``m_name`` and ``m_POS`` fields."""

    def __init__(self):
        """Start with both the name and the POS unset."""
        self.m_name = self.m_POS = None

    def __str__(self):
        """Return ``m_name`` as the string form."""
        return self.m_name

    def Load(self):
        """Placeholder with no behaviour."""

    def Save(self):
        """Placeholder with no behaviour."""
class WrittenMark(XmlObj):
    """Written-form mark carrying a single ``m_punctuation`` field."""

    def __init__(self):
        """Start with the punctuation unset."""
        self.m_punctuation = None

    def __str__(self):
        """Return ``m_punctuation`` as the string form."""
        return self.m_punctuation

    def Load(self):
        """Placeholder with no behaviour."""

    def Save(self):
        """Placeholder with no behaviour."""
class SpokenWord(XmlObj):
    """Spoken-form word: name, language, syllables, break text and POS text.

    ``m_breakText`` defaults to ``"1"`` and ``m_POS`` to ``"0"``.
    """

    def __init__(self):
        """Create a word with no name, no language and no syllables."""
        self.m_name = None
        self.m_language = None
        self.m_syllable_list = []
        self.m_breakText = "1"
        self.m_POS = "0"

    def __str__(self):
        """Return ``m_name`` as the string form."""
        return self.m_name

    def Load(self):
        """Placeholder with no behaviour."""

    def Save(self, parent_node):
        """Append a ``<word>`` element for this word to *parent_node*.

        Children are written in the order ``name``, optional ``lang``, the
        syllable list's own output, ``break`` and ``POS``.
        """
        word_node = ET.SubElement(parent_node, "word")
        ET.SubElement(word_node, "name").text = self.m_name

        # A <lang> child is written only when the first syllable carries a
        # language other than Language.Neutral.
        syllables = self.m_syllable_list
        if len(syllables) > 0 and syllables[0].m_language != Language.Neutral:
            ET.SubElement(word_node, "lang").text = syllables[0].m_language.name

        SyllableList(self.m_syllable_list).Save(word_node)

        ET.SubElement(word_node, "break").text = self.m_breakText
        ET.SubElement(word_node, "POS").text = self.m_POS

    def SaveMetafile(self):
        """Return this word's metafile text.

        Each syllable contributes its own metafile string, tagged with its
        position inside the word. A trailing break token is appended unless
        ``m_breakText`` is ``None`` or ``"0"``. Pieces are joined by spaces.
        """
        syllables = self.m_syllable_list
        total_phone_count = sum(syllable.PhoneCount() for syllable in syllables)
        is_single_syllable = len(syllables) == 1
        final_position = len(syllables) - 1

        pieces = []
        for position, syllable in enumerate(syllables):
            if total_phone_count == 1:
                word_pos = "word_both"
            elif position == 0:
                word_pos = "word_begin"
            elif position == final_position:
                word_pos = "word_end"
            else:
                word_pos = "word_middle"
            pieces.append(
                syllable.SaveMetafile(
                    word_pos, single_syllable_word=is_single_syllable
                )
            )

        break_text = self.m_breakText
        if break_text is not None and break_text != "0":
            pieces.append("{{#{}$tone_none$s_none$word_none}}".format(break_text))

        return " ".join(pieces)
class SpokenMark(XmlObj):
    """Spoken-form mark that records a break level."""

    def __init__(self):
        """Start with the break level unset."""
        self.m_breakLevel = None

    def BreakLevel2Text(self):
        """Return ``"#"`` followed by the string form of ``m_breakLevel.value``."""
        # m_breakLevel must expose a ``.value`` attribute (presumably an enum
        # member; its type is declared outside this view).
        return "#{}".format(str(self.m_breakLevel.value))

    def __str__(self):
        """Return the break-level text."""
        return self.BreakLevel2Text()

    def Load(self):
        """Placeholder with no behaviour."""

    def Save(self):
        """Placeholder with no behaviour."""
kantts/preprocess/script_convertor/core/Syllable.py
0 → 100644
View file @
ee10550a
import
xml.etree.ElementTree
as
ET
from
.XmlObj
import
XmlObj
class Syllable(XmlObj):
    """One syllable: a list of phones plus tone, language and break level."""

    def __init__(self):
        """Create a syllable with no phones and all other fields unset."""
        self.m_phone_list = []
        self.m_tone = None
        self.m_language = None
        self.m_breaklevel = None

    def PronunciationText(self):
        """Return the string forms of the phones joined by single spaces."""
        return " ".join(map(str, self.m_phone_list))

    def PhoneCount(self):
        """Return the number of phones in this syllable."""
        return len(self.m_phone_list)

    def ToneText(self):
        """Return the string form of ``m_tone.value``."""
        return str(self.m_tone.value)

    def Save(self):
        """Placeholder with no behaviour."""

    def Load(self):
        """Placeholder with no behaviour."""

    def GetPhoneMeta(
        self, phone_name, word_pos, syll_pos, tone_text, single_syllable_word=False
    ):
        """Return the metafile token ``{phone$tone<tone>$<syll_pos>$<word_pos>}``.

        *word_pos* is adjusted first so that it agrees with the phone's
        position inside the syllable (*syll_pos*).
        """
        if word_pos == "word_begin":
            if syll_pos == "s_end" and single_syllable_word:
                # Special case: word with single syllable, the last phone's
                # word_pos should be "word_end"
                word_pos = "word_end"
            elif syll_pos not in ("s_begin", "s_both"):
                # FIXME: keep accord with Engine logic
                word_pos = "word_middle"
        elif word_pos == "word_end" and syll_pos not in ("s_end", "s_both"):
            word_pos = "word_middle"

        return "{{{}$tone{}${}${}}}".format(phone_name, tone_text, syll_pos, word_pos)

    def SaveMetafile(self, word_pos, single_syllable_word=False):
        """Return the metafile tokens of all phones, joined by single spaces.

        Every phone is tagged with its position in the syllable
        (``s_both``, ``s_begin``, ``s_end`` or ``s_middle``) and passed to
        ``GetPhoneMeta`` together with *word_pos*.
        """
        phones = self.m_phone_list
        phone_count = len(phones)
        final_position = phone_count - 1

        tokens = []
        for position, phone in enumerate(phones):
            if phone_count == 1:
                syll_pos = "s_both"
            elif position == 0:
                syll_pos = "s_begin"
            elif position == final_position:
                syll_pos = "s_end"
            else:
                syll_pos = "s_middle"
            tokens.append(
                self.GetPhoneMeta(
                    phone,
                    word_pos,
                    syll_pos,
                    self.ToneText(),
                    single_syllable_word=single_syllable_word,
                )
            )

        return " ".join(tokens)
class SyllableList(XmlObj):
    """Thin wrapper around a list of syllables with text and XML helpers."""

    def __init__(self, syllables):
        """Wrap *syllables*; the list is stored by reference, not copied."""
        self.m_syllable_list = syllables

    def __len__(self):
        """Return the number of wrapped syllables."""
        return len(self.m_syllable_list)

    def __getitem__(self, index):
        """Return the syllable (or slice) at *index*, enabling ``obj[index]``."""
        return self.m_syllable_list[index]

    def __index__(self, index):
        """Return the syllable at *index*.

        Kept only for callers that invoke this method explicitly. Python's
        ``__index__`` hook is the no-argument integer-conversion protocol and
        never provided subscription; ``__getitem__`` does that now.
        """
        return self[index]

    def PronunciationText(self):
        """Return each syllable's pronunciation text joined by ``" - "``."""
        return " - ".join(
            syllable.PronunciationText() for syllable in self.m_syllable_list
        )

    def ToneText(self):
        """Return each syllable's tone text concatenated with no separator."""
        return "".join(syllable.ToneText() for syllable in self.m_syllable_list)

    def Save(self, parent_node):
        """Append a ``<syllable>`` element to *parent_node*.

        The element has a ``syllcount`` attribute and two children: ``phone``
        (the pronunciation text) and ``tone`` (the tone text).
        """
        syllable_node = ET.SubElement(parent_node, "syllable")
        syllable_node.set("syllcount", str(len(self.m_syllable_list)))

        phone_node = ET.SubElement(syllable_node, "phone")
        phone_node.text = self.PronunciationText()

        tone_node = ET.SubElement(syllable_node, "tone")
        tone_node.text = self.ToneText()

    def Load(self):
        """Placeholder with no behaviour."""
kantts/preprocess/script_convertor/core/SyllableFormatter.py
0 → 100644
View file @
ee10550a
import
re
import
logging
from
.utils
import
NgBreakPattern
from
.Syllable
import
Syllable
from
.core_types
import
Language
,
Tone
,
PhoneCVType
class DefaultSyllableFormatter:
    """Formatter that converts nothing: it only logs a warning and reports success."""

    def __init__(self):
        """Nothing to initialise."""

    def Format(self, phoneset, pronText, syllable_list):
        """Log a dry-run warning for *pronText* and return ``True``.

        *phoneset* is not used and *syllable_list* is left untouched.
        """
        logging.warning("Using DefaultSyllableFormatter dry run: %s", pronText)
        return True
# Compiled from NgBreakPattern (imported from .utils). The NormalizePron
# methods below read its named group "break" after a successful search.
RegexNg2en = re.compile(NgBreakPattern)
# One digit 1-5 immediately followed by "5". The NormalizePron methods below
# replace each such pair with a single "5".
RegexQingSheng = re.compile(r"([1-5]5)")
# One or more lowercase ASCII letters (group "Pron") followed by a single
# digit 1-6 (group "Tone").
RegexPron = re.compile(r"(?P<Pron>[a-z]+)(?P<Tone>[1-6])")
class ZhCNSyllableFormatter:
    """Builds ``Language.ZhCN`` syllables from a syllable-to-phones map.

    Map keys are looked up with the normalised pronunciation text; map values
    are space-separated phone names.
    """

    def __init__(self, sy2ph_map):
        """Keep a reference to the syllable-to-phones mapping."""
        self.m_sy2ph_map = sy2ph_map

    def NormalizePron(self, pronText):
        """Return *pronText* with tone digits normalised.

        Every ``"6"`` becomes ``"2"``, and each RegexQingSheng match becomes
        ``"5"``. If RegexNg2en then matches, the whole result is replaced by
        ``"en"`` plus the match's ``break`` group.
        """
        # Replace Qing Sheng
        normalized = RegexQingSheng.sub("5", pronText.replace("6", "2"))

        # FIXME(Jin): ng case overrides newPron
        ng_match = RegexNg2en.search(normalized)
        if ng_match:
            return "en" + ng_match.group("break")
        return normalized

    def Format(self, phoneset, pronText, syllable_list):
        """Append the syllable described by *pronText* to *syllable_list*.

        Returns ``True`` on success. Returns ``False`` after logging an error
        when an argument is ``None``, the normalised text is not a key of the
        map, or the mapped value does not split into exactly three phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("ZhCNSyllableFormatter.Format: invalid input")
            return False

        pronText = self.NormalizePron(pronText)

        if pronText not in self.m_sy2ph_map:
            logging.error(
                "ZhCNSyllableFormatter.Format: syllable to phone map missing key: %s",
                pronText,
            )
            return False

        phone_list = self.m_sy2ph_map[pronText].split(" ")
        if len(phone_list) != 3:
            logging.error(
                "ZhCNSyllableFormatter.Format: invalid pronText: %s", pronText
            )
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        # FIXME(Jin): assume tone is the last char
        syll.m_tone = Tone.parse(pronText[-1])
        syll.m_language = Language.ZhCN
        syllable_list.append(syll)
        return True
class PinYinSyllableFormatter:
    """Builds ``Language.PinYin`` syllables from a syllable-to-phones map.

    Map keys are the letter part of a pronunciation (without the tone digit);
    map values are space-separated phone names.
    """

    def __init__(self, sy2ph_map):
        """Keep a reference to the syllable-to-phones mapping."""
        self.m_sy2ph_map = sy2ph_map

    def NormalizePron(self, pronText):
        """Return *pronText* with tone digits normalised.

        Every ``"6"`` becomes ``"2"``, and each RegexQingSheng match becomes
        ``"5"``. If RegexNg2en then matches, the whole result is replaced by
        ``"en"`` plus the match's ``break`` group.
        """
        normalized = RegexQingSheng.sub("5", pronText.replace("6", "2"))

        # FIXME(Jin): ng case overrides newPron
        ng_match = RegexNg2en.search(normalized)
        if ng_match:
            return "en" + ng_match.group("break")
        return normalized

    def Format(self, phoneset, pronText, syllable_list):
        """Append the syllable described by *pronText* to *syllable_list*.

        Returns ``True`` on success. Returns ``False`` after logging an error
        when an argument is ``None``, the normalised text does not match
        RegexPron, the letter part is not a key of the map, or the mapped
        value does not split into one or two phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("PinYinSyllableFormatter.Format: invalid input")
            return False

        pronText = self.NormalizePron(pronText)

        parsed = RegexPron.search(pronText)
        if not parsed:
            logging.error(
                "PinYinSyllableFormatter.Format: pronunciation is not valid: %s",
                pronText,
            )
            return False
        pron = parsed.group("Pron")
        tone = parsed.group("Tone")

        if pron not in self.m_sy2ph_map:
            logging.error(
                "PinYinSyllableFormatter.Format: syllable to phone map missing key: %s",
                pron,
            )
            return False

        phone_list = self.m_sy2ph_map[pron].split(" ")
        if len(phone_list) not in (1, 2):
            logging.error("PinYinSyllableFormatter.Format: invalid phone: %s", pron)
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        syll.m_tone = Tone.parse(tone)
        syll.m_language = Language.PinYin
        syllable_list.append(syll)
        return True
class ZhHKSyllableFormatter:
    """Builds ``Language.ZhHK`` syllables from a syllable-to-phones map.

    Map keys are the letter part of a pronunciation (without the tone digit);
    map values are space-separated phone names.
    """

    def __init__(self, sy2ph_map):
        """Keep a reference to the syllable-to-phones mapping."""
        self.m_sy2ph_map = sy2ph_map

    def Format(self, phoneset, pronText, syllable_list):
        """Append the syllable described by *pronText* to *syllable_list*.

        Returns ``True`` on success. Returns ``False`` after logging an error
        when an argument is ``None``, *pronText* does not match RegexPron, the
        letter part is not a key of the map, or the mapped value does not
        split into one or two phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("ZhHKSyllableFormatter.Format: invalid input")
            return False

        parsed = RegexPron.search(pronText)
        if not parsed:
            logging.error(
                "ZhHKSyllableFormatter.Format: pronunciation is not valid: %s",
                pronText,
            )
            return False
        pron = parsed.group("Pron")
        tone = parsed.group("Tone")

        if pron not in self.m_sy2ph_map:
            logging.error(
                "ZhHKSyllableFormatter.Format: syllable to phone map missing key: %s",
                pron,
            )
            return False

        phone_list = self.m_sy2ph_map[pron].split(" ")
        if len(phone_list) not in (1, 2):
            logging.error("ZhHKSyllableFormatter.Format: invalid phone: %s", pron)
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        syll.m_tone = Tone.parse(tone)
        syll.m_language = Language.ZhHK
        syllable_list.append(syll)
        return True
class WuuShanghaiSyllableFormatter:
    """Builds ``Language.WuuShanghai`` syllables from a syllable-to-phones map.

    Map keys are the letter part of a pronunciation (without the tone digit);
    map values are space-separated phone names.
    """

    def __init__(self, sy2ph_map):
        """Keep a reference to the syllable-to-phones mapping."""
        self.m_sy2ph_map = sy2ph_map

    def Format(self, phoneset, pronText, syllable_list):
        """Append the syllable described by *pronText* to *syllable_list*.

        Returns ``True`` on success. Returns ``False`` after logging an error
        when an argument is ``None``, *pronText* does not match RegexPron, the
        letter part is not a key of the map, or the mapped value does not
        split into one or two phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("WuuShanghaiSyllableFormatter.Format: invalid input")
            return False

        parsed = RegexPron.search(pronText)
        if not parsed:
            logging.error(
                "WuuShanghaiSyllableFormatter.Format: pronunciation is not valid: %s",
                pronText,
            )
            return False
        pron = parsed.group("Pron")
        tone = parsed.group("Tone")

        if pron not in self.m_sy2ph_map:
            logging.error(
                "WuuShanghaiSyllableFormatter.Format: syllable to phone map missing key: %s",
                pron,
            )
            return False

        phone_list = self.m_sy2ph_map[pron].split(" ")
        if len(phone_list) not in (1, 2):
            logging.error(
                "WuuShanghaiSyllableFormatter.Format: invalid phone: %s", pron
            )
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        syll.m_tone = Tone.parse(tone)
        syll.m_language = Language.WuuShanghai
        syllable_list.append(syll)
        return True
class SichuanSyllableFormatter:
    """Builds ``Language.Sichuan`` syllables from a syllable-to-phones map.

    Map keys are the letter part of a pronunciation (without the tone digit);
    map values are space-separated phone names.
    """

    def __init__(self, sy2ph_map):
        """Keep a reference to the syllable-to-phones mapping."""
        self.m_sy2ph_map = sy2ph_map

    def Format(self, phoneset, pronText, syllable_list):
        """Append the syllable described by *pronText* to *syllable_list*.

        Returns ``True`` on success. Returns ``False`` after logging an error
        when an argument is ``None``, *pronText* does not match RegexPron, the
        letter part is not a key of the map, or the mapped value does not
        split into one or two phones.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("SichuanSyllableFormatter.Format: invalid input")
            return False

        parsed = RegexPron.search(pronText)
        if not parsed:
            logging.error(
                "SichuanSyllableFormatter.Format: pronunciation is not valid: %s",
                pronText,
            )
            return False
        pron = parsed.group("Pron")
        tone = parsed.group("Tone")

        if pron not in self.m_sy2ph_map:
            logging.error(
                "SichuanSyllableFormatter.Format: syllable to phone map missing key: %s",
                pron,
            )
            return False

        phone_list = self.m_sy2ph_map[pron].split(" ")
        if len(phone_list) not in (1, 2):
            logging.error("SichuanSyllableFormatter.Format: invalid phone: %s", pron)
            return False

        syll = Syllable()
        syll.m_phone_list.extend(phone_list)
        syll.m_tone = Tone.parse(tone)
        syll.m_language = Language.Sichuan
        syllable_list.append(syll)
        return True
class EnXXSyllableFormatter:
    """Builds syllables from dot-separated pronunciations of space-separated phones.

    Every produced syllable is tagged with the language given at construction.
    ``m_f2t_map`` is ``None`` by default; when set, it maps a phone name to a
    list of replacement phone names.
    """

    def __init__(self, language):
        """Remember *language* and start without a phone-replacement map."""
        self.m_f2t_map = None
        self.m_language = language

    def NormalizePron(self, pronText):
        """Return *pronText* with separators and stress digits normalised.

        The replacements run strictly in this order: ``#`` -> ``.``,
        ``03`` -> ``0``, ``13`` -> ``1``, ``23`` -> ``2``, ``3`` -> removed,
        ``2`` -> ``0``.
        """
        normalized = pronText.replace("#", ".")
        # Order matters: the two-character forms must be handled before the
        # bare "3" is dropped and before "2" is folded into "0".
        for old, new in (("03", "0"), ("13", "1"), ("23", "2"), ("3", ""), ("2", "0")):
            normalized = normalized.replace(old, new)
        return normalized

    def Format(self, phoneset, pronText, syllable_list):
        """Append one syllable per dot-separated part of *pronText*.

        Returns ``True`` on success. Returns ``False`` after logging an error
        when an argument is ``None`` or a phone name is missing from
        ``phoneset.m_name_map``. In the latter case, syllables completed
        before the failure have already been appended to *syllable_list*.
        """
        if any(arg is None for arg in (phoneset, syllable_list, pronText)):
            logging.error("EnXXSyllableFormatter.Format: invalid input")
            return False

        pronText = self.NormalizePron(pronText)
        syllable_texts = [part.strip() for part in pronText.split(".")]

        for syllable_text in syllable_texts:
            syll = Syllable()
            syll.m_language = self.m_language
            syll.m_tone = Tone.parse("0")

            phones = re.split(r"[\s]+", syllable_text)
            final_position = len(phones) - 1

            for position, raw_phone in enumerate(phones):
                phoneName = raw_phone.lower()
                toneName = "0"

                # A phone containing 0, 1 or 2 has its last character taken
                # as the tone and removed from the name.
                if any(digit in phoneName for digit in "012"):
                    toneName, phoneName = phoneName[-1], phoneName[:-1]

                replacements = None
                if self.m_f2t_map is not None:
                    replacements = self.m_f2t_map.get(phoneName, None)
                if replacements is None:
                    replacements = [phoneName]

                for new_phoneName in replacements:
                    phone_obj = phoneset.m_name_map.get(new_phoneName, None)
                    if phone_obj is None:
                        logging.error(
                            "EnXXSyllableFormatter.Format: phone %s not found",
                            new_phoneName,
                        )
                        return False

                    # NOTE(review): phone_obj is the object stored in
                    # phoneset.m_name_map and is modified in place (m_name,
                    # m_bnd), so the change is visible through every other
                    # reference to it. Confirm that sharing is intended.
                    phone_obj.m_name = new_phoneName
                    syll.m_phone_list.append(phone_obj)

                    if phone_obj.m_cv_type == PhoneCVType.Vowel:
                        syll.m_tone = Tone.parse(toneName)

                    if position == final_position:
                        phone_obj.m_bnd = True

            syllable_list.append(syll)

        return True
kantts/preprocess/script_convertor/core/XmlObj.py
0 → 100644
View file @
ee10550a
class XmlObj:
    """Base class whose load/save hooks all do nothing and return ``None``."""

    def __init__(self):
        """Nothing to initialise."""

    def Load(self):
        """No-op hook."""

    def Save(self):
        """No-op hook."""

    def LoadData(self):
        """No-op hook."""

    def SaveData(self):
        """No-op hook."""
kantts/preprocess/script_convertor/core/__init__.py
0 → 100644
View file @
ee10550a
kantts/preprocess/script_convertor/core/__pycache__/Phone.cpython-38.pyc
0 → 100644
View file @
ee10550a
File added
kantts/preprocess/script_convertor/core/__pycache__/PhoneSet.cpython-38.pyc
0 → 100644
View file @
ee10550a
File added
kantts/preprocess/script_convertor/core/__pycache__/Pos.cpython-38.pyc
0 → 100644
View file @
ee10550a
File added
kantts/preprocess/script_convertor/core/__pycache__/PosSet.cpython-38.pyc
0 → 100644
View file @
ee10550a
File added
Prev
1
…
3
4
5
6
7
8
9
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment