Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
Paraformer_FunASR_pytorch
Commits
70a8a9e0
Commit
70a8a9e0
authored
Oct 03, 2024
by
wangwei990215
Browse files
initial commit
parents
Pipeline
#1738
failed with stages
in 0 seconds
Changes
827
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
615 additions
and
0 deletions
+615
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/cardinal.py
...ing/inverse_text_normalization/en/verbalizers/cardinal.py
+38
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/date.py
...cessing/inverse_text_normalization/en/verbalizers/date.py
+70
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/decimal.py
...sing/inverse_text_normalization/en/verbalizers/decimal.py
+48
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/electronic.py
...g/inverse_text_normalization/en/verbalizers/electronic.py
+45
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/fraction.py
...ing/inverse_text_normalization/en/verbalizers/fraction.py
+10
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/measure.py
...sing/inverse_text_normalization/en/verbalizers/measure.py
+47
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/money.py
...essing/inverse_text_normalization/en/verbalizers/money.py
+26
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/ordinal.py
...sing/inverse_text_normalization/en/verbalizers/ordinal.py
+48
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/telephone.py
...ng/inverse_text_normalization/en/verbalizers/telephone.py
+30
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/time.py
...cessing/inverse_text_normalization/en/verbalizers/time.py
+68
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/verbalize.py
...ng/inverse_text_normalization/en/verbalizers/verbalize.py
+47
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/verbalize_final.py
...erse_text_normalization/en/verbalizers/verbalize_final.py
+33
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/whitelist.py
...ng/inverse_text_normalization/en/verbalizers/whitelist.py
+27
-0
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/word.py
...cessing/inverse_text_normalization/en/verbalizers/word.py
+29
-0
FunASR/fun_text_processing/inverse_text_normalization/es/__init__.py
...text_processing/inverse_text_normalization/es/__init__.py
+7
-0
FunASR/fun_text_processing/inverse_text_normalization/es/data/__init__.py
...processing/inverse_text_normalization/es/data/__init__.py
+1
-0
FunASR/fun_text_processing/inverse_text_normalization/es/data/currency_plural.tsv
...ng/inverse_text_normalization/es/data/currency_plural.tsv
+7
-0
FunASR/fun_text_processing/inverse_text_normalization/es/data/currency_singular.tsv
.../inverse_text_normalization/es/data/currency_singular.tsv
+7
-0
FunASR/fun_text_processing/inverse_text_normalization/es/data/electronic/__init__.py
...inverse_text_normalization/es/data/electronic/__init__.py
+1
-0
FunASR/fun_text_processing/inverse_text_normalization/es/data/electronic/domain.tsv
.../inverse_text_normalization/es/data/electronic/domain.tsv
+26
-0
No files found.
Too many changes to show.
To preserve performance only
827 of 827+
files are displayed.
Plain diff
Email patch
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/cardinal.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
(
DAMO_NOT_QUOTE
,
GraphFst
,
delete_space
,
)
from
pynini.lib
import
pynutil
class CardinalFst(GraphFst):
    """
    Verbalizer FST for cardinal tokens: removes the field markup and keeps
    the quoted values (fields shown in the order this grammar consumes them).
        e.g. cardinal { negative: "-" integer: "23" } -> -23

    Attributes:
        numbers: the unsigned sub-graph (``integer`` field only); MeasureFst
            reads it via ``cardinal.numbers``.
        fst: optimized result of ``self.delete_tokens`` applied to the signed graph.
    """

    def __init__(self):
        super().__init__(name="cardinal", kind="verbalize")
        strip_quote = pynutil.delete('"')

        # negative: "<exactly one DAMO_NOT_QUOTE symbol>" -- the whole field may be absent.
        sign_field = (
            pynutil.delete("negative:")
            + delete_space
            + strip_quote
            + DAMO_NOT_QUOTE
            + strip_quote
            + delete_space
        )
        optional_sign = pynini.closure(sign_field, 0, 1)

        # integer: "<one or more DAMO_NOT_QUOTE symbols>"
        value = pynini.closure(DAMO_NOT_QUOTE, 1)
        unsigned = pynutil.delete("integer:") + delete_space + strip_quote + value + strip_quote
        self.numbers = unsigned

        signed = optional_sign + unsigned
        # delete_tokens is inherited from GraphFst (not visible here); presumably it
        # strips the surrounding `cardinal { ... }` wrapper -- confirm in graph_utils.
        self.fst = self.delete_tokens(signed).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/date.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
(
DAMO_NOT_QUOTE
,
GraphFst
,
delete_extra_space
,
delete_space
,
)
from
pynini.lib
import
pynutil
class DateFst(GraphFst):
    """
    Finite state transducer for verbalizing date, e.g.
        date { month: "january" day: "5" year: "2012" preserve_order: true } -> january 5 2012
        date { day: "5" month: "january" year: "2012" preserve_order: true } -> 5 january 2012

    Field values are copied through unchanged; only the markup around them is removed.
    A bare ``year`` field on its own is accepted as well.
    """

    def __init__(self):
        super().__init__(name="date", kind="verbalize")
        drop_quote = pynutil.delete('"')

        def open_field(label):
            # <label> "<value>  -- everything up to, but excluding, the closing quote.
            return pynutil.delete(label) + delete_space + drop_quote + pynini.closure(DAMO_NOT_QUOTE, 1)

        month = open_field("month:") + drop_quote
        day = open_field("day:") + drop_quote
        # Unlike month/day, year also allows spaces to be deleted right before the closing quote.
        year = open_field("year:") + delete_space + drop_quote

        optional_year = pynini.closure(delete_extra_space + year, 0, 1)

        # month (day) year
        graph_mdy = month + pynini.closure(delete_extra_space + day, 0, 1) + optional_year

        # (day) month year
        graph_dmy = pynini.closure(day + delete_extra_space, 0, 1) + month + optional_year

        # Trailing bookkeeping fields are dropped. The closure is unbounded, so any
        # number of them is accepted. The field_order value is a single
        # DAMO_NOT_QUOTE symbol.
        preserve_order = (
            pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
        )
        field_order = (
            pynutil.delete("field_order:")
            + delete_space
            + drop_quote
            + DAMO_NOT_QUOTE
            + drop_quote
            + delete_space
        )
        optional_preserve_order = pynini.closure(preserve_order | field_order)

        final_graph = (graph_mdy | year | graph_dmy) + delete_space + optional_preserve_order
        self.fst = self.delete_tokens(final_graph).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/decimal.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
(
DAMO_NOT_QUOTE
,
GraphFst
,
delete_space
,
)
from
pynini.lib
import
pynutil
class DecimalFst(GraphFst):
    """
    Verbalizer FST for decimal tokens, e.g.
        decimal { negative: "true" integer_part: "12" fractional_part: "5006" quantity: "billion" } -> -12.5006 billion

    Attributes:
        numbers: the unsigned sub-graph (integer, fractional and quantity parts,
            each optional); MeasureFst and MoneyFst read it via ``decimal.numbers``.
        fst: optimized result of ``self.delete_tokens`` applied to the signed graph.
    """

    def __init__(self):
        super().__init__(name="decimal", kind="verbalize")

        def quoted_field(label):
            # <label> "<value>" -> <value>
            return (
                pynutil.delete(label)
                + delete_space
                + pynutil.delete('"')
                + pynini.closure(DAMO_NOT_QUOTE, 1)
                + pynutil.delete('"')
            )

        # negative: "true" is rewritten to a leading minus sign.
        sign = pynini.cross('negative: "true"', "-") + delete_space
        optional_sign = pynini.closure(sign, 0, 1)

        optional_integer = pynini.closure(quoted_field("integer_part:") + delete_space, 0, 1)

        # The decimal point is inserted only when a fractional part is present.
        fractional = pynutil.insert(".") + quoted_field("fractional_part:")
        optional_fractional = pynini.closure(fractional + delete_space, 0, 1)

        # A quantity word is separated from the number by one inserted space.
        spaced_quantity = pynutil.insert(" ") + quoted_field("quantity:") + delete_space
        optional_quantity = pynini.closure(spaced_quantity, 0, 1)

        unsigned = optional_integer + optional_fractional + optional_quantity
        self.numbers = unsigned

        self.fst = self.delete_tokens(optional_sign + unsigned).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/electronic.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
(
DAMO_NOT_QUOTE
,
GraphFst
,
delete_space
,
)
from
pynini.lib
import
pynutil
class ElectronicFst(GraphFst):
    """
    Verbalizer FST for electronic tokens (e-mail style or protocol style)
        e.g. tokens { electronic { username: "cdf1" domain: "abc.edu" } } -> cdf1@abc.edu

    A token carrying only a ``protocol`` field is verbalized to that field's value.
    """

    def __init__(self):
        super().__init__(name="electronic", kind="verbalize")

        def quoted_field(label):
            # <label> "<value>" -> <value>
            return (
                pynutil.delete(label)
                + delete_space
                + pynutil.delete('"')
                + pynini.closure(DAMO_NOT_QUOTE, 1)
                + pynutil.delete('"')
            )

        user_name = quoted_field("username:")
        domain = quoted_field("domain:")
        protocol = quoted_field("protocol:")

        # username and domain are joined by an inserted "@".
        email = user_name + delete_space + pynutil.insert("@") + domain
        either = email | protocol

        self.fst = self.delete_tokens(either).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/fraction.py
0 → 100644
View file @
70a8a9e0
from
fun_text_processing.text_normalization.en.graph_utils
import
GraphFst
class FractionFst(GraphFst):
    """
    Placeholder verbalizer for fraction tokens.

    Only the GraphFst base is initialised; no grammar is built and ``self.fst``
    is not assigned by this class.
    """

    def __init__(self):
        super().__init__(kind="verbalize", name="fraction")
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/measure.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
DAMO_CHAR
,
GraphFst
,
delete_space
from
pynini.lib
import
pynutil
class MeasureFst(GraphFst):
    """
    Verbalizer FST for measure tokens (fields shown in the order this grammar consumes them), e.g.
        measure { cardinal { negative: "true" integer: "12" } units: "kg" } -> -12 kg

    Args:
        decimal: DecimalFst (its ``numbers`` graph is reused)
        cardinal: CardinalFst (its ``numbers`` graph is reused)
    """

    def __init__(self, decimal: GraphFst, cardinal: GraphFst):
        super().__init__(name="measure", kind="verbalize")

        # negative: "true" becomes a leading minus sign; it is read inside the braces.
        optional_sign = pynini.closure(pynini.cross('negative: "true"', "-"), 0, 1)

        def braced_number(opening, numbers):
            # <opening> [sign] <numbers> }  -> [sign]<numbers>
            return (
                pynutil.delete(opening)
                + delete_space
                + optional_sign
                + delete_space
                + numbers
                + delete_space
                + pynutil.delete("}")
            )

        # units: "<one or more DAMO_CHAR symbols other than a plain space>"
        unit = (
            pynutil.delete("units:")
            + delete_space
            + pynutil.delete('"')
            + pynini.closure(DAMO_CHAR - " ", 1)
            + pynutil.delete('"')
            + delete_space
        )

        graph_decimal = braced_number("decimal {", decimal.numbers)
        graph_cardinal = braced_number("cardinal {", cardinal.numbers)

        # Number and unit are separated by exactly one inserted space.
        number_then_unit = (graph_cardinal | graph_decimal) + delete_space + pynutil.insert(" ") + unit

        self.fst = self.delete_tokens(number_then_unit).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/money.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
DAMO_CHAR
,
GraphFst
,
delete_space
from
pynini.lib
import
pynutil
class MoneyFst(GraphFst):
    """
    Verbalizer FST for money tokens (fields shown in the order this grammar consumes them), e.g.
        money { currency: "$" integer_part: "12" fractional_part: "05" } -> $12.05

    Args:
        decimal: DecimalFst (its ``numbers`` graph supplies the amount)
    """

    def __init__(self, decimal: GraphFst):
        super().__init__(name="money", kind="verbalize")

        # currency: "<one or more DAMO_CHAR symbols other than a plain space>"
        symbol = pynini.closure(DAMO_CHAR - " ", 1)
        currency = (
            pynutil.delete("currency:")
            + delete_space
            + pynutil.delete('"')
            + symbol
            + pynutil.delete('"')
        )

        # The currency symbol is emitted directly in front of the amount.
        amount = currency + delete_space + decimal.numbers

        self.fst = self.delete_tokens(amount).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/ordinal.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
(
DAMO_NOT_QUOTE
,
DAMO_SIGMA
,
GraphFst
,
delete_space
,
)
from
pynini.lib
import
pynutil
class OrdinalFst(GraphFst):
    """
    Verbalizer FST for ordinal tokens, e.g.
        ordinal { integer: "13" } -> 13th
    """

    def __init__(self):
        super().__init__(name="ordinal", kind="verbalize")

        # integer: "<value>" -> <value>
        number = (
            pynutil.delete("integer:")
            + delete_space
            + pynutil.delete('"')
            + pynini.closure(DAMO_NOT_QUOTE, 1)
            + pynutil.delete('"')
        )

        # Endings that take an explicit suffix; 11/12/13 are listed before 1/2/3,
        # matching the order of the original union.
        explicit_endings = (
            ("11", "11th"),
            ("12", "12th"),
            ("13", "13th"),
            ("1", "1st"),
            ("2", "2nd"),
            ("3", "3rd"),
        )
        first_src, first_dst = explicit_endings[0]
        ending_rule = pynini.cross(first_src, first_dst)
        for src, dst in explicit_endings[1:]:
            ending_rule = ending_rule | pynini.cross(src, dst)
        # Fallback: append "th". NOTE(review): the 0.01 weight presumably makes the
        # explicit rules above win when both apply -- confirm.
        ending_rule = ending_rule | pynutil.insert("th", weight=0.01)

        # The rule fires only at the end of the string ("[EOS]" right context).
        suffix = pynini.cdrewrite(ending_rule, "", "[EOS]", DAMO_SIGMA)

        with_suffix = number @ suffix
        self.fst = self.delete_tokens(with_suffix).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/telephone.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
DAMO_NOT_QUOTE
,
GraphFst
from
pynini.lib
import
pynutil
class TelephoneFst(GraphFst):
    """
    Verbalizer FST for telephone tokens, e.g.
        telephone { number_part: "123-123-5678" }
        -> 123-123-5678

    An optional ``country_code`` field in front of ``number_part`` is emitted
    too, followed by the single space that separates the two fields.
    """

    def __init__(self):
        super().__init__(name="telephone", kind="verbalize")
        close_quote = pynutil.delete('"')
        value = pynini.closure(DAMO_NOT_QUOTE, 1)

        number_part = pynutil.delete('number_part: "') + value + close_quote

        # The space after the country code is accepted (kept), not deleted.
        country_code = pynutil.delete('country_code: "') + value + close_quote + pynini.accep(" ")
        optional_country_code = pynini.closure(country_code, 0, 1)

        full_number = optional_country_code + number_part
        self.fst = self.delete_tokens(full_number).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/time.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
(
DAMO_CHAR
,
DAMO_DIGIT
,
GraphFst
,
delete_space
,
insert_space
,
)
from
pynini.lib
import
pynutil
class TimeFst(GraphFst):
    """
    Finite state transducer for verbalizing time, e.g.
        time { hours: "12" minutes: "30" } -> 12:30
        time { hours: "1" minutes: "12" } -> 01:12
        time { hours: "2" minutes: "00" suffix: "a.m." } -> 02:00 a.m.

    Both ``hours`` and ``minutes`` are required by this grammar and must be one
    or two DAMO_DIGIT symbols each; ``suffix`` and ``zone`` are optional and, when
    present, must appear in that order.
    """

    def __init__(self):
        super().__init__(name="time", kind="verbalize")
        drop_quote = pynutil.delete('"')

        # Two digits pass through unchanged; a single digit gets a "0" inserted in front.
        add_leading_zero_to_double_digit = (DAMO_DIGIT + DAMO_DIGIT) | (pynutil.insert("0") + DAMO_DIGIT)

        def digit_field(label):
            # <label> "<digits>" -> <digits>
            return (
                pynutil.delete(label)
                + delete_space
                + drop_quote
                + pynini.closure(DAMO_DIGIT, 1)
                + drop_quote
            )

        def trailing_field(label):
            # Applies delete_space then insert_space ahead of the field, and emits
            # the field value: one or more DAMO_CHAR symbols other than a plain space.
            return (
                delete_space
                + insert_space
                + pynutil.delete(label)
                + delete_space
                + drop_quote
                + pynini.closure(DAMO_CHAR - " ", 1)
                + drop_quote
            )

        hour = digit_field("hours:")
        minute = digit_field("minutes:")
        optional_suffix = pynini.closure(trailing_field("suffix:"), 0, 1)
        optional_zone = pynini.closure(trailing_field("zone:"), 0, 1)

        # `@` binds tighter than `+`; the parentheses only make that explicit.
        graph = (
            (hour @ add_leading_zero_to_double_digit)
            + delete_space
            + pynutil.insert(":")
            + (minute @ add_leading_zero_to_double_digit)
            + optional_suffix
            + optional_zone
        )
        self.fst = self.delete_tokens(graph).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/verbalize.py
0 → 100644
View file @
70a8a9e0
from
fun_text_processing.inverse_text_normalization.en.verbalizers.cardinal
import
CardinalFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.date
import
DateFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.decimal
import
DecimalFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.electronic
import
ElectronicFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.measure
import
MeasureFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.money
import
MoneyFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.ordinal
import
OrdinalFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.telephone
import
TelephoneFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.time
import
TimeFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.whitelist
import
WhiteListFst
from
fun_text_processing.text_normalization.en.graph_utils
import
GraphFst
class VerbalizeFst(GraphFst):
    """
    Union of the individual verbalizer grammars.
    For deployment, this grammar will be compiled and exported to an OpenFst Finite State Archive (FAR) file.
    More details to deployment at NeMo/tools/text_processing_deployment.
    """

    def __init__(self):
        super().__init__(name="verbalize", kind="verbalize")

        # The cardinal and decimal instances are kept because measure/money reuse them.
        cardinal = CardinalFst()
        cardinal_fst = cardinal.fst
        ordinal_fst = OrdinalFst().fst
        decimal = DecimalFst()
        decimal_fst = decimal.fst
        measure_fst = MeasureFst(decimal=decimal, cardinal=cardinal).fst
        money_fst = MoneyFst(decimal=decimal).fst
        time_fst = TimeFst().fst
        date_fst = DateFst().fst
        whitelist_fst = WhiteListFst().fst
        telephone_fst = TelephoneFst().fst
        electronic_fst = ElectronicFst().fst

        # Union order is preserved from the original definition.
        alternatives = (
            time_fst,
            date_fst,
            money_fst,
            measure_fst,
            ordinal_fst,
            decimal_fst,
            cardinal_fst,
            whitelist_fst,
            telephone_fst,
            electronic_fst,
        )
        combined = alternatives[0]
        for alternative in alternatives[1:]:
            combined = combined | alternative
        self.fst = combined
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/verbalize_final.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.inverse_text_normalization.en.verbalizers.verbalize
import
VerbalizeFst
from
fun_text_processing.inverse_text_normalization.en.verbalizers.word
import
WordFst
from
fun_text_processing.text_normalization.en.graph_utils
import
(
GraphFst
,
delete_extra_space
,
delete_space
,
)
from
pynini.lib
import
pynutil
class VerbalizeFinalFst(GraphFst):
    """
    Sentence-level verbalizer: handles a sequence of one or more ``tokens { ... }`` groups, e.g.
        tokens { name: "its" } tokens { time { hours: "12" minutes: "30" } } tokens { name: "now" } -> its 12:30 now
    """

    def __init__(self):
        super().__init__(name="verbalize_final", kind="verbalize")

        # A token body is either a semiotic-class verbalization or a plain word.
        token_body = VerbalizeFst().fst | WordFst().fst

        open_brace = pynutil.delete("{")
        close_brace = pynutil.delete("}")
        one_token = (
            pynutil.delete("tokens")
            + delete_space
            + open_brace
            + delete_space
            + token_body
            + delete_space
            + close_brace
        )

        # Zero or more tokens each followed by delete_extra_space, then a final
        # token; delete_space is applied at both ends of the sentence.
        leading_tokens = pynini.closure(one_token + delete_extra_space)
        self.fst = delete_space + leading_tokens + one_token + delete_space
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/whitelist.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
(
DAMO_CHAR
,
DAMO_SIGMA
,
GraphFst
,
delete_space
,
)
from
pynini.lib
import
pynutil
class WhiteListFst(GraphFst):
    """
    Verbalizer FST for whitelist tokens
        e.g. tokens { name: "mrs." } -> mrs.
    """

    def __init__(self):
        super().__init__(name="whitelist", kind="verbalize")

        # name: "<one or more DAMO_CHAR symbols other than a plain space>"
        value = pynini.closure(DAMO_CHAR - " ", 1)
        name_field = (
            pynutil.delete("name:")
            + delete_space
            + pynutil.delete('"')
            + value
            + pynutil.delete('"')
        )

        # Every no-break space (U+00A0) in the value is turned into a regular space.
        nbsp_to_space = pynini.cdrewrite(pynini.cross("\u00A0", " "), "", "", DAMO_SIGMA)

        # Note: self.delete_tokens is not applied in this class.
        self.fst = (name_field @ nbsp_to_space).optimize()
FunASR/fun_text_processing/inverse_text_normalization/en/verbalizers/word.py
0 → 100644
View file @
70a8a9e0
import
pynini
from
fun_text_processing.text_normalization.en.graph_utils
import
(
DAMO_CHAR
,
DAMO_SIGMA
,
GraphFst
,
delete_space
,
)
from
pynini.lib
import
pynutil
class WordFst(GraphFst):
    """
    Verbalizer FST for plain (non-semiotic) tokens
        e.g. tokens { name: "sleep" } -> sleep
    """

    def __init__(self):
        super().__init__(name="word", kind="verbalize")
        drop_quote = pynutil.delete('"')

        # One or more DAMO_CHAR symbols, excluding the plain space.
        word_chars = pynini.closure(DAMO_CHAR - " ", 1)
        name_field = pynutil.delete("name:") + delete_space + drop_quote + word_chars + drop_quote

        # Every no-break space (U+00A0) in the value is turned into a regular space.
        nbsp_to_space = pynini.cdrewrite(pynini.cross("\u00A0", " "), "", "", DAMO_SIGMA)

        # Note: self.delete_tokens is not applied in this class.
        self.fst = (name_field @ nbsp_to_space).optimize()
FunASR/fun_text_processing/inverse_text_normalization/es/__init__.py
0 → 100644
View file @
70a8a9e0
from
fun_text_processing.inverse_text_normalization.es.taggers.tokenize_and_classify
import
(
ClassifyFst
,
)
from
fun_text_processing.inverse_text_normalization.es.verbalizers.verbalize
import
VerbalizeFst
from
fun_text_processing.inverse_text_normalization.es.verbalizers.verbalize_final
import
(
VerbalizeFinalFst
,
)
FunASR/fun_text_processing/inverse_text_normalization/es/data/__init__.py
0 → 100644
View file @
70a8a9e0
FunASR/fun_text_processing/inverse_text_normalization/es/data/currency_plural.tsv
0 → 100644
View file @
70a8a9e0
€ euros
US$ dólares estadounidenses
US$ dólares americanos
$ dólares
$ pesos
¥ yenes
\ No newline at end of file
FunASR/fun_text_processing/inverse_text_normalization/es/data/currency_singular.tsv
0 → 100644
View file @
70a8a9e0
€ euro
US$ dólar estadounidense
US$ dólar americano
$ dólar
$ peso
¥ yen
\ No newline at end of file
FunASR/fun_text_processing/inverse_text_normalization/es/data/electronic/__init__.py
0 → 100644
View file @
70a8a9e0
FunASR/fun_text_processing/inverse_text_normalization/es/data/electronic/domain.tsv
0 → 100644
View file @
70a8a9e0
com
es
uk
fr
net
br
in
ru
de
it
edu
co
ar
bo
cl
co
ec
fk
gf
fy
pe
py
sr
ve
uy
\ No newline at end of file
Prev
1
…
14
15
16
17
18
19
20
21
22
…
42
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment