initial commit

70a8a9e0 · wangwei990215 · 70a8a9e0 · 70a8a9e0 · 70a8a9e0 · 70a8a9e0
Commit 70a8a9e0 authored Oct 03, 2024 by wangwei990215
20 changed files
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/ordinals/hundreds.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/ordinals/hundreds.tsv
+centésimo	1
+centésima	1
+ducentésimo	2
+ducentésima	2
+tricentésimo	3
+tricentésima	3
+trecentésimo	3
+trecentésima	3
+quadringentésimo	4
+quadringentésima	4
+quingentésimo	5
+quingentésima	5
+sexcentésimo	6
+sexcentésima	6
+seiscentésimo	6
+seiscentésima	6
+septingentésimo	7
+septingentésima	7
+setingentésimo	7
+setingentésima	7
+octingentésimo	8
+octingentésima	8
+octogentésimo	8
+octogentésima	8
+noningentésimo	9
+noningentésima	9
+nongentésimo	9
+nongentésima	9
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/ordinals/ties.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/ordinals/ties.tsv
+décimo	1
+décima	1
+vigésimo	2
+vigésima	2
+trigésimo	3
+trigésima	3
+quadragésimo	4
+quadragésima	4
+quinquagésimo	5
+quinquagésima	5
+sexagésimo	6
+sexagésima	6
+septuagésimo	7
+septuagésima	7
+setuagésimo	7
+setuagésima	7
+octogésimo	8
+octogésima	8
+nonagésimo	9
+nonagésima	9
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/__init__.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/__init__.py
+
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/hour_to_am.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/hour_to_am.tsv
+1	0
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/hour_to_pm.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/hour_to_pm.tsv
+1	12
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/hours_to.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/hours_to.tsv
+0	23
+2	1
+3	2
+4	3
+5	4
+6	5
+7	6
+8	7
+9	8
+10	9
+11	10
+12	11
+13	12
+14	13
+15	14
+16	15
+17	16
+18	17
+19	18
+20	19
+21	20
+22	21
+23	22
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/minutes_to.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/minutes_to.tsv
+01	59
+02	58
+03	57
+04	56
+05	55
+06	54
+07	53
+08	52
+09	51
+10	50
+11	49
+12	48
+13	47
+14	46
+15	45
+16	44
+17	43
+18	42
+19	41
+20	40
+21	39
+22	38
+23	37
+24	36
+25	35
+26	34
+27	33
+28	32
+29	31
+30	30
+31	29
+32	28
+33	27
+34	26
+35	25
+36	24
+37	23
+38	22
+39	21
+40	20
+41	19
+42	18
+43	17
+44	16
+45	15
+46	14
+47	13
+48	12
+49	11
+50	10
+51	09
+52	08
+53	07
+54	06
+55	05
+56	04
+57	03
+58	02
+59	01
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/time_suffix_am.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/time_suffix_am.tsv
+da madrugada	da madrugada
+da manhã	da manhã
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/time_suffix_pm.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/time/time_suffix_pm.tsv
+da tarde	da tarde
+da noite	da noite
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/data/whitelist.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/data/whitelist.tsv
+segunda-feira	segunda feira
+terça-feira	terça feira
+quarta-feira	quarta feira
+quinta-feira	quinta feira
+sexta-feira	sexta feira
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/__init__.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/__init__.py
+
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/cardinal.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/cardinal.py
+import pynini
+from fun_text_processing.inverse_text_normalization.pt.utils import get_abs_path
+from fun_text_processing.text_normalization.en.graph_utils import (
+    DAMO_ALPHA,
+    DAMO_DIGIT,
+    DAMO_SIGMA,
+    DAMO_SPACE,
+    DAMO_WHITE_SPACE,
+    GraphFst,
+    delete_space,
+)
+from pynini.lib import pynutil
+
+
+class CardinalFst(GraphFst):
+    """
+    Finite state transducer for classifying cardinals
+        e.g. menos vinte e três -> cardinal { negative: "-" integer: "23" }
+    This class converts cardinals up to (but not including) "um septilhão",
+    i.e. up to "one septillion" in English (10^{24}): the largest scale word
+    handled below is "sextilhão"/"sextilhões" with a three-digit multiplier.
+    Cardinals below ten are not converted (in order to avoid
+    "vivo em uma casa" --> "vivo em 1 casa" and any other odd conversions.)
+
+    Although technically Portuguese grammar requires that "e" only comes after
+    "10s" numbers (ie. "trinta", ..., "noventa"), these rules will convert
+    numbers even with "e" in an ungrammatical place (because "e" is ignored
+    inside cardinal numbers).
+        e.g. "mil e uma" -> cardinal { integer: "1001"}
+        e.g. "cento e uma" -> cardinal { integer: "101"}
+
+    Attributes set by the constructor:
+        graph_no_exception: words -> digits, including the words for 0-9
+        numbers_up_to_thousand: graph_no_exception limited to 1-3 output digits
+        numbers_up_to_million: graph_no_exception limited to 1-6 output digits
+        digits_from_year: numbers_up_to_million limited to outputs "1".."2099"
+        graph: graph_no_exception minus the single digit/zero words
+        fst: the final tagger (optional "menos" + integer field, wrapped in tokens)
+    """
+
+    def __init__(self, use_strict_e=False):
+        """
+        :param use_strict_e: When True forces to have the separator "e" in the right places
+        """
+        super().__init__(name="cardinal", kind="classify")
+        # Word -> digit lookup tables shipped with the package.
+        graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+        graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+        graph_ties = pynini.string_file(get_abs_path("data/numbers/ties.tsv"))
+        graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
+        graph_twenties = pynini.string_file(get_abs_path("data/numbers/twenties.tsv"))
+        graph_one_hundred = pynini.string_file(get_abs_path("data/numbers/onehundred.tsv"))
+        graph_hundreds = pynini.string_file(get_abs_path("data/numbers/hundreds.tsv"))
+
+        graph = None
+
+        if not use_strict_e:
+            # Lenient mode: every "e" standing between two spaces is deleted up
+            # front (see the cdrewrite at the end of this branch), so none of
+            # the sub-graphs built here mention the conjunction.
+
+            # One three-digit group: a hundreds digit followed by two more
+            # digits; every missing part is padded with inserted zeros.
+            graph_hundred_component = graph_hundreds | pynutil.insert("0")
+            graph_hundred_component += delete_space
+            graph_hundred_component += pynini.union(
+                graph_twenties | graph_teen | pynutil.insert("00"),
+                (graph_ties | pynutil.insert("0"))
+                + delete_space
+                + (graph_digit | pynutil.insert("0")),
+            )
+            graph_hundred_component = pynini.union(graph_hundred_component, graph_one_hundred)
+
+            # Same group, but only the outputs holding at least one non-zero
+            # digit; used as the multiplier in front of a scale word.
+            graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
+                pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
+            )
+
+            # Each scale group below yields exactly three digits: either a
+            # multiplier followed by the (deleted) scale word, or an inserted
+            # "000" at a small weight when the group is absent.
+            graph_thousands = pynini.union(
+                graph_hundred_component_at_least_one_none_zero_digit
+                + delete_space
+                + pynutil.delete("mil"),
+                pynutil.insert("001")
+                + pynutil.delete("mil"),  # because we say 'mil', not 'hum mil'
+                pynutil.insert("000", weight=0.01),
+            )
+
+            graph_milhoes = pynini.union(
+                graph_hundred_component_at_least_one_none_zero_digit
+                + delete_space
+                + (pynutil.delete("milhão") | pynutil.delete("milhões")),
+                pynutil.insert("000", weight=0.01),
+            )
+
+            graph_bilhoes = pynini.union(
+                graph_hundred_component_at_least_one_none_zero_digit
+                + delete_space
+                + (pynutil.delete("bilhão") | pynutil.delete("bilhões")),
+                pynutil.insert("000", weight=0.01),
+            )
+
+            graph_trilhoes = pynini.union(
+                graph_hundred_component_at_least_one_none_zero_digit
+                + delete_space
+                + (pynutil.delete("trilhão") | pynutil.delete("trilhões")),
+                pynutil.insert("000", weight=0.01),
+            )
+
+            graph_quatrilhoes = pynini.union(
+                graph_hundred_component_at_least_one_none_zero_digit
+                + delete_space
+                + (pynutil.delete("quatrilhão") | pynutil.delete("quatrilhões")),
+                pynutil.insert("000", weight=0.01),
+            )
+
+            graph_quintilhoes = pynini.union(
+                graph_hundred_component_at_least_one_none_zero_digit
+                + delete_space
+                + (pynutil.delete("quintilhão") | pynutil.delete("quintilhões")),
+                pynutil.insert("000", weight=0.01),
+            )
+
+            graph_sextilhoes = pynini.union(
+                graph_hundred_component_at_least_one_none_zero_digit
+                + delete_space
+                + (pynutil.delete("sextilhão") | pynutil.delete("sextilhões")),
+                pynutil.insert("000", weight=0.01),
+            )
+
+            # Full number: the eight three-digit groups from the largest scale
+            # down to the units (24 digits in total), or the word for zero.
+            graph = pynini.union(
+                graph_sextilhoes
+                + delete_space
+                + graph_quintilhoes
+                + delete_space
+                + graph_quatrilhoes
+                + delete_space
+                + graph_trilhoes
+                + delete_space
+                + graph_bilhoes
+                + delete_space
+                + graph_milhoes
+                + delete_space
+                + graph_thousands
+                + delete_space
+                + graph_hundred_component,
+                graph_zero,
+            )
+
+            # Drop the leading zeros of the padded digit string; a lone "0"
+            # is kept as it is.
+            graph = graph @ pynini.union(
+                pynutil.delete(pynini.closure("0"))
+                + pynini.difference(DAMO_DIGIT, "0")
+                + pynini.closure(DAMO_DIGIT),
+                "0",
+            )
+
+            # Pre-processing chain: delete each "e" found between two spaces,
+            # require the rewritten text to start with a DAMO_ALPHA symbol,
+            # then run the number graph on the result.
+            graph = (
+                pynini.cdrewrite(pynutil.delete("e"), DAMO_SPACE, DAMO_SPACE, DAMO_SIGMA)
+                @ (DAMO_ALPHA + DAMO_SIGMA)
+                @ graph
+            )
+
+        else:
+            # Strict mode: "e" is consumed explicitly, together with the
+            # whitespace around it, and only where the graphs below allow it.
+            graph_e = (
+                pynutil.delete(DAMO_WHITE_SPACE.plus)
+                + pynutil.delete("e")
+                + pynutil.delete(DAMO_WHITE_SPACE.plus)
+            )
+
+            # Two-digit group with at least one non-zero digit: teen/twenties
+            # word, ties optionally followed by "e" + digit, or a lone digit
+            # (zero-padded, slightly penalised).
+            graph_ties_component = pynini.union(
+                graph_teen | graph_twenties,
+                graph_ties + ((graph_e + graph_digit) | pynutil.insert("0")),
+                pynutil.add_weight(pynutil.insert("0") + graph_digit, 0.1),
+            ) @ (pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT))
+
+            # graph_hundreds restricted to inputs other than "cento".
+            graph_hundreds_except_hundred = (
+                pynini.project(graph_hundreds, "input") - "cento"
+            ) @ graph_hundreds
+
+            # Naming used from here on: a "*_prefix_e" graph is one that the
+            # larger graphs attach to what precedes it through graph_e, while
+            # a "*_no_prefix" graph is attached through delete_space only.
+
+            # Three-digit group without an inner "e": the one-hundred table,
+            # a round hundred, or a bare two-digit group.
+            graph_hundred_component_prefix_e = pynini.union(
+                graph_one_hundred,
+                pynutil.add_weight(graph_hundreds_except_hundred + pynutil.insert("00"), 0.1),
+                pynutil.insert("0") + graph_ties_component,
+            ) @ (pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT))
+            graph_hundred_component_prefix_e = graph_hundred_component_prefix_e.optimize()
+
+            # Three-digit group of the form hundreds + "e" + two-digit group.
+            graph_hundred_component_no_prefix = pynini.union(
+                graph_hundreds + graph_e + graph_ties_component,
+            ) @ (pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT))
+            graph_hundred_component_no_prefix = graph_hundred_component_no_prefix.optimize()
+
+            # Thousands group followed by the units group (six digits).
+            graph_mil_prefix_e = pynini.union(
+                # because we say 'mil', not 'hum mil'
+                (
+                    (graph_hundred_component_prefix_e + delete_space + pynutil.delete("mil"))
+                    | (pynutil.insert("001", weight=0.1) + pynutil.delete("mil"))
+                )
+                + (
+                    (graph_e + graph_hundred_component_prefix_e)
+                    | (delete_space + graph_hundred_component_no_prefix)
+                    | pynutil.insert("000", weight=0.1)
+                )
+            )
+
+            graph_mil_no_prefix = pynini.union(
+                (
+                    (graph_hundred_component_no_prefix + delete_space + pynutil.delete("mil"))
+                    | pynutil.insert("000", weight=0.1)
+                )
+                + (
+                    (graph_e + graph_hundred_component_prefix_e)
+                    | (delete_space + graph_hundred_component_no_prefix)
+                    | pynutil.insert("000", weight=0.1)
+                )
+            )
+
+            # Every larger scale repeats the same pattern: a multiplier plus
+            # the deleted scale word (or an inserted "000" in the no_prefix
+            # variant), followed by the graph of the next smaller scale.
+            graph_milhao_prefix_e = pynini.union(
+                (
+                    graph_hundred_component_prefix_e
+                    + delete_space
+                    + (pynutil.delete("milhão") | pynutil.delete("milhões"))
+                )
+                + ((graph_e + graph_mil_prefix_e) | (delete_space + graph_mil_no_prefix))
+            )
+
+            graph_milhao_no_prefix = pynini.union(
+                (
+                    (
+                        graph_hundred_component_no_prefix
+                        + delete_space
+                        + (pynutil.delete("milhão") | pynutil.delete("milhões"))
+                    )
+                    | pynutil.insert("000", weight=0.1)
+                )
+                + ((graph_e + graph_mil_prefix_e) | (delete_space + graph_mil_no_prefix))
+            )
+
+            graph_bilhao_prefix_e = pynini.union(
+                (
+                    graph_hundred_component_prefix_e
+                    + delete_space
+                    + (pynutil.delete("bilhão") | pynutil.delete("bilhões"))
+                )
+                + ((graph_e + graph_milhao_prefix_e) | (delete_space + graph_milhao_no_prefix))
+            )
+
+            graph_bilhao_no_prefix = pynini.union(
+                (
+                    (
+                        graph_hundred_component_no_prefix
+                        + delete_space
+                        + (pynutil.delete("bilhão") | pynutil.delete("bilhões"))
+                    )
+                    | pynutil.insert("000", weight=0.1)
+                )
+                + ((graph_e + graph_milhao_prefix_e) | (delete_space + graph_milhao_no_prefix))
+            )
+
+            graph_trilhao_prefix_e = pynini.union(
+                (
+                    graph_hundred_component_prefix_e
+                    + delete_space
+                    + (pynutil.delete("trilhão") | pynutil.delete("trilhões"))
+                )
+                + ((graph_e + graph_bilhao_prefix_e) | (delete_space + graph_bilhao_no_prefix))
+            )
+
+            graph_trilhao_no_prefix = pynini.union(
+                (
+                    (
+                        graph_hundred_component_no_prefix
+                        + delete_space
+                        + (pynutil.delete("trilhão") | pynutil.delete("trilhões"))
+                    )
+                    | pynutil.insert("000", weight=0.1)
+                )
+                + ((graph_e + graph_bilhao_prefix_e) | (delete_space + graph_bilhao_no_prefix))
+            )
+
+            graph_quatrilhao_prefix_e = pynini.union(
+                (
+                    graph_hundred_component_prefix_e
+                    + delete_space
+                    + (pynutil.delete("quatrilhão") | pynutil.delete("quatrilhões"))
+                )
+                + ((graph_e + graph_trilhao_prefix_e) | (delete_space + graph_trilhao_no_prefix))
+            )
+
+            graph_quatrilhao_no_prefix = pynini.union(
+                (
+                    (
+                        graph_hundred_component_no_prefix
+                        + delete_space
+                        + (pynutil.delete("quatrilhão") | pynutil.delete("quatrilhões"))
+                    )
+                    | pynutil.insert("000", weight=0.1)
+                )
+                + ((graph_e + graph_trilhao_prefix_e) | (delete_space + graph_trilhao_no_prefix))
+            )
+
+            graph_quintilhao_prefix_e = pynini.union(
+                (
+                    graph_hundred_component_prefix_e
+                    + delete_space
+                    + (pynutil.delete("quintilhão") | pynutil.delete("quintilhões"))
+                )
+                + (
+                    (graph_e + graph_quatrilhao_prefix_e)
+                    | (delete_space + graph_quatrilhao_no_prefix)
+                )
+            )
+
+            graph_quintilhao_no_prefix = pynini.union(
+                (
+                    (
+                        graph_hundred_component_no_prefix
+                        + delete_space
+                        + (pynutil.delete("quintilhão") | pynutil.delete("quintilhões"))
+                    )
+                    | pynutil.insert("000", weight=0.1)
+                )
+                + (
+                    (graph_e + graph_quatrilhao_prefix_e)
+                    | (delete_space + graph_quatrilhao_no_prefix)
+                )
+            )
+
+            graph_sextilhao_prefix_e = pynini.union(
+                (
+                    graph_hundred_component_prefix_e
+                    + delete_space
+                    + (pynutil.delete("sextilhão") | pynutil.delete("sextilhões"))
+                )
+                + (
+                    (graph_e + graph_quintilhao_prefix_e)
+                    | (delete_space + graph_quintilhao_no_prefix)
+                )
+            )
+
+            graph_sextilhao_no_prefix = pynini.union(
+                (
+                    (
+                        graph_hundred_component_no_prefix
+                        + delete_space
+                        + (pynutil.delete("sextilhão") | pynutil.delete("sextilhões"))
+                    )
+                    | pynutil.insert("000", weight=0.1)
+                )
+                + (
+                    (graph_e + graph_quintilhao_prefix_e)
+                    | (delete_space + graph_quintilhao_no_prefix)
+                )
+            )
+
+            # A number may start at any scale, so every entry point is offered.
+            graph = pynini.union(
+                graph_sextilhao_no_prefix,
+                graph_sextilhao_prefix_e,
+                graph_quintilhao_prefix_e,
+                graph_quatrilhao_prefix_e,
+                graph_trilhao_prefix_e,
+                graph_bilhao_prefix_e,
+                graph_milhao_prefix_e,
+                graph_mil_prefix_e,
+                graph_hundred_component_prefix_e,
+                graph_ties_component,
+                graph_zero,
+            ).optimize()
+
+            # Drop the leading zeros of the padded digit string; a lone "0"
+            # is kept as it is.
+            graph = graph @ pynini.union(
+                pynutil.delete(pynini.closure("0"))
+                + pynini.difference(DAMO_DIGIT, "0")
+                + pynini.closure(DAMO_DIGIT),
+                "0",
+            )
+
+        graph = graph.optimize()
+        # Unfiltered words -> digits graph (still converts the words for 0-9).
+        self.graph_no_exception = graph
+
+        # save self.numbers_up_to_thousand for use in DecimalFst
+        # (keeps only the results written with one to three digits)
+        digits_up_to_thousand = DAMO_DIGIT | (DAMO_DIGIT**2) | (DAMO_DIGIT**3)
+        numbers_up_to_thousand = pynini.compose(graph, digits_up_to_thousand).optimize()
+        self.numbers_up_to_thousand = numbers_up_to_thousand
+
+        # save self.numbers_up_to_million for use in DecimalFst
+        # (keeps only the results written with one to six digits)
+        digits_up_to_million = (
+            DAMO_DIGIT
+            | (DAMO_DIGIT**2)
+            | (DAMO_DIGIT**3)
+            | (DAMO_DIGIT**4)
+            | (DAMO_DIGIT**5)
+            | (DAMO_DIGIT**6)
+        )
+        numbers_up_to_million = pynini.compose(graph, digits_up_to_million).optimize()
+        self.numbers_up_to_million = numbers_up_to_million
+
+        # save self.digits_from_year for use in DateFst
+        # (keeps only the results "1" through "2099")
+        digits_1_2099 = [str(digits) for digits in range(1, 2100)]
+        digits_from_year = (numbers_up_to_million @ pynini.union(*digits_1_2099)).optimize()
+        self.digits_from_year = digits_from_year
+
+        # don't convert cardinals from zero to nine inclusive
+        graph_exception = pynini.project(pynini.union(graph_digit, graph_zero), "input")
+
+        # Remove the excepted words from the accepted inputs, then map the
+        # remaining inputs through the full graph.
+        self.graph = (pynini.project(graph, "input") - graph_exception.arcsort()) @ graph
+
+        # An optional leading "menos" becomes the field negative: "-".
+        optional_minus_graph = pynini.closure(
+            pynutil.insert("negative: ") + pynini.cross("menos", '"-"') + DAMO_SPACE, 0, 1
+        )
+
+        final_graph = (
+            optional_minus_graph + pynutil.insert('integer: "') + self.graph + pynutil.insert('"')
+        )
+
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/date.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/date.py
+import pynini
+from fun_text_processing.inverse_text_normalization.pt.utils import get_abs_path
+from fun_text_processing.text_normalization.en.graph_utils import (
+    GraphFst,
+    delete_extra_space,
+    delete_space,
+)
+from pynini.lib import pynutil
+
+
+class DateFst(GraphFst):
+    """
+    Finite state transducer for classifying date,
+        e.g. primeiro de janeiro -> date { day: "1" month: "janeiro" }
+        e.g. um de janeiro -> date { day: "1" month: "janeiro" }
+    """
+
+    def __init__(self, cardinal: GraphFst):
+        super().__init__(name="date", kind="classify")
+
+        digits_from_year = cardinal.digits_from_year
+
+        graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+        graph_ties = pynini.string_file(get_abs_path("data/numbers/ties.tsv"))
+        graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
+        graph_twenties = pynini.string_file(get_abs_path("data/numbers/twenties.tsv"))
+
+        graph_1_to_100 = pynini.union(
+            pynutil.insert("0") + graph_digit,
+            graph_twenties,
+            graph_teen,
+            (graph_ties + pynutil.insert("0")),
+            (graph_ties + pynutil.delete(" e ") + graph_digit),
+        )
+
+        digits_1_to_31 = [str("{:0>2d}").format(digits) for digits in range(1, 32)]
+        graph_1_to_31 = graph_1_to_100 @ pynini.union(*digits_1_to_31)
+        # can use "primeiro" for 1st day of the month
+        graph_1_to_31 = pynini.union(graph_1_to_31, pynini.cross("primeiro", "01"))
+
+        day_graph = pynutil.insert('day: "') + graph_1_to_31 + pynutil.insert('"')
+
+        month_name_graph = pynini.string_file(get_abs_path("data/months.tsv"))
+        month_name_graph = pynutil.insert('month: "') + month_name_graph + pynutil.insert('"')
+
+        # vinte do oito -> 20/08
+        digits_1_to_12 = [str("{:0>2d}").format(digits) for digits in range(1, 13)]
+        graph_1_to_12 = graph_1_to_100 @ pynini.union(*digits_1_to_12)
+        month_number_graph = pynutil.insert('month: "') + graph_1_to_12 + pynutil.insert('"')
+
+        graph_dm = (
+            day_graph + delete_space + pynutil.delete("de") + delete_extra_space + month_name_graph
+        )
+
+        graph_dm |= (
+            day_graph
+            + delete_space
+            + pynutil.delete("do")
+            + delete_extra_space
+            + month_number_graph
+            + pynutil.insert(' morphosyntactic_features: "/"')
+        )
+
+        graph_year = (
+            delete_space
+            + pynutil.delete("de")
+            + delete_extra_space
+            + pynutil.insert('year: "')
+            + digits_from_year
+            + pynutil.insert('"')
+        )
+        graph_dmy = graph_dm + graph_year.ques
+
+        final_graph = graph_dmy
+        final_graph += pynutil.insert(" preserve_order: true")
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/decimal.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/decimal.py
+import pynini
+from fun_text_processing.inverse_text_normalization.pt.utils import get_abs_path
+from fun_text_processing.text_normalization.en.graph_utils import (
+    DAMO_DIGIT,
+    GraphFst,
+    delete_extra_space,
+    delete_space,
+)
+from pynini.lib import pynutil
+
+
+def get_quantity(
+    decimal: "pynini.FstLike", cardinal_up_to_million: "pynini.FstLike"
+) -> "pynini.FstLike":
+    """
+    Returns FST that transforms either a cardinal or decimal followed by a quantity into a numeral,
+    e.g. one million -> integer_part: "1" quantity: "million"
+    e.g. one point five million -> integer_part: "1" fractional_part: "5" quantity: "million"
+
+    Args:
+        decimal: decimal FST
+        cardinal_up_to_million: cardinal FST
+    """
+    numbers = cardinal_up_to_million @ (
+        pynutil.delete(pynini.closure("0"))
+        + pynini.difference(DAMO_DIGIT, "0")
+        + pynini.closure(DAMO_DIGIT)
+    )
+
+    suffix = pynini.union(
+        "milhão",
+        "milhões",
+        "bilhão",
+        "bilhões",
+        "trilhão",
+        "trilhões",
+        "quatrilhão",
+        "quatrilhões",
+        "quintilhão",
+        "quintilhões",
+        "sextilhão",
+        "sextilhões",
+    )
+    res = (
+        pynutil.insert('integer_part: "')
+        + numbers
+        + pynutil.insert('"')
+        + delete_extra_space
+        + pynutil.insert('quantity: "')
+        + suffix
+        + pynutil.insert('"')
+    )
+    res |= (
+        decimal + delete_extra_space + pynutil.insert('quantity: "') + suffix + pynutil.insert('"')
+    )
+    return res
+
+
+class DecimalFst(GraphFst):
+    """
+    Finite state transducer for classifying decimal
+        Decimal point is either "." or ",", determined by whether "ponto" or "vírgula" is spoken.
+            e.g. menos um vírgula dois seis -> decimal { negative: "true" integer_part: "1" morphosyntactic_features: "," fractional_part: "26" }
+            e.g. menos um ponto dois seis -> decimal { negative: "true" integer_part: "1" morphosyntactic_features: "." fractional_part: "26" }
+
+        This decimal rule assumes that decimals can be pronounced as:
+        (a cardinal) + ('vírgula' or 'ponto') plus (any sequence of cardinals <1000, including 'zero')
+
+        Also writes large numbers in shortened form, e.g.
+            e.g. um vírgula dois seis milhões -> decimal { negative: "false" integer_part: "1" morphosyntactic_features: "," fractional_part: "26" quantity: "milhões" }
+            e.g. dois milhões -> decimal { negative: "false" integer_part: "2" quantity: "milhões" }
+            e.g. mil oitcentos e vinte e quatro milhões -> decimal { negative: "false" integer_part: "1824" quantity: "milhões" }
+    Args:
+        cardinal: CardinalFst
+
+    """
+
+    def __init__(self, cardinal: GraphFst):
+        super().__init__(name="decimal", kind="classify")
+
+        # number after decimal point can be any series of cardinals <1000, including 'zero'
+        graph_decimal = cardinal.numbers_up_to_thousand
+        graph_decimal = pynini.closure(graph_decimal + delete_space) + graph_decimal
+        self.graph = graph_decimal
+
+        # decimal point can be denoted by 'vírgula' or 'ponto'
+        decimal_point = pynini.cross("vírgula", 'morphosyntactic_features: ","')
+        decimal_point |= pynini.cross("ponto", 'morphosyntactic_features: "."')
+
+        optional_graph_negative = pynini.closure(
+            pynutil.insert("negative: ") + pynini.cross("menos", '"true"') + delete_extra_space,
+            0,
+            1,
+        )
+
+        graph_fractional = (
+            pynutil.insert('fractional_part: "') + graph_decimal + pynutil.insert('"')
+        )
+
+        cardinal_graph = cardinal.graph_no_exception | pynini.string_file(
+            get_abs_path("data/numbers/zero.tsv")
+        )
+        graph_integer = pynutil.insert('integer_part: "') + cardinal_graph + pynutil.insert('"')
+        final_graph_wo_sign = (
+            pynini.closure(graph_integer + delete_extra_space, 0, 1)
+            + decimal_point
+            + delete_extra_space
+            + graph_fractional
+        )
+        final_graph = optional_graph_negative + final_graph_wo_sign
+
+        self.final_graph_wo_negative = final_graph_wo_sign | get_quantity(
+            final_graph_wo_sign, cardinal.numbers_up_to_million
+        )
+        final_graph |= optional_graph_negative + get_quantity(
+            final_graph_wo_sign, cardinal.numbers_up_to_million
+        )
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/electronic.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/electronic.py
+import pynini
+from fun_text_processing.inverse_text_normalization.pt.utils import get_abs_path
+from fun_text_processing.text_normalization.en.graph_utils import DAMO_ALPHA, GraphFst, insert_space
+from pynini.lib import pynutil
+
+
+class ElectronicFst(GraphFst):
+    """
+    Finite state transducer for classifying 'electronic' semiotic classes, i.e.
+    email address (which get converted to "username" and "domain" fields),
+    and URLS (which get converted to a "protocol" field).
+        e.g. c d f um arroba a b c ponto e d u -> tokens { electronic { username: "cdf1" domain: "abc.edu" } }
+        e.g. dáblio dáblio dáblio a b c ponto e d u -> tokens { electronic { protocol: "www.abc.edu" } }
+    """
+
+    def __init__(self):
+        super().__init__(name="electronic", kind="classify")
+
+        delete_extra_space = pynutil.delete(" ")
+        alpha_num = (
+            DAMO_ALPHA
+            | pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+            | pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+        )
+
+        symbols = pynini.string_file(get_abs_path("data/electronic/symbols.tsv")).invert()
+
+        accepted_username = alpha_num | symbols
+        process_dot = pynini.cross("ponto", ".")
+        username = (
+            pynutil.insert('username: "')
+            + alpha_num
+            + delete_extra_space
+            + pynini.closure(accepted_username + delete_extra_space)
+            + alpha_num
+            + pynutil.insert('"')
+        )
+        single_alphanum = pynini.closure(alpha_num + delete_extra_space) + alpha_num
+        server = (
+            single_alphanum
+            | pynini.string_file(get_abs_path("data/electronic/server_name.tsv")).invert()
+        )
+        domain = (
+            single_alphanum
+            | pynini.string_file(get_abs_path("data/electronic/domain.tsv")).invert()
+        )
+        domain_graph = (
+            pynutil.insert('domain: "')
+            + server
+            + delete_extra_space
+            + process_dot
+            + delete_extra_space
+            + domain
+            + pynutil.insert('"')
+        )
+        graph = (
+            username
+            + delete_extra_space
+            + pynutil.delete("arroba")
+            + insert_space
+            + delete_extra_space
+            + domain_graph
+        )
+
+        ############# url ###
+        protocol_end = pynini.cross(pynini.union("www", "w w w", "dáblio dáblio dáblio"), "www")
+        protocol_start = pynini.cross(pynini.union("http", "h t t p", "agá tê tê pê"), "http")
+        protocol_start |= pynini.cross(
+            pynini.union("https", "h t t p s", "agá tê tê pê ésse"), "https"
+        )
+        protocol_start += pynini.cross(" dois pontos barra barra ", "://")
+
+        # e.g. .com, .es
+        ending = (
+            delete_extra_space
+            + symbols
+            + delete_extra_space
+            + (domain | pynini.closure(accepted_username + delete_extra_space) + accepted_username)
+        )
+
+        protocol = (
+            pynini.closure(protocol_start, 0, 1)
+            + protocol_end
+            + delete_extra_space
+            + process_dot
+            + delete_extra_space
+            + (pynini.closure(delete_extra_space + accepted_username, 1) | server)
+            + pynini.closure(ending, 1)
+        )
+        protocol = pynutil.insert('protocol: "') + protocol + pynutil.insert('"')
+        graph |= protocol
+        ########
+
+        final_graph = self.add_tokens(graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/measure.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/measure.py
+import pynini
+from fun_text_processing.inverse_text_normalization.pt.utils import get_abs_path
+from fun_text_processing.text_normalization.en.graph_utils import (
+    DAMO_SIGMA,
+    GraphFst,
+    convert_space,
+    delete_extra_space,
+    delete_space,
+)
+from pynini.lib import pynutil
+
+
+class MeasureFst(GraphFst):
+    """
+    Finite state transducer for classifying measure
+        e.g. menos doze quilogramas -> measure { cardinal { negative: "true" integer: "12" } units: "kg" }
+
+    Args:
+        cardinal: CardinalFst
+        decimal: DecimalFst
+    """
+
+    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
+        super().__init__(name="measure", kind="classify")
+
+        cardinal_graph = cardinal.graph_no_exception
+
+        graph_unit_singular = pynini.string_file(
+            get_abs_path("data/measurements_singular.tsv")
+        ).invert()
+        graph_unit_plural = pynini.string_file(
+            get_abs_path("data/measurements_plural.tsv")
+        ).invert()
+
+        optional_graph_negative = pynini.closure(
+            pynutil.insert("negative: ") + pynini.cross("menos", '"true"') + delete_extra_space,
+            0,
+            1,
+        )
+
+        unit_singular = convert_space(graph_unit_singular)
+        unit_plural = convert_space(graph_unit_plural)
+        unit_misc = (
+            pynutil.insert("/")
+            + pynutil.delete("por")
+            + delete_space
+            + convert_space(graph_unit_singular)
+        )
+
+        unit_singular = (
+            pynutil.insert('units: "')
+            + (
+                unit_singular
+                | unit_misc
+                | pynutil.add_weight(unit_singular + delete_space + unit_misc, 0.01)
+            )
+            + pynutil.insert('"')
+        )
+        unit_plural = (
+            pynutil.insert('units: "')
+            + (
+                unit_plural
+                | unit_misc
+                | pynutil.add_weight(unit_plural + delete_space + unit_misc, 0.01)
+            )
+            + pynutil.insert('"')
+        )
+
+        subgraph_decimal = (
+            pynutil.insert("decimal { ")
+            + optional_graph_negative
+            + decimal.final_graph_wo_negative
+            + pynutil.insert(" }")
+            + delete_extra_space
+            + unit_plural
+        )
+        subgraph_cardinal = (
+            pynutil.insert("cardinal { ")
+            + optional_graph_negative
+            + pynutil.insert('integer: "')
+            + ((DAMO_SIGMA - "um" - "uma") @ cardinal_graph)
+            + pynutil.insert('"')
+            + pynutil.insert(" }")
+            + delete_extra_space
+            + unit_plural
+        )
+        subgraph_cardinal |= (
+            pynutil.insert("cardinal { ")
+            + optional_graph_negative
+            + pynutil.insert('integer: "')
+            + (pynini.cross("um", "1") | pynini.cross("uma", "1"))
+            + pynutil.insert('"')
+            + pynutil.insert(" }")
+            + delete_extra_space
+            + unit_singular
+        )
+
+        final_graph = subgraph_decimal | subgraph_cardinal
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/money.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/money.py
+import pynini
+from fun_text_processing.inverse_text_normalization.pt.utils import get_abs_path
+from fun_text_processing.text_normalization.en.graph_utils import (
+    DAMO_DIGIT,
+    DAMO_SIGMA,
+    GraphFst,
+    convert_space,
+    delete_extra_space,
+    delete_space,
+    insert_space,
+)
+from pynini.lib import pynutil
+
+
+class MoneyFst(GraphFst):
+    """
+    Finite state transducer for classifying money
+        e.g. doze dólares e cinco centavos -> money { integer_part: "12" fractional_part: "05" currency: "$" }
+
+    Three families of input are accepted (see the graphs built in ``__init__``):
+    an integer amount with a currency and optional cents, a decimal amount
+    with a currency, and a cents-only amount.
+
+    Args:
+        cardinal: CardinalFst
+        decimal: DecimalFst
+    """
+
+    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
+        super().__init__(name="money", kind="classify")
+        # quantity, integer_part, fractional_part, currency
+
+        cardinal_graph = cardinal.graph_no_exception
+        graph_decimal_final = decimal.final_graph_wo_negative
+
+        # Currency tables, inverted relative to the direction stored in the TSV files.
+        unit_singular = pynini.string_file(get_abs_path("data/currency_singular.tsv")).invert()
+        unit_plural = pynini.string_file(get_abs_path("data/currency_plural.tsv")).invert()
+
+        # Wrap the currency in its `currency: "..."` field.
+        graph_unit_singular = (
+            pynutil.insert('currency: "') + convert_space(unit_singular) + pynutil.insert('"')
+        )
+        graph_unit_plural = (
+            pynutil.insert('currency: "') + convert_space(unit_plural) + pynutil.insert('"')
+        )
+
+        # Two digits pass through unchanged; a single digit gets a "0" prepended
+        # (assumes DAMO_DIGIT matches exactly one digit - TODO confirm in graph_utils).
+        add_leading_zero_to_double_digit = (DAMO_DIGIT + DAMO_DIGIT) | (
+            pynutil.insert("0") + DAMO_DIGIT
+        )
+        # twelve dollars (and) fifty cents, zero cents
+        # Cents that are explicitly named: "<number> centavos" or "um centavo".
+        # Note on precedence: `@` binds tighter than `+`, so the weighted cardinal is
+        # composed with the zero-padding graph first and the "centavos" deletion is
+        # concatenated afterwards.
+        cents_standalone = (
+            pynutil.insert('morphosyntactic_features: ","')  # always use a comma in the decimal
+            + insert_space
+            + pynutil.insert('fractional_part: "')
+            + pynini.union(
+                pynutil.add_weight(((DAMO_SIGMA - "um" - "uma") @ cardinal_graph), -0.7)
+                @ add_leading_zero_to_double_digit
+                + delete_space
+                + pynutil.delete(pynini.union("centavos")),
+                pynini.cross("um", "01") + delete_space + pynutil.delete(pynini.union("centavo")),
+            )
+            + pynutil.insert('"')
+        )
+
+        # Optional named cents after the currency, with an optional "com"/"e" connector removed.
+        optional_cents_standalone = pynini.closure(
+            delete_space
+            + pynini.closure((pynutil.delete("com") | pynutil.delete("e")) + delete_space, 0, 1)
+            + insert_space
+            + cents_standalone,
+            0,
+            1,
+        )
+
+        # twelve dollars fifty, only after integer
+        # setenta e cinco dólares com sessenta e três ~ $75,63
+        # Same idea as above, but the word "centavos" is not spoken: a bare cardinal
+        # (optionally introduced by "com"/"e") is taken as the fractional part.
+        optional_cents_suffix = pynini.closure(
+            delete_extra_space
+            + pynutil.insert('morphosyntactic_features: ","')  # always use a comma in the decimal
+            + insert_space
+            + pynutil.insert('fractional_part: "')
+            + pynini.closure((pynutil.delete("com") | pynutil.delete("e")) + delete_space, 0, 1)
+            + pynutil.add_weight(cardinal_graph @ add_leading_zero_to_double_digit, -0.7)
+            + pynutil.insert('"'),
+            0,
+            1,
+        )
+
+        # Integer amount + currency + optional cents. Any cardinal other than
+        # "um"/"uma" is paired with the plural currency names ...
+        graph_integer = (
+            pynutil.insert('integer_part: "')
+            + ((DAMO_SIGMA - "um" - "uma") @ cardinal_graph)
+            + pynutil.insert('"')
+            + delete_extra_space
+            + graph_unit_plural
+            + (optional_cents_standalone | optional_cents_suffix)
+        )
+        # ... while "um"/"uma" is paired with the singular currency names.
+        graph_integer |= (
+            pynutil.insert('integer_part: "')
+            + (pynini.cross("um", "1") | pynini.cross("uma", "1"))
+            + pynutil.insert('"')
+            + delete_extra_space
+            + graph_unit_singular
+            + (optional_cents_standalone | optional_cents_suffix)
+        )
+
+        # Cents with no integer amount: integer_part is set to "0".
+        #  - first alternative: no currency is spoken, so currency "R$" is inserted;
+        #  - second alternative: "<cents> de <singular currency>", given a negative
+        #    weight (-0.1), presumably so it is preferred whenever a currency is spoken.
+        graph_cents_standalone = pynini.union(
+            pynutil.insert('currency: "R$" integer_part: "0" ') + cents_standalone,
+            pynutil.add_weight(
+                pynutil.insert('integer_part: "0" ')
+                + cents_standalone
+                + delete_extra_space
+                + pynutil.delete("de")
+                + delete_space
+                + graph_unit_singular,
+                -0.1,
+            ),
+        )
+
+        # Decimal amount, an optional "de" that is dropped, then a plural currency name.
+        graph_decimal = (
+            graph_decimal_final
+            + delete_extra_space
+            + (pynutil.delete("de") + delete_space).ques
+            + graph_unit_plural
+        )
+        graph_decimal |= graph_cents_standalone
+        final_graph = graph_integer | graph_decimal
+        # Wrap in the "money { ... }" token and store the optimized tagger.
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/ordinal.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/ordinal.py
+import pynini
+from fun_text_processing.inverse_text_normalization.pt.utils import get_abs_path
+from fun_text_processing.text_normalization.en.graph_utils import DAMO_SIGMA, GraphFst, delete_space
+from pynini.lib import pynutil
+
+
+class OrdinalFst(GraphFst):
+    """
+    Finite state transducer for classifying ordinal
+        vigésimo primeiro -> ordinal { integer: "21" morphosyntactic_features: "o" }
+    This class converts ordinal up to "milésimo" (one thousandth) exclusive.
+
+    Cardinals below ten are not converted (in order to avoid
+    e.g. "primero fez ..." -> "1º fez...", "segunda guerra mundial" -> "2ª guerra mundial"
+    and any other odd conversions.)
+
+    This FST also records the ending of the ordinal (called "morphosyntactic_features"):
+    either "o" or "a".
+
+    Args:
+        cardinal: CardinalFst
+    """
+
+    def __init__(self):
+        super().__init__(name="ordinal", kind="classify")
+
+        graph_digit = pynini.string_file(get_abs_path("data/ordinals/digit.tsv"))
+        graph_ties = pynini.string_file(get_abs_path("data/ordinals/ties.tsv"))
+        graph_hundreds = pynini.string_file(get_abs_path("data/ordinals/hundreds.tsv"))
+
+        ordinal_graph_union = pynini.union(
+            pynutil.add_weight(graph_digit, 0.4),
+            pynutil.add_weight(
+                graph_ties + ((delete_space + graph_digit) | pynutil.insert("0")), 0.2
+            ),
+            graph_hundreds
+            + ((delete_space + graph_ties) | pynutil.insert("0"))
+            + ((delete_space + graph_digit) | pynutil.insert("0")),
+        )
+
+        accept_o_endings = DAMO_SIGMA + pynini.accep("o")
+        accept_a_endings = DAMO_SIGMA + pynini.accep("a")
+
+        ordinal_graph_o = accept_o_endings @ ordinal_graph_union
+        ordinal_graph_a = accept_a_endings @ ordinal_graph_union
+
+        # 'optional_numbers_in_front' have negative weight so we always
+        # include them if they're there
+        optional_in_front = (
+            pynutil.add_weight(ordinal_graph_union, -0.1) + delete_space.closure()
+        ).closure()
+        graph_o_suffix = optional_in_front + ordinal_graph_o
+        graph_a_suffix = optional_in_front + ordinal_graph_a
+
+        # don't convert ordinals from one to nine inclusive
+        graph_exception = pynini.project(pynini.union(graph_digit), "input")
+        graph_o_suffix = (
+            pynini.project(graph_o_suffix, "input") - graph_exception.arcsort()
+        ) @ graph_o_suffix
+        graph_a_suffix = (
+            pynini.project(graph_a_suffix, "input") - graph_exception.arcsort()
+        ) @ graph_a_suffix
+
+        graph = (
+            pynutil.insert('integer: "')
+            + graph_o_suffix
+            + pynutil.insert('"')
+            + pynutil.insert(' morphosyntactic_features: "o"')
+        )
+        graph |= (
+            pynutil.insert('integer: "')
+            + graph_a_suffix
+            + pynutil.insert('"')
+            + pynutil.insert(' morphosyntactic_features: "a"')
+        )
+
+        final_graph = self.add_tokens(graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/punctuation.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/punctuation.py
+import pynini
+from fun_text_processing.text_normalization.en.graph_utils import GraphFst
+from pynini.lib import pynutil
+
+
+class PunctuationFst(GraphFst):
+    """
+    Finite state transducer for classifying punctuation
+        e.g. a, -> tokens { name: "a" } tokens { name: "," }
+    """
+
+    def __init__(self):
+        super().__init__(name="punctuation", kind="classify")
+
+        s = "!#$%&'()*+,-./:;<=>?@^_`{|}~"
+        punct = pynini.union(*s)
+
+        graph = pynutil.insert('name: "') + punct + pynutil.insert('"')
+
+        self.fst = graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/telephone.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/pt/taggers/telephone.py
+import pynini
+from fun_text_processing.inverse_text_normalization.pt.utils import get_abs_path
+from fun_text_processing.text_normalization.en.graph_utils import (
+    GraphFst,
+    delete_space,
+    insert_space,
+)
+from pynini.lib import pynutil
+
+
+class TelephoneFst(GraphFst):
+    """
+    Finite state transducer for classifying telephone numbers, e.g.
+        um dois um dois três quatro cinco seis sete oito nove -> { number_part: "(12) 12345-6789" }.
+        If 11 digits are spoken, they are grouped as 2+5+4 (eg. (12) 34567-8901).
+        If 10 digits are spoken, they are grouped as 2+4+4 (eg. (12) 3456-7890).
+        If 9 digits are spoken, they are grouped as 5+4 (eg. 12345-6789).
+        If 8 digits are spoken, they are grouped as 4+4 (eg. 1234-5678).
+        In portuguese, digits are generally spoken individually, or as 2-digit numbers,
+        eg. "trinta e quatro oitenta e dois" = "3482",
+            "meia sete vinte" = "6720".
+    """
+
+    def __init__(self):
+        super().__init__(name="telephone", kind="classify")
+
+        # create `single_digits` and `double_digits` graphs as these will be
+        # the building blocks of possible telephone numbers
+        graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+        graph_ties = pynini.string_file(get_abs_path("data/numbers/ties.tsv"))
+        graph_twenties = pynini.string_file(get_abs_path("data/numbers/twenties.tsv"))
+        graph_teen = pynini.string_file(get_abs_path("data/numbers/teen.tsv"))
+        graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+        graph_half = pynini.cross("meia", "6")
+
+        graph_all_digits = pynini.union(graph_digit, graph_half, graph_zero)
+
+        single_digits = pynini.invert(graph_all_digits).optimize()
+
+        double_digits = (
+            pynini.union(
+                graph_teen | graph_twenties,
+                (graph_ties + pynutil.insert("0")),
+                (graph_ties + delete_space + pynutil.delete("e") + delete_space + graph_digit),
+                (graph_all_digits + delete_space + graph_all_digits),
+            )
+            .invert()
+            .optimize()
+        )
+
+        # define `eleven_digit_graph`, `ten_digit_graph`, `nine_digit_graph`, `eight_digit_graph`
+        # which accept telephone numbers spoken (1) only with single digits,
+        # or (2) spoken with double digits (and sometimes single digits)
+
+        # 11-digit option (2): (2) + (1+2+2) + (2+2) digits
+        eleven_digit_graph = (
+            pynutil.delete("(")
+            + double_digits
+            + insert_space
+            + pynutil.delete(") ")
+            + single_digits
+            + insert_space
+            + double_digits
+            + insert_space
+            + double_digits
+            + insert_space
+            + pynutil.delete("-")
+            + double_digits
+            + insert_space
+            + double_digits
+        )
+
+        # 10-digit option (2): (2) + (2+2) + (2+2) digits
+        ten_digit_graph = (
+            pynutil.delete("(")
+            + double_digits
+            + insert_space
+            + pynutil.delete(") ")
+            + double_digits
+            + insert_space
+            + double_digits
+            + insert_space
+            + pynutil.delete("-")
+            + double_digits
+            + insert_space
+            + double_digits
+        )
+
+        # 9-digit option (2): (1+2+2) + (2+2) digits
+        nine_digit_graph = (
+            single_digits
+            + insert_space
+            + double_digits
+            + insert_space
+            + double_digits
+            + insert_space
+            + pynutil.delete("-")
+            + double_digits
+            + insert_space
+            + double_digits
+        )
+
+        # 8-digit option (2): (2+2) + (2+2) digits
+        eight_digit_graph = (
+            double_digits
+            + insert_space
+            + double_digits
+            + insert_space
+            + pynutil.delete("-")
+            + double_digits
+            + insert_space
+            + double_digits
+        )
+
+        number_part = pynini.union(
+            eleven_digit_graph, ten_digit_graph, nine_digit_graph, eight_digit_graph
+        )
+
+        number_part = (
+            pynutil.insert('number_part: "') + pynini.invert(number_part) + pynutil.insert('"')
+        )
+
+        graph = number_part
+        final_graph = self.add_tokens(graph)
+        self.fst = final_graph.optimize()