initial commit

70a8a9e0 · wangwei990215 · 70a8a9e0 · 70a8a9e0 · 70a8a9e0 · 70a8a9e0
Commit 70a8a9e0 authored Oct 03, 2024 by wangwei990215
20 changed files
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/minute_to.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/minute_to.tsv
+1	59
+2	58
+3	57
+4	56
+5	55
+6	54
+7	53
+8	52
+9	51
+10	50
+11	49
+12	48
+13	47
+14	46
+15	45
+16	44
+17	43
+18	42
+19	41
+20	40
+21	39
+22	38
+23	37
+24	36
+25	35
+26	34
+27	33
+28	32
+29	31
+30	30
+31	29
+32	28
+33	27
+34	26
+35	25
+36	24
+37	23
+38	22
+39	21
+40	20
+41	19
+42	18
+43	17
+44	16
+45	15
+46	14
+47	13
+48	12
+49	11
+50	10
+51	9
+52	8
+53	7
+54	6
+55	5
+56	4
+57	3
+58	2
+59	1
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/minutes.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/minutes.tsv
+일분	01
+이분	02
+삼분	03
+사분	04
+오분	05
+육분	06
+칠분	07
+팔분	08
+구분	09
+십분	10
+십일분	11
+십이분	12
+십삼분	13
+십사분	14
+십오분	15
+십육분	16
+십칠분	17
+십팔분	18
+십구분	19
+이십분	20
+이십일분	21
+이십이분	22
+이십삼분	23
+이십사분	24
+이십오분	25
+이십육분	26
+이십칠분	27
+이십팔분	28
+이십구분	29
+삼십분	30
+삼십일분	31
+삼십이분	32
+삼십삼분	33
+삼십사분	34
+삼십오분	35
+삼십육분	36
+삼십칠분	37
+삼십팔분	38
+삼십구분	39
+사십분	40
+사십일분	41
+사십이분	42
+사십삼분	43
+사십사분	44
+사십오분	45
+사십육분	46
+사십칠분	47
+사십팔분	48
+사십구분	49
+오십분	50
+오십일분	51
+오십이분	52
+오십삼분	53
+오십사분	54
+오십오분	55
+오십육분	56
+오십칠분	57
+오십팔분	58
+오십구분	59
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/seconds.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/seconds.tsv
+일초	01
+이초	02
+삼초	03
+사초	04
+오초	05
+육초	06
+칠초	07
+팔초	08
+구초	09
+십초	10
+십일초	11
+십이초	12
+십삼초	13
+십사초	14
+십오초	15
+십육초	16
+십칠초	17
+십팔초	18
+십구초	19
+이십초	20
+이십일초	21
+이십이초	22
+이십삼초	23
+이십사초	24
+이십오초	25
+이십육초	26
+이십칠초	27
+이십팔초	28
+이십구초	29
+삼십초	30
+삼십일초	31
+삼십이초	32
+삼십삼초	33
+삼십사초	34
+삼십오초	35
+삼십육초	36
+삼십칠초	37
+삼십팔초	38
+삼십구초	39
+사십초	40
+사십일초	41
+사십이초	42
+사십삼초	43
+사십사초	44
+사십오초	45
+사십육초	46
+사십칠초	47
+사십팔초	48
+사십구초	49
+오십초	50
+오십일초	51
+오십이초	52
+오십삼초	53
+오십사초	54
+오십오초	55
+오십육초	56
+오십칠초	57
+오십팔초	58
+오십구초	59
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/time_suffix.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/time_suffix.tsv
+p m	p.m.
+pm	p.m.
+p.m.
+p.m	p.m.
+am	a.m.
+a.m.
+a.m	a.m.
+a m	a.m.
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/time_zone.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/time_zone.tsv
+cst	c s t
+cet	c e t
+pst	p s t
+est	e s t
+pt	p t
+et	e t
+gmt	g m t
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/to_hour.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/data/time/to_hour.tsv
+one	12
+two	1
+three	2
+four	3
+five	4
+six	5
+seven	6
+eight	7
+nine	8
+ten	9
+eleven	10
+twelve	11
\ No newline at end of file
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/data/whitelist.tsv
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/data/whitelist.tsv
+e.g.	for example
+dr.	doctor
+mr.	mister
+mrs.	misses
+st.	saint
+7-eleven	seven eleven
+es3	e s three
+s&p	s and p
+ASAP	a s a p
+AT&T	a t and t
+LLP	l l p
+ATM	a t m
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/graph_utils.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/graph_utils.py
+import os
+import string
+from pathlib import Path
+from typing import Dict
+
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+from pynini import Far
+from pynini.examples import plurals
+from pynini.export import export
+from pynini.lib import byte, pynutil, utf8
+
+# Acceptor for one valid UTF-8 encoded character; the base alphabet for the grammars below.
+DAMO_CHAR = utf8.VALID_UTF8_CHAR
+
+# Single-character classes built from the ASCII tables in the `string` module.
+DAMO_DIGIT = byte.DIGIT
+DAMO_LOWER = pynini.union(*string.ascii_lowercase).optimize()
+DAMO_UPPER = pynini.union(*string.ascii_uppercase).optimize()
+DAMO_ALPHA = pynini.union(DAMO_LOWER, DAMO_UPPER).optimize()
+DAMO_ALNUM = pynini.union(DAMO_DIGIT, DAMO_ALPHA).optimize()
+DAMO_HEX = pynini.union(*string.hexdigits).optimize()
+# These two are plain Python strings, not compiled FSTs.
+DAMO_NON_BREAKING_SPACE = "\u00A0"
+DAMO_SPACE = " "
+# Whitespace here means space, tab, newline, carriage return or the non-breaking space.
+DAMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", "\u00A0").optimize()
+DAMO_NOT_SPACE = pynini.difference(DAMO_CHAR, DAMO_WHITE_SPACE).optimize()
+DAMO_NOT_QUOTE = pynini.difference(DAMO_CHAR, r'"').optimize()
+
+# Each punctuation character is escaped so pynini does not read it as string-compiler syntax.
+DAMO_PUNCT = pynini.union(*map(pynini.escape, string.punctuation)).optimize()
+DAMO_GRAPH = pynini.union(DAMO_ALNUM, DAMO_PUNCT).optimize()
+
+# Zero or more characters, i.e. any string over DAMO_CHAR.
+DAMO_SIGMA = pynini.closure(DAMO_CHAR)
+
+# Removes any run (possibly empty) of whitespace.
+delete_space = pynutil.delete(pynini.closure(DAMO_WHITE_SPACE))
+# Removes at most one whitespace character.
+delete_zero_or_one_space = pynutil.delete(pynini.closure(DAMO_WHITE_SPACE, 0, 1))
+insert_space = pynutil.insert(" ")
+# Collapses one or more whitespace characters into exactly one space (at least one is required).
+delete_extra_space = pynini.cross(pynini.closure(DAMO_WHITE_SPACE, 1), " ")
+# Strips the ' preserve_order: true' / ' field_order: "..."' bookkeeping fields, any number of times.
+# NOTE(review): the field_order branch matches exactly one non-quote character between the
+# quotes (DAMO_NOT_QUOTE is not wrapped in a closure) - confirm that this is intended.
+delete_preserve_order = pynini.closure(
+    pynutil.delete(" preserve_order: true")
+    | (pynutil.delete(' field_order: "') + DAMO_NOT_QUOTE + pynutil.delete('"'))
+)
+
+# English pluralization rules. The irregular (suppletive) pairs come from data/suppletive.tsv.
+suppletive = pynini.string_file(get_abs_path("data/suppletive.tsv"))
+# _v = pynini.union("a", "e", "i", "o", "u")
+# Lowercase ASCII consonants, used as the left context of the "y" -> "ies" rule.
+_c = pynini.union(
+    "b",
+    "c",
+    "d",
+    "f",
+    "g",
+    "h",
+    "j",
+    "k",
+    "l",
+    "m",
+    "n",
+    "p",
+    "q",
+    "r",
+    "s",
+    "t",
+    "v",
+    "w",
+    "x",
+    "y",
+    "z",
+)
+# <anything> + consonant + "y" -> "...ies"
+_ies = DAMO_SIGMA + _c + pynini.cross("y", "ies")
+# <anything> ending in s / sh / ch / x / z gets "es" appended
+_es = DAMO_SIGMA + pynini.union("s", "sh", "ch", "x", "z") + pynutil.insert("es")
+# fallback: append "s"
+_s = DAMO_SIGMA + pynutil.insert("s")
+
+# The rules are stacked with the priority-union helper from pynini.examples.plurals, nested in
+# the order suppletive, _ies, _es, _s.
+# NOTE(review): presumably an earlier rule wins over a later one whenever both accept the same
+# input - confirm against the helper's definition.
+graph_plural = plurals._priority_union(
+    suppletive,
+    plurals._priority_union(_ies, plurals._priority_union(_es, _s, DAMO_SIGMA), DAMO_SIGMA),
+    DAMO_SIGMA,
+).optimize()
+
+SINGULAR_TO_PLURAL = graph_plural
+PLURAL_TO_SINGULAR = pynini.invert(graph_plural)
+# Maps one uppercase ASCII letter to its lowercase counterpart; TO_UPPER is the inverse mapping.
+TO_LOWER = pynini.union(
+    *[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)]
+)
+TO_UPPER = pynini.invert(TO_LOWER)
+# Small weight constants shared by the grammars.
+MIN_NEG_WEIGHT = -0.0001
+MIN_POS_WEIGHT = 0.0001
+
+
+def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]):
+    """
+    Write a set of named grammars into a single OpenFst finite state archive (FAR).
+
+    Every graph is optimized before it is stored under its rule name, and a
+    confirmation line is printed once the archive has been closed.
+
+    Args:
+        file_name: path of the FAR file to create
+        graphs: mapping from rule name to the Pynini WFST exported under that name
+    """
+    far_writer = export.Exporter(file_name)
+    for rule_name in graphs:
+        far_writer[rule_name] = graphs[rule_name].optimize()
+    far_writer.close()
+    print(f"Created {file_name}")
+
+
+def get_plurals(fst):
+    """
+    Chain the singular-to-plural grammar in front of ``fst``.
+
+    Args:
+        fst: Fst that consumes the output of SINGULAR_TO_PLURAL
+
+    Returns the composition ``SINGULAR_TO_PLURAL @ fst``
+    """
+    return pynini.compose(SINGULAR_TO_PLURAL, fst)
+
+
+def get_singulars(fst):
+    """
+    Chain the plural-to-singular grammar in front of ``fst``.
+
+    Args:
+        fst: Fst that consumes the output of PLURAL_TO_SINGULAR
+
+    Returns the composition ``PLURAL_TO_SINGULAR @ fst``
+    """
+    return pynini.compose(PLURAL_TO_SINGULAR, fst)
+
+
+def convert_space(fst) -> "pynini.FstLike":
+    """
+    Rewrite every plain space produced by ``fst`` into a non-breaking space.
+
+    Intended for tagger grammars whose quoted token values may contain spaces,
+    e.g. name: "hello kitty". The extra rewrite makes the transducer noticeably
+    slower, so apply it only where spaces inside quotes can actually occur.
+
+    Args:
+        fst: input fst
+
+    Returns output fst where breaking spaces are converted to non breaking spaces
+    """
+    space_to_nbsp = pynini.cross(DAMO_SPACE, DAMO_NON_BREAKING_SPACE)
+    rewrite_everywhere = pynini.cdrewrite(space_to_nbsp, "", "", DAMO_SIGMA)
+    return fst @ rewrite_everywhere
+
+
+class GraphFst:
+    """
+    Common parent of all grammar fsts.
+
+    On construction the class looks for a pre-compiled archive at
+    ``<this module's directory>/grammars/<kind>/<name>.far`` and, when that file
+    exists, loads its fst; otherwise ``fst`` stays ``None`` until a subclass sets it.
+
+    Args:
+        name: name of grammar class
+        kind: either 'classify' or 'verbalize'
+        deterministic: if True will provide a single transduction option,
+            for False multiple transduction are generated (used for audio-based normalization)
+    """
+
+    def __init__(self, name: str, kind: str, deterministic: bool = True):
+        self.name = name
+        self.kind = kind
+        self.deterministic = deterministic
+        self._fst = None
+
+        module_dir = os.path.dirname(__file__)
+        self.far_path = Path("".join((module_dir, "/grammars/", kind, "/", name, ".far")))
+        if not self.far_exist():
+            return
+        archive = Far(self.far_path, mode="r", arc_type="standard", far_type="default")
+        self._fst = archive.get_fst()
+
+    def far_exist(self) -> bool:
+        """
+        Tells whether the pre-compiled FAR file is present on disk
+        """
+        return self.far_path.exists()
+
+    @property
+    def fst(self) -> "pynini.FstLike":
+        return self._fst
+
+    @fst.setter
+    def fst(self, fst):
+        self._fst = fst
+
+    def add_tokens(self, fst) -> "pynini.FstLike":
+        """
+        Surrounds the output of the given fst with ``<name> { ... }``
+
+        Args:
+            fst: input fst
+
+        Returns:
+            Fst: fst
+        """
+        opening = pynutil.insert(f"{self.name} {{ ")
+        closing = pynutil.insert(" }")
+        return opening + fst + closing
+
+    def delete_tokens(self, fst) -> "pynini.FstLike":
+        """
+        Strips the ``<name> { ... }`` wrapper around the given fst and turns
+        non-breaking spaces in the result back into plain spaces
+
+        Args:
+            fst: input fst
+
+        Returns:
+            Fst: fst
+        """
+        strip_head = pynutil.delete(f"{self.name}") + delete_space + pynutil.delete("{")
+        strip_tail = delete_space + pynutil.delete("}")
+        unwrapped = strip_head + delete_space + fst + strip_tail
+        nbsp_to_space = pynini.cdrewrite(pynini.cross("\u00A0", " "), "", "", DAMO_SIGMA)
+        return unwrapped @ nbsp_to_space
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/.cardinal.py.swo
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/.cardinal.py.swo
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/__init__.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/__init__.py
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/cardinal.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/cardinal.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    DAMO_ALPHA,
+    DAMO_SIGMA,
+    DAMO_DIGIT,
+    DAMO_SPACE,
+    DAMO_CHAR,
+    GraphFst,
+    delete_space,
+)
+from pynini.lib import pynutil
+
+
+class CardinalFst(GraphFst):
+    """
+    Finite state transducer for classifying Korean cardinals
+        e.g. 마이너스 <spoken number> -> cardinal { negative: "-" integer: "<digits>" }
+    The exception list below holds only a placeholder string, so in practice
+    no number is excluded from conversion.
+
+    Attributes set for other taggers:
+        graph_no_exception: the full number grammar
+        graph: the number grammar restricted to inputs not in the exception list
+        graph_hundred_component_at_least_one_none_zero_digit: the hundreds grammar
+            limited to outputs containing at least one non-zero digit
+    """
+
+    def __init__(self):
+        super().__init__(name="cardinal", kind="classify")
+        graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+
+        graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+        graph_teens_without_zero = pynini.string_file(
+            get_abs_path("data/numbers/digit_teens_without_zero.tsv")
+        )
+        graph_teens = pynini.string_file(get_abs_path("data/numbers/digit_teens.tsv"))
+
+        # "inh" = the digit_inherent_* tables
+        # NOTE(review): presumably the native-Korean numeral readings - confirm against the TSVs
+        graph_inh_digit = pynini.string_file(get_abs_path("data/numbers/digit_inherent_digit.tsv"))
+        graph_inh_teen_without_zero = pynini.string_file(
+            get_abs_path("data/numbers/digit_inherent_teens_without_zero.tsv")
+        )
+        graph_inh_teen = pynini.string_file(get_abs_path("data/numbers/digit_inherent_teens.tsv"))
+        graph_inh_teen_others = pynini.string_file(
+            get_abs_path("data/numbers/digit_inherent_others.tsv")
+        )
+
+        # tens word followed by a units word, or a standalone tens entry
+        graph_less_hundred_num_inh_p1 = graph_inh_teen_without_zero + graph_inh_digit
+        graph_less_hundred_num_inh = pynini.union(
+            graph_inh_teen, graph_less_hundred_num_inh_p1, graph_inh_teen_others
+        )
+
+        graph_less_hundred_num_p1 = graph_teens_without_zero + graph_digit
+        graph_less_hundred_num = pynini.union(graph_less_hundred_num_p1, graph_teens)
+
+        # digits
+        addzero = pynutil.insert("0")
+        zero = graph_zero
+        # NOTE: digits_combine is built but not referenced again in this method
+        digits_combine = graph_digit | graph_inh_digit | zero
+        digits = graph_digit | zero
+        digit = graph_digit
+
+        # teens
+        teens_combine = graph_less_hundred_num | graph_less_hundred_num_inh
+        # teens = graph_less_hundred_num
+        teens = teens_combine
+
+        # hundreds: 백 is the unit word for 100. After the unit the remainder is filled in by one
+        # of the weighted alternatives; the ones that insert "0" padding carry a higher weight.
+        hundred = (
+            digit
+            + pynutil.delete("백")
+            + (
+                teens
+                | pynutil.add_weight(zero + digit, 0.1)
+                | pynutil.add_weight(digit + addzero, 0.5)
+                | pynutil.add_weight(addzero**2, 1.0)
+            )
+        )
+
+        # keep only hundreds outputs that contain at least one non-zero digit
+        graph_hundred_component_at_least_one_none_zero_digit = hundred @ (
+            pynini.closure(DAMO_DIGIT) + (DAMO_DIGIT - "0") + pynini.closure(DAMO_DIGIT)
+        )
+
+        self.graph_hundred_component_at_least_one_none_zero_digit = (
+            graph_hundred_component_at_least_one_none_zero_digit
+        )
+
+        # thousands: 천 is the unit word for 1,000
+        thousand = (
+            (hundred | teens | digits)
+            + pynutil.delete("천")
+            + (
+                hundred
+                | pynutil.add_weight(zero + teens, 0.1)
+                | pynutil.add_weight(addzero + zero + digit, 0.5)
+                | pynutil.add_weight(digit + addzero**2, 0.8)
+                | pynutil.add_weight(addzero**3, 1.0)
+            )
+        )
+
+        # ten-thousands: 만 is the unit word for 10,000; one space after the unit is dropped if present
+        ten_thousand = (
+            (thousand | hundred | teens | digits)
+            + pynutil.delete("만")
+            + pynini.cross(" ", "").ques
+            + (
+                thousand
+                | pynutil.add_weight(zero + hundred, 0.1)
+                | pynutil.add_weight(addzero + zero + teens, 0.5)
+                | pynutil.add_weight(addzero + addzero + zero + digit, 0.5)
+                | pynutil.add_weight(digit + addzero**3, 0.8)
+                | pynutil.add_weight(addzero**4, 1.0)
+            )
+        )
+
+        # 조 is the unit word for 10^12 (trillion) and 억 the unit word for 10^8 (hundred million)
+        number = digits | teens | hundred | thousand | ten_thousand
+
+        # .ques is equivalent to pynini.closure(x, 0, 1)
+        # NOTE(review): 조 and 억 are accepted with pynini.accep, so unlike 백/천/만 they are
+        # copied into the output instead of being deleted - confirm that this is intended.
+        number = (
+            (number + pynini.accep("조").ques + pynini.cross(" ", "").ques).ques
+            + (number + pynini.accep("억").ques + pynini.cross(" ", "").ques).ques
+            + number
+        )
+
+        graph = (
+            number
+            | graph_less_hundred_num_inh
+            | graph_inh_digit
+            | graph_inh_teen
+            | graph_inh_teen_others
+        )
+        # labels_exception = [num_to_word(x) for x in range(0, 13)]
+        # placeholder exception list: a dummy string keeps the subtraction below well-formed
+        labels_exception = ["zzzzzzzzz"]
+        graph_exception = pynini.union(*labels_exception)
+
+        self.graph_no_exception = graph
+
+        # restrict the accepted inputs to those that are not listed as exceptions
+        self.graph = (pynini.project(graph, "input") - graph_exception.arcsort()) @ graph
+
+        # optional leading 마이너스 (minus) followed by a single space -> negative: "-"
+        optional_minus_graph = pynini.closure(
+            pynutil.insert("negative: ") + pynini.cross("마이너스", '"-"') + DAMO_SPACE, 0, 1
+        )
+
+        final_graph = (
+            optional_minus_graph + pynutil.insert('integer: "') + self.graph + pynutil.insert('"')
+        )
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/date.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/date.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    DAMO_ALPHA,
+    DAMO_DIGIT,
+    GraphFst,
+    delete_extra_space,
+    delete_space,
+)
+from pynini.lib import pynutil
+
+# Number tables shared by the date helpers below, loaded once at import time.
+graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv")).optimize()
+graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).optimize()
+# NOTE(review): presumably the native-Korean readings of the single digits - confirm against the TSV
+graph_digit_inh = pynini.string_file(
+    get_abs_path("data/numbers/digit_inherent_digit.tsv")
+).optimize()
+
+
+def _get_month_graph():
+    """
+    Transducer for the month, compiled from the mapping in data/months.tsv
+    """
+    return pynini.string_file(get_abs_path("data/months.tsv"))
+
+
+def _get_day_graph():
+    """
+    Transducer for the day of the month: the union of the mappings in
+    data/day.tsv and data/day_inherent.tsv
+    """
+    numeric_days = pynini.string_file(get_abs_path("data/day.tsv"))
+    inherent_days = pynini.string_file(get_abs_path("data/day_inherent.tsv"))
+    return pynini.union(numeric_days, inherent_days)
+
+
+def _get_year_graph():
+    """
+    Transducer for a year read out digit by digit.
+
+    Accepts either four positions (a non-zero leading digit followed by three
+    digits-or-zero) or exactly two non-zero digits.
+    """
+    nonzero = graph_digit | graph_digit_inh
+    four_positions = nonzero + (nonzero | graph_zero) ** 3
+    two_positions = nonzero**2
+    return pynini.union(four_positions, two_positions)
+
+
+class DateFst(GraphFst):
+    """
+    Finite state transducer for classifying Korean dates.
+
+    Accepted combinations are year-month-day, month-day, year-month, or any of
+    the three components alone; whitespace between components is deleted.
+    The year keeps its trailing 년 in the output, while 월 and 일 are appended
+    to the month and day values, giving tokens of the form
+        date { year: "<digits>년" month: "<month>월" day: "<day>일" preserve_order: true }
+    (only the components that were spoken are emitted).
+
+    The constructor takes no arguments.
+    """
+
+    def __init__(self):
+        super().__init__(name="date", kind="classify")
+
+        # Spoken year digits followed by the literal 년, which is passed through to the output.
+        # The small positive weight makes a year reading slightly more costly.
+        YEAR_WEIGHT = 0.001
+        year_graph = _get_year_graph() + pynini.accep("년")
+        year_graph = (
+            pynutil.insert('year: "')
+            + pynutil.add_weight(year_graph, YEAR_WEIGHT)
+            + pynutil.insert('"')
+        )
+
+        # cross("", "월") consumes nothing and emits 월 after the month value
+        month_graph = _get_month_graph() + pynini.cross("", "월")
+        month_graph = pynutil.insert('month: "') + month_graph + pynutil.insert('"')
+
+        # cross("", "일") consumes nothing and emits 일 after the day value
+        day_graph = _get_day_graph() + pynini.cross("", "일")
+        day_graph = pynutil.insert('day: "') + day_graph + pynutil.insert('"')
+
+        graph_ymd = year_graph + delete_space + month_graph + delete_space + day_graph
+        graph_md = month_graph + delete_space + day_graph
+        graph_ym = year_graph + delete_space + month_graph
+
+        final_graph = graph_ymd | graph_md | graph_ym | year_graph | month_graph | day_graph
+
+        final_graph += pynutil.insert(" preserve_order: true")
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/decimal.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/decimal.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    DAMO_DIGIT,
+    GraphFst,
+    delete_extra_space,
+    delete_space,
+)
+from pynini.lib import pynutil
+
+
+def get_quantity(
+    decimal: "pynini.FstLike", cardinal_up_to_hundred: "pynini.FstLike"
+) -> "pynini.FstLike":
+    """
+    Build the FST that tags a number followed by a large-unit word.
+
+    Two shapes are accepted, each with whitespace between number and unit:
+        <cardinal> <unit> -> integer_part: "<digits>" quantity: "<unit>"
+        <decimal> <unit>  -> <decimal fields> quantity: "<unit>"
+    Leading zeros are stripped from the cardinal, which must contain a non-zero digit.
+
+    Args:
+        decimal: decimal FST
+        cardinal_up_to_hundred: cardinal FST
+    """
+
+    def _quantity_field(units):
+        # whitespace before the unit collapses into the single space separating the fields
+        return delete_extra_space + pynutil.insert('quantity: "') + units + pynutil.insert('"')
+
+    drop_leading_zeros = (
+        pynutil.delete(pynini.closure("0"))
+        + pynini.difference(DAMO_DIGIT, "0")
+        + pynini.closure(DAMO_DIGIT)
+    )
+    numbers = cardinal_up_to_hundred @ drop_leading_zeros
+
+    # 만 = ten thousand, 백만 = million, 천만 = ten million, 억 = hundred million, 조 = trillion
+    suffix = pynini.union("만", "백만", "천만", "억", "조")
+
+    cardinal_quantity = (
+        pynutil.insert('integer_part: "')
+        + numbers
+        + pynutil.insert('"')
+        + _quantity_field(suffix)
+    )
+    # 천 (thousand) is allowed as a unit only after a decimal number
+    decimal_quantity = decimal + _quantity_field(suffix | "천")
+    return cardinal_quantity | decimal_quantity
+
+
+class DecimalFst(GraphFst):
+    """
+    Finite state transducer for classifying Korean decimals
+        e.g. 마이너스 <cardinal>점 <digit words>
+            -> decimal { negative: "true" integer_part: "<digits>" fractional_part: "<digits>" }
+        a trailing large-unit word is tagged as quantity: "<unit>" (see get_quantity)
+    Args:
+        cardinal: CardinalFst
+    """
+
+    def __init__(self, cardinal: GraphFst):
+        super().__init__(name="decimal", kind="classify")
+
+        cardinal_graph = cardinal.graph_no_exception
+
+        # fractional digits are read one by one from the digit and zero tables
+        graph_decimal = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+        graph_decimal |= pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+
+        # NOTE(review): the closure has no lower bound, so an empty fractional part is accepted
+        graph_decimal = pynini.closure(graph_decimal)
+        self.graph = graph_decimal
+
+        # 마이너스 = minus; the whitespace after it is collapsed into one space
+        optional_graph_negative = pynini.closure(
+            pynutil.insert("negative: ") + pynini.cross("마이너스", '"true"') + delete_extra_space,
+            0,
+            1,
+        )
+
+        graph_fractional = (
+            pynutil.insert('fractional_part: "') + graph_decimal + pynutil.insert('"')
+        )
+
+        # 점 = (decimal) point; it must directly follow the integer part and is deleted
+        graph_integer = (
+            pynutil.insert('integer_part: "')
+            + cardinal_graph
+            + pynutil.delete("점")
+            + pynutil.insert('"')
+        )
+
+        # exactly one space is required between 점 and the fractional digits; it is kept as
+        # the separator between the two output fields
+        final_graph_wo_sign = graph_integer + pynini.cross(" ", " ") + graph_fractional
+
+        final_graph = optional_graph_negative + delete_space + final_graph_wo_sign
+
+        # unsigned variant exposed for other taggers
+        self.final_graph_wo_negative = final_graph_wo_sign | get_quantity(
+            final_graph_wo_sign, cardinal.graph_hundred_component_at_least_one_none_zero_digit
+        )
+        final_graph |= optional_graph_negative + get_quantity(
+            final_graph_wo_sign, cardinal.graph_hundred_component_at_least_one_none_zero_digit
+        )
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/electronic.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/electronic.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    DAMO_ALPHA,
+    GraphFst,
+    insert_space,
+)
+from pynini.lib import pynutil
+
+
+class ElectronicFst(GraphFst):
+    """
+    Finite state transducer for classifying electronic: as URLs, email addresses, etc.
+        email: <username> 에서 <server> 점 <domain>
+            -> electronic { username: "<username>" domain: "<server>.<domain>" }
+        url:   -> electronic { protocol: "<url>" }
+    """
+
+    def __init__(self):
+        super().__init__(name="electronic", kind="classify")
+
+        # NOTE: shadows the module-level helper name; here it deletes exactly one plain space
+        delete_extra_space = pynutil.delete(" ")
+        # one ASCII letter, or one spoken digit / zero word mapped through the number tables
+        alpha_num = (
+            DAMO_ALPHA
+            | pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+            | pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+        )
+
+        # symbols.tsv is inverted before use
+        # NOTE(review): presumably stored as symbol -> spoken form, so the inversion reads the
+        # spoken form and emits the symbol - confirm against the TSV
+        symbols = pynini.string_file(get_abs_path("data/electronic/symbols.tsv")).invert()
+
+        accepted_username = alpha_num | symbols
+        # 점 (point) is rewritten as "."
+        process_dot = pynini.cross("점", ".")
+        # either space-separated characters starting with a letter/digit, or (at a small extra
+        # weight) an unspaced run of ASCII letters
+        username = (
+            alpha_num + pynini.closure(delete_extra_space + accepted_username)
+        ) | pynutil.add_weight(pynini.closure(DAMO_ALPHA, 1), weight=0.0001)
+        username = pynutil.insert('username: "') + username + pynutil.insert('"')
+        single_alphanum = pynini.closure(alpha_num + delete_extra_space) + alpha_num
+        server = single_alphanum | pynini.string_file(
+            get_abs_path("data/electronic/server_name.tsv")
+        )
+        domain = single_alphanum | pynini.string_file(get_abs_path("data/electronic/domain.tsv"))
+        domain_graph = (
+            pynutil.insert('domain: "')
+            + server
+            + delete_extra_space
+            + process_dot
+            + delete_extra_space
+            + domain
+            + pynutil.insert('"')
+        )
+        # email: username, the separator word 에서 (deleted), then the domain part
+        graph = (
+            username
+            + delete_extra_space
+            + pynutil.delete("에서")
+            + insert_space
+            + delete_extra_space
+            + domain_graph
+        )
+
+        ############# url ###
+        protocol_end = pynini.cross(pynini.union("w w w", "www"), "www")
+        # spelled-out scheme followed by the spoken " 콜론 슬래시 슬래시 ", rewritten to "://"
+        protocol_start = (
+            pynini.cross("h t t p", "http") | pynini.cross("h t t p s", "https")
+        ) + pynini.cross(" 콜론 슬래시 슬래시 ", "://")
+        # .com,
+        # one trailing segment: a symbol followed by a known domain or by spaced characters
+        ending = (
+            delete_extra_space
+            + symbols
+            + delete_extra_space
+            + (
+                domain
+                | pynini.closure(
+                    accepted_username + delete_extra_space,
+                )
+                + accepted_username
+            )
+        )
+
+        # host part followed by at least one trailing segment
+        protocol_default = (
+            (
+                (pynini.closure(delete_extra_space + accepted_username, 1) | server)
+                | pynutil.add_weight(pynini.closure(DAMO_ALPHA, 1), weight=0.0001)
+            )
+            + pynini.closure(ending, 1)
+        ).optimize()
+        # optional scheme, then "www", the dot, and the host
+        protocol = (
+            pynini.closure(protocol_start, 0, 1)
+            + protocol_end
+            + delete_extra_space
+            + process_dot
+            + protocol_default
+        ).optimize()
+
+        # also accept a host with an optional "www." prefix and no scheme
+        protocol |= (
+            pynini.closure(protocol_end + delete_extra_space + process_dot, 0, 1) + protocol_default
+        )
+
+        protocol = pynutil.insert('protocol: "') + protocol.optimize() + pynutil.insert('"')
+        graph |= protocol
+
+        final_graph = self.add_tokens(graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/fraction.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/fraction.py
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    DAMO_NOT_QUOTE,
+    GraphFst,
+    convert_space,
+    delete_space,
+    delete_extra_space,
+    DAMO_SIGMA,
+    DAMO_CHAR,
+    DAMO_SPACE,
+)
+import pynini
+from pynini.lib import pynutil
+
+
+class FractionFst(GraphFst):
+    """
+    Finite state transducer for classifying Korean fractions, spoken denominator first
+        e.g. 마이너스 <denominator>분의 <numerator>
+            -> fraction { negative: "true" denominator: "<digits>" numerator: "<digits>" }
+
+    Args:
+        cardinal: CardinalFst
+    """
+
+    def __init__(self, cardinal: GraphFst):
+        super().__init__(name="fraction", kind="classify")
+        # integer_part # numerator # denominator
+
+        graph_cardinal = cardinal.graph_no_exception
+
+        # without the integer part
+        # numerator
+        numerator = pynutil.insert('numerator: "') + graph_cardinal + pynutil.insert('"')
+        # denominator: a cardinal directly followed by the marker 분의, which is deleted
+        denominator = (
+            pynutil.insert('denominator: "')
+            + graph_cardinal
+            + pynutil.delete("분의")
+            + pynutil.insert('"')
+        )
+
+        # exactly one space separates the two parts; it is kept as the field separator
+        graph_fraction_component = denominator + pynini.cross(" ", " ") + numerator
+
+        self.graph_fraction_component = graph_fraction_component
+
+        graph = graph_fraction_component
+        graph = graph.optimize()
+        self.final_graph_wo_negative = graph
+
+        # negative: optional leading 마이너스 (minus) followed by a single space
+        optional_graph_negative = pynini.closure(
+            pynutil.insert("negative: ") + pynini.cross("마이너스", '"true"') + DAMO_SPACE, 0, 1
+        )
+
+        graph = optional_graph_negative + graph
+        final_graph = self.add_tokens(graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/measure.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/measure.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    DAMO_SIGMA,
+    GraphFst,
+    convert_space,
+    delete_extra_space,
+    delete_space,
+    get_singulars,
+)
+from pynini.lib import pynutil
+
+
+class MeasureFst(GraphFst):
+    """
+    Finite state transducer for classifying measure
+        e.g. 마이너스 <cardinal> <unit>
+            -> measure { cardinal { negative: "true" integer: "<digits>" } units: "<abbr>" }
+        e.g. <decimal> <unit>
+            -> measure { decimal { <decimal fields> } units: "<abbr>" }
+
+    Args:
+        cardinal: CardinalFst
+        decimal: DecimalFst
+    """
+
+    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
+        super().__init__(name="measure", kind="classify")
+
+        cardinal_graph = cardinal.graph_no_exception
+        decimal_graph = decimal.final_graph_wo_negative
+
+        unit_graph = pynini.string_file(get_abs_path("data/measurements.tsv"))
+
+        # the table is inverted so the spoken unit is read and its abbreviation emitted
+        graph_unit = pynini.invert(unit_graph)  # singular -> abbr
+
+        # 마이너스 = minus. The marker must be the full word, as in the cardinal, decimal and
+        # fraction taggers: with the truncated "마이너" the trailing "스" of a spoken
+        # "마이너스" blocked the whitespace match, so negative measures were never tagged.
+        optional_graph_negative = pynini.closure(
+            pynutil.insert("negative: ") + pynini.cross("마이너스", '"true"') + delete_extra_space,
+            0,
+            1,
+        )
+
+        graph_units = pynutil.insert('units: "') + graph_unit + pynutil.insert('"')
+
+        # <decimal> <unit>; the whitespace before the unit collapses into one space
+        subgraph_decimal = (
+            pynutil.insert("decimal { ")
+            + optional_graph_negative
+            + decimal_graph
+            + pynutil.insert(" }")
+            + delete_extra_space
+            + graph_units
+        )
+        # <cardinal> <unit>
+        subgraph_cardinal = (
+            pynutil.insert("cardinal { ")
+            + optional_graph_negative
+            + pynutil.insert('integer: "')
+            + cardinal_graph
+            + pynutil.insert('"')
+            + pynutil.insert(" }")
+            + delete_extra_space
+            + graph_units
+        )
+
+        final_graph = subgraph_decimal | subgraph_cardinal
+        final_graph = self.add_tokens(final_graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/money.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/money.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    DAMO_DIGIT,
+    DAMO_NOT_SPACE,
+    DAMO_SIGMA,
+    GraphFst,
+    convert_space,
+    delete_extra_space,
+    delete_space,
+    get_singulars,
+    insert_space,
+)
+from pynini.lib import pynutil
+
+
+class MoneyFst(GraphFst):
+    """
+    Finite state transducer for classifying money
+        e.g. <cardinal> <currency word> -> money { integer_part: "<digits>" currency: "<symbol>" }
+        e.g. <decimal><currency word>   -> money { <decimal fields> currency: "<symbol>" }
+
+    Args:
+        cardinal: CardinalFst
+        decimal: DecimalFst
+    """
+
+    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
+        super().__init__(name="money", kind="classify")
+        # quantity, integer_part, fractional_part, currency
+
+        # currency.tsv is inverted before being wrapped in the currency field
+        currency_table = pynini.string_file(get_abs_path("data/currency.tsv")).invert()
+        currency_field = pynutil.insert('currency: "') + currency_table + pynutil.insert('"')
+
+        # whole amount: the whitespace before the currency collapses into one space
+        integer_field = (
+            pynutil.insert('integer_part: "')
+            + cardinal.graph_no_exception
+            + pynutil.insert('"')
+        )
+        whole_amount = integer_field + delete_extra_space + currency_field
+
+        # decimal amount: the separating space is inserted, none is consumed from the input
+        decimal_amount = decimal.final_graph_wo_negative + pynutil.insert(" ") + currency_field
+
+        tagged = self.add_tokens(whole_amount | decimal_amount)
+        self.fst = tagged.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/punctuation.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/punctuation.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
+from pynini.lib import pynutil
+
+
+class PunctuationFst(GraphFst):
+    """
+    Finite state transducer for classifying punctuation
+        e.g. a, -> tokens { name: "a" } tokens { name: "," }
+    """
+
+    def __init__(self):
+        super().__init__(name="punctuation", kind="classify")
+
+        s = ",.?"  # here, we only support three type of punctuation
+        punct = pynini.union(*s)
+
+        graph = pynutil.insert('name: "') + punct + pynutil.insert('"')
+
+        self.fst = graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/telephone.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/telephone.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    DAMO_ALNUM,
+    DAMO_ALPHA,
+    DAMO_DIGIT,
+    GraphFst,
+    insert_space,
+)
+from pynini.lib import pynutil
+
+
+def get_serial_number(cardinal):
+    """
+    any alphanumerical character sequence with at least one number with length greater equal to 3
+    """
+    digit = pynini.compose(cardinal.graph_no_exception, DAMO_DIGIT)
+    character = digit | DAMO_ALPHA
+    sequence = character + pynini.closure(pynutil.delete(" ") + character, 2)
+    sequence = sequence @ (pynini.closure(DAMO_ALNUM) + DAMO_DIGIT + pynini.closure(DAMO_ALNUM))
+    return sequence.optimize()
+
+
+class TelephoneFst(GraphFst):
+    """
+    Finite state transducer for classifying telephone numbers, e.g.
+        one two three one two three five six seven eight -> { number_part: "123-123-5678" }
+
+    This class also support card number and IP format.
+        "one two three dot one double three dot o dot four o" -> { number_part: "123.133.0.40"}
+
+        "three two double seven three two one four three two one four three double zero five" ->
+            { number_part: 3277 3214 3214 3005}
+
+    Args:
+        cardinal: CardinalFst
+    """
+
+    def __init__(self, cardinal: GraphFst):
+        super().__init__(name="telephone", kind="classify")
+        # country code, number_part, extension
+
+        graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
+        graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
+        graph_dot = pynini.string_file(get_abs_path("data/numbers/dot.tsv"))
+
+        graph_digits = graph_digit | graph_zero
+
+        phone_number_graph = graph_digits**9 | graph_digits**10 | graph_digits**11
+
+        country_code = (
+            pynutil.insert('country_code: "')
+            + pynini.closure(pynini.cross("더한", "+"), 0, 1)
+            + (pynini.closure(graph_digits, 0, 2) + graph_digits)
+            + pynutil.insert('"')
+        )
+
+        optional_country_code = pynini.closure(
+            country_code + pynutil.delete(" ") + insert_space, 0, 1
+        ).optimize()
+
+        grpah_phone_number = (
+            pynutil.insert('number_part: "') + phone_number_graph + pynutil.insert('"')
+        )
+
+        graph = optional_country_code + grpah_phone_number
+
+        # ip
+        ip_graph = graph_digit.plus + (graph_dot + graph_digits.plus).plus
+
+        graph |= pynutil.insert('number_part: "') + ip_graph.optimize() + pynutil.insert('"')
+        graph |= (
+            pynutil.insert('number_part: "')
+            + pynutil.add_weight(get_serial_number(cardinal=cardinal), weight=0.0001)
+            + pynutil.insert('"')
+        )
+
+        final_graph = self.add_tokens(graph)
+        self.fst = final_graph.optimize()
--- a/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/time.py
+++ b/FunASR/fun_text_processing/inverse_text_normalization/ko/taggers/time.py
+import pynini
+from fun_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
+from fun_text_processing.inverse_text_normalization.ko.utils import get_abs_path, num_to_word
+from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
+    GraphFst,
+    convert_space,
+    delete_extra_space,
+    delete_space,
+    insert_space,
+)
+from pynini.lib import pynutil
+
+
+class TimeFst(GraphFst):
+    """
+    Finite state transducer for classifying time
+        e.g. twelve thirty -> time { hours: "12" minutes: "30" }
+        e.g. twelve past one -> time { minutes: "12" hours: "1" }
+        e.g. two o clock a m -> time { hours: "2" suffix: "a.m." }
+        e.g. quarter to two -> time { hours: "1" minutes: "45" }
+        e.g. quarter past two -> time { hours: "2" minutes: "15" }
+        e.g. half past two -> time { hours: "2" minutes: "30" }
+    """
+
+    def __init__(self):
+        super().__init__(name="time", kind="classify")
+        # hours, minutes, seconds, suffix, zone, style, speak_period
+
+        suffix_graph = pynini.string_file(get_abs_path("data/time/time_suffix.tsv"))
+        time_zone_graph = pynini.invert(pynini.string_file(get_abs_path("data/time/time_zone.tsv")))
+
+        hour_graph = pynini.string_file(get_abs_path("data/time/hours.tsv"))
+        minute_graph = pynini.string_file(get_abs_path("data/time/minutes.tsv"))
+        second_graph = pynini.string_file(get_abs_path("data/time/seconds.tsv"))
+
+        # only used for < 1000 thousand -> 0 weight
+        # cardinal = pynutil.add_weight(CardinalFst().graph_no_exception, weight=-0.7)
+
+        graph_hour = hour_graph
+        graph_minute = minute_graph
+        graph_second = second_graph
+
+        final_graph_hour = pynutil.insert('hours: "') + graph_hour + pynutil.insert('"')
+
+        final_suffix = (
+            pynutil.insert('suffix: "') + convert_space(suffix_graph) + pynutil.insert('"')
+        )
+        final_suffix = delete_space + insert_space + final_suffix
+        final_suffix_optional = pynini.closure(final_suffix, 0, 1)
+        final_time_zone_optional = pynini.closure(
+            delete_space
+            + insert_space
+            + pynutil.insert('zone: "')
+            + convert_space(time_zone_graph)
+            + pynutil.insert('"'),
+            0,
+            1,
+        )
+
+        graph_hm = (
+            final_graph_hour
+            + delete_extra_space
+            + pynutil.insert('minutes: "')
+            + graph_minute
+            + pynutil.insert('"')
+        )
+
+        graph_hms = (
+            final_graph_hour
+            + delete_extra_space
+            + pynutil.insert('minutes: "')
+            + graph_minute
+            + pynutil.insert('"')
+            + delete_extra_space
+            + pynutil.insert('seconds: "')
+            + graph_second
+            + pynutil.insert('"')
+        )
+
+        graph_h = (
+            final_graph_hour
+            + delete_extra_space
+            + pynutil.insert('minutes: "')
+            + (pynutil.insert("00") | graph_minute)
+            + pynutil.insert('"')
+            + final_suffix
+            + final_time_zone_optional
+        )
+
+        final_graph = (graph_hm | graph_hms) + final_suffix_optional + final_time_zone_optional
+
+        final_graph |= graph_h
+
+        final_graph = self.add_tokens(final_graph.optimize())
+
+        self.fst = final_graph.optimize()