update

bffed0fe · dengjb · bffed0fe · bffed0fe · bffed0fe · bffed0fe
Commit bffed0fe authored Nov 19, 2025 by dengjb
20 changed files
--- a/cdm/assets/demo/cdm_demo.png
+++ b/cdm/assets/demo/cdm_demo.png
--- a/cdm/assets/demo/cdm_framework.png
+++ b/cdm/assets/demo/cdm_framework.png
--- a/cdm/assets/demo/cdm_framework_new.png
+++ b/cdm/assets/demo/cdm_framework_new.png
--- a/cdm/assets/example/input_example.json
+++ b/cdm/assets/example/input_example.json
+[
+    {
+        "img_id": "case_1",
+        "gt": "r = \\frac { \\alpha } { \\beta } \\vert \\sin \\beta \\left( \\sigma _ { 1 } \\pm \\sigma _ { 2 } \\right) \\vert",
+        "pred": "r={\\frac{\\alpha}{\\beta}}|\\sin\\beta\\left(\\sigma_{2}+\\sigma_{1}\\right)|"
+    },
+    {
+        "img_id": "case_2",
+        "gt": "y = 2z + 3x",
+        "pred": "y = 2x + 3z"
+    },
+    {
+        "img_id": "case_3",
+        "gt": "\\begin{array} { r l r } & { } & { \\mathbf { J } _ { L } = \\left( \\begin{array} { c c } { 0 } & { 0 } \\\\ { v _ { n } } & { 0 } \\end{array} \\right) , ~ \\mathbf { J } _ { R } = \\left( \\begin{array} { c c } { u _ { n - 1 } } & { 0 } \\\\ { 0 } & { 0 } \\end{array} \\right) , ~ } \\\\ & { } & { ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ \\mathbf { K } = \\left( \\begin{array} { c c } { V _ { n - 1 } } & { u _ { n } } \\\\ { v _ { n - 1 } } & { V _ { n } } \\end{array} \\right) , } \\end{array}",
+        "pred": "\\mathbf{J}_{R}={\\left(\\begin{array}{l l}{0}&{0}\\\\ {v_{n}}&{0}\\end{array}\\right)}\\,,\\ \\mathbf{J}_{L}={\\left(\\begin{array}{l l}{u_{n-1}}&{0}\\\\ {0}&{0}\\end{array}\\right)}\\,,\\mathbf{K}={\\left(\\begin{array}{l l}{V_{n-1}}&{u_{n}}\\\\ {v_{n-1}}&{V_{n}}\\end{array}\\right)}\\,,"
+    }
+]
\ No newline at end of file
--- a/cdm/assets/example/table_example.json
+++ b/cdm/assets/example/table_example.json
+[
+    {
+        "pred": "\\begin{tabular}{cccc}Property&Value\\\\ \\hline$e^{min}_c$&0.069\\\\ $e^{min}_d$&0.074\\\\ $e^{max}_c$&0.39\\\\ $e^{max}_d$&0.365\\\\ $\\epsilon$&0.17\\\\ $i^{min}_c$&$16.0^\\circ$\\\\ $i^{min}_d$&$11.6^\\circ$\\\\ $i^{max}_c$&$20.4^\\circ$\\\\ $i^{max}_d$&$16.7^\\circ$\\\\ $\\Psi^{min}$&$27.6^\\circ$\\\\ $\\Psi^{max}$&$37.1^\\circ$\\\\ $\\beta/\\beta_{crit}$&1.075\\\\ \\end{tabular} ",
+        "gt": "\\begin{tabular}{cc}Property&Value\\\\\\hline$e^{min}_c$&0.069\\\\$e^{min}_d$&0.074\\\\$e^{max}_c$&0.39\\\\$e^{max}_d$&0.365\\\\$\\epsilon$&0.17\\\\$i^{min}_c$&$16.0^\\circ$\\\\$i^{min}_d$&$11.6^\\circ$\\\\$i^{max}_c$&$20.4^\\circ$\\\\$i^{max}_d$&$16.7^\\circ$\\\\$\\Psi^{min}$&$27.6^\\circ$\\\\$\\Psi^{max}$&$37.1^\\circ$\\\\$\\beta/\\beta_{crit}$&1.075\\\\ \\end{tabular}\n"
+    },
+    {
+        "pred": "\\begin{tabular}{l r r r r}\\hline Element&\\multicolumn{2}{c}{fully convective}&\\multicolumn{2}{c}{convective at$\\tau_{\\rm R}=3.2$}\\\\ &$\\log\\tau_{\\rm diff}$[yrs]&$\\dot{m}$[g\\,s$^{-1}$]&$\\log\\tau_{\\rm diff}$[yrs]&$\\dot{m}$[g\\,s$^{-1}$]\\\\ \\hline$12$Mg&$-0.46$&$1.7\\times10^8$&$-2.2$&$7.3\\times10^7$\\\\ $14$Si&$-0.36$&$\\leq2.5\\times10^8$&$-2.5$&$\\leq2.9\\times10^8$\\\\ $20$Ca&$-0.37$&$7.2\\times10^6$&$-2.3$&$5.0\\times10^6$\\\\ \\hline\\end{tabular} ",
+        "gt": "\\begin{tabular}{lrrrr}\\hline\nElement&\\multicolumn{2}{c}{fully convective}&\\multicolumn{2}{c}{convective at$\\tau_\\mathrm{R}=3.2$}\\\\ &$\\log\\tau_\\mathrm{diff}$[yrs]&$\\dot m~[\\mathrm{g\\,s^{-1}}]$&$\\log\\tau_\\mathrm{diff}$[yrs]&$\\dot m~[\\mathrm{g\\,s^{-1}}]$\\\\\\hline12Mg&$-0.46$&$1.7\\times10^8$&$-2.2$&$7.3\\times10^7$\\\\14Si&$-0.36$&$\\leq2.5\\times10^8$&$-2.5$&$\\leq2.9\\times10^8$\\\\20Ca&$-0.37$&$7.2\\times10^6$&$-2.3$&$5.0\\times10^6$\\\\\\hline\\end{tabular}\n"
+    }
+]
\ No newline at end of file
--- a/cdm/convert2cdm_format.py
+++ b/cdm/convert2cdm_format.py
+import os
+import json
+from tqdm import tqdm
+import argparse
+def change_data_format(input_json, output_json):
+    """Flatten a grouped prediction dump into the list format read by the CDM evaluator.
+
+    Args:
+        input_json: path of a JSON object whose values each hold a ``'text'``
+            list of records with ``image_path``, ``reference`` and ``prediction``.
+        output_json: path of the JSON list to write; every item has the keys
+            ``img_id``, ``gt`` and ``pred``.
+    """
+    # JSON is UTF-8 by specification; do not depend on the locale encoding.
+    with open(input_json, 'r', encoding='utf-8') as f:
+        all_datas = json.load(f)
+    data_list = []
+    for key, group in all_datas.items():
+        # Characters -4..-2 of the group key, lower-cased, prefix every image id.
+        subset = key[-4:-1].lower()
+        for data in tqdm(group['text']):
+            # splitext drops the real extension whatever its length
+            # (the previous [0:-4] slice assumed exactly three characters).
+            im_id = os.path.splitext(os.path.basename(data['image_path']))[0]
+            data_list.append({
+                "img_id": f"{subset}_{im_id}",
+                "gt": data["reference"],
+                "pred": data["prediction"]
+            })
+    with open(output_json, "w", encoding='utf-8') as f:
+        f.write(json.dumps(data_list, indent=2))
+if __name__ == '__main__':
+    # Command-line entry point. Both paths are mandatory so that a missing
+    # argument yields a clear argparse error instead of open(None) failing later.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input', '-i', type=str, required=True)
+    parser.add_argument('--output', '-o', type=str, required=True)
+    args = parser.parse_args()
+    print(args)
+    change_data_format(args.input, args.output)
\ No newline at end of file
--- a/cdm/evaluation.py
+++ b/cdm/evaluation.py
+import sys
+import os
+import re
+import json
+import time
+import shutil
+import argparse
+import numpy as np
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+from datetime import datetime
+from multiprocessing import Pool
+from multiprocessing.dummy import Pool as ThreadPool 
+from PIL import Image, ImageDraw
+from skimage.measure import ransac
+from modules.latex2bbox_color import latex2bbox_color
+from modules.tokenize_latex.tokenize_latex import tokenize_latex
+from modules.visual_matcher import HungarianMatcher, SimpleAffineTransform
+def gen_color_list(num=10, gap=15):
+    """Return up to ``num`` distinct RGB tuples whose channels are multiples of ``gap``.
+
+    Colors are enumerated as base-``(255 // gap + 1)`` numbers with blue as the
+    fastest-changing channel. Index 0 (black) is always left out, and the
+    result is capped by the number of colors the grid can hold.
+    """
+    levels = 255 // gap + 1
+    total = min(num + 1, levels ** 3)
+    palette = []
+    for code in range(1, total):
+        red, rest = divmod(code, levels ** 2)
+        green, blue = divmod(rest, levels)
+        palette.append((red * gap, green * gap, blue * gap))
+    return palette
+def update_inliers(ori_inliers, sub_inliers):
+    """Merge a RANSAC result computed on the current outliers back into the full mask.
+
+    ``sub_inliers`` has one entry per False position of ``ori_inliers`` (in
+    order). Positions that were already True stay True; a False position
+    becomes True when its entry in ``sub_inliers`` is True. Returns a copy.
+    """
+    merged = np.copy(ori_inliers)
+    cursor = 0
+    for pos, flag in enumerate(ori_inliers):
+        if flag == False:
+            if sub_inliers[cursor] == True:
+                merged[pos] = True
+            cursor += 1
+    return merged
+def reshape_inliers(ori_inliers, sub_inliers):
+    """Expand ``sub_inliers`` to the length of ``ori_inliers``.
+
+    ``sub_inliers`` has one entry per False position of ``ori_inliers``. The
+    result is True only where the original was False and the matching
+    ``sub_inliers`` entry is True; positions that were True become False.
+    """
+    expanded = np.copy(ori_inliers)
+    cursor = 0
+    for pos, flag in enumerate(ori_inliers):
+        if flag == False:
+            if sub_inliers[cursor] == True:
+                expanded[pos] = True
+            cursor += 1
+        else:
+            expanded[pos] = False
+    return expanded
+def gen_token_order(box_list):
+    """Return a deep copy of ``box_list`` in which every box carries a normalised ``order``.
+
+    ``order`` is the box index divided by the number of boxes, i.e. a value in
+    ``[0, 1)``. The input list and its dicts are left untouched.
+    """
+    # The module never imports ``copy`` at file level; import it here so the
+    # function no longer fails with NameError.
+    import copy
+
+    new_box_list = copy.deepcopy(box_list)
+    total = len(new_box_list)
+    for idx, box in enumerate(new_box_list):
+        box['order'] = idx / total
+    return new_box_list
+def _load_valid_boxes(jsonl_path):
+    """Read a bbox ``.jsonl`` file and keep the records whose ``bbox`` is non-empty.
+
+    Returns an empty list when the file does not exist.
+    """
+    if not os.path.exists(jsonl_path):
+        return []
+    # The bbox files are written as UTF-8 (tokens may be non-ASCII), so read
+    # them back with the same encoding instead of the locale default.
+    with open(jsonl_path, 'r', encoding='utf-8') as f:
+        return [info for info in map(json.loads, f) if info['bbox']]
+def evaluation(data_root, user_id="test"):
+    """Score rendered predictions against rendered ground truth for one experiment.
+
+    Reads ``<data_root>/<user_id>/{gt,pred}/bbox/*.jsonl`` and the matching
+    ``vis/*_base.png`` images, pairs gt and pred token boxes with
+    ``HungarianMatcher``, keeps the pairs accepted by repeated RANSAC fits of
+    ``SimpleAffineTransform`` and derives recall / precision / F1 per image.
+    Match visualisations are written to ``vis_match`` and all metrics to
+    ``metrics_res.json`` inside the experiment directory.
+
+    Samples without a usable ground-truth box are skipped; samples without a
+    usable prediction box score 0 on every metric.
+
+    Args:
+        data_root: output root that contains one directory per experiment.
+        user_id: experiment directory name below ``data_root``.
+
+    Returns:
+        ``(metrics_res, metric_res_path, match_vis_dir)``.
+    """
+    data_root = os.path.join(data_root, user_id)
+    gt_box_dir = os.path.join(data_root, "gt")
+    pred_box_dir = os.path.join(data_root, "pred")
+    match_vis_dir = os.path.join(data_root, "vis_match")
+    os.makedirs(match_vis_dir, exist_ok=True)
+    # RANSAC settings
+    max_iter = 5
+    min_samples = 2
+    residual_threshold = 20
+    max_trials = 50
+    metrics_per_img = {}
+    # Strip only the ".jsonl" suffix so that ids containing dots stay intact
+    # (splitting on the first "." silently dropped such samples).
+    bbox_suffix = ".jsonl"
+    gt_basename_list = [
+        name[:-len(bbox_suffix)]
+        for name in os.listdir(os.path.join(gt_box_dir, 'bbox'))
+        if name.endswith(bbox_suffix)
+    ]
+    for basename in tqdm(gt_basename_list):
+        box_gt = _load_valid_boxes(os.path.join(gt_box_dir, 'bbox', basename+bbox_suffix))
+        if not box_gt:
+            continue
+        box_pred = _load_valid_boxes(os.path.join(pred_box_dir, 'bbox', basename+bbox_suffix))
+        if not box_pred:
+            metrics_per_img[basename] = {
+                "recall": 0,
+                "precision": 0,
+                "F1_score": 0,
+            }
+            continue
+        gt_img_path = os.path.join(gt_box_dir, 'vis', basename+"_base.png")
+        pred_img_path = os.path.join(pred_box_dir, 'vis', basename+"_base.png")
+        img_gt = Image.open(gt_img_path)
+        img_pred = Image.open(pred_img_path)
+        matcher = HungarianMatcher()
+        matched_idxes = matcher(box_gt, box_pred, img_gt.size, img_pred.size)
+        # Box centres of every matched pair, stored as (y, x).
+        src = []
+        dst = []
+        for (idx1, idx2) in matched_idxes:
+            x1min, y1min, x1max, y1max = box_gt[idx1]['bbox']
+            x2min, y2min, x2max, y2max = box_pred[idx2]['bbox']
+            x1_c, y1_c = float((x1min+x1max)/2), float((y1min+y1max)/2)
+            x2_c, y2_c = float((x2min+x2max)/2), float((y2min+y2max)/2)
+            src.append([y1_c, x1_c])
+            dst.append([y2_c, x2_c])
+        src = np.array(src)
+        dst = np.array(dst)
+        if src.shape[0] <= min_samples:
+            # Too few pairs to fit a model: accept all of them.
+            inliers = np.array([True for _ in matched_idxes])
+        else:
+            inliers = np.array([False for _ in matched_idxes])
+            # Fit again on the remaining outliers so that several groups of
+            # tokens, each with its own transform, can all be accepted.
+            for i in range(max_iter):
+                if src[inliers==False].shape[0] <= min_samples:
+                    break
+                model, inliers_1 = ransac((src[inliers==False], dst[inliers==False]), SimpleAffineTransform, min_samples=min_samples, residual_threshold=residual_threshold, max_trials=max_trials, random_state=42)
+                if inliers_1 is not None and inliers_1.any():
+                    inliers = update_inliers(inliers, inliers_1)
+                else:
+                    break
+                if len(inliers[inliers==True]) >= len(matched_idxes):
+                    break
+        # Reject pairs whose token cost is exactly 1 in the matcher.
+        for idx, (a,b) in enumerate(matched_idxes):
+            if inliers[idx] == True and matcher.cost['token'][a, b] == 1:
+                inliers[idx] = False
+        final_match_num = len(inliers[inliers==True])
+        recall = round(final_match_num/(len(box_gt)), 3)
+        precision = round(final_match_num/(len(box_pred)), 3)
+        F1_score = round(2*final_match_num/(len(box_gt)+len(box_pred)), 3)
+        metrics_per_img[basename] = {
+            "recall": recall,
+            "precision": precision,
+            "F1_score": F1_score,
+        }
+        # Visualisation: gt image on top, a grey separator, pred image below.
+        gap = 5
+        W1, H1 = img_gt.size
+        W2, H2 = img_pred.size
+        H = H1 + H2 + gap
+        W = max(W1, W2)
+        vis_img = Image.new('RGB', (W, H), (255, 255, 255))
+        vis_img.paste(img_gt, (0, 0))
+        vis_img.paste(Image.new('RGB', (W, gap), (120, 120, 120)), (0, H1))
+        vis_img.paste(img_pred, (0, H1+gap))
+        match_img = vis_img.copy()
+        match_draw = ImageDraw.Draw(match_img)
+        gt_matched_idx = {
+            a: flag
+            for (a,b), flag in
+            zip(matched_idxes, inliers)
+        }
+        pred_matched_idx = {
+            b: flag
+            for (a,b), flag in
+            zip(matched_idxes, inliers)
+        }
+        # Green outline = accepted match, red = unmatched or rejected.
+        for idx, box in enumerate(box_gt):
+            if idx in gt_matched_idx and gt_matched_idx[idx]==True:
+                color = "green"
+            else:
+                color = "red"
+            x_min, y_min, x_max, y_max = box['bbox']
+            match_draw.rectangle([x_min-1, y_min-1, x_max+1, y_max+1], fill=None, outline=color, width=2)
+        for idx, box in enumerate(box_pred):
+            if idx in pred_matched_idx and pred_matched_idx[idx]==True:
+                color = "green"
+            else:
+                color = "red"
+            x_min, y_min, x_max, y_max = box['bbox']
+            match_draw.rectangle([x_min-1, y_min-1+H1+gap, x_max+1, y_max+1+H1+gap], fill=None, outline=color, width=2)
+        vis_img.save(os.path.join(match_vis_dir, basename+"_base.png"))
+        match_img.save(os.path.join(match_vis_dir, basename+".png"))
+    score_list = [val['F1_score'] for val in metrics_per_img.values()]
+    exp_list = [1 if score==1 else 0 for score in score_list]
+    # np.mean([]) is NaN, which json.dumps would emit as invalid JSON;
+    # report 0.0 when no sample could be scored.
+    metrics_res = {
+        "mean_score": round(float(np.mean(score_list)), 3) if score_list else 0.0,
+        "exp_rate": round(float(np.mean(exp_list)), 3) if exp_list else 0.0,
+        "details": metrics_per_img
+    }
+    metric_res_path = os.path.join(data_root, "metrics_res.json")
+    with open(metric_res_path, "w") as f:
+        f.write(json.dumps(metrics_res, indent=2))
+    return metrics_res, metric_res_path, match_vis_dir
+if __name__ == '__main__':
+    # Pipeline: read the gt/pred LaTeX pairs, render every string into
+    # color-coded token boxes (optionally in a process pool), then score them.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input', '-i', type=str, default="assets/example/input_example.json")
+    parser.add_argument('--output', '-o', type=str, default="output")
+    parser.add_argument('--pools', '-p', type=int, default=240)
+    args = parser.parse_args()
+    print(args)
+    json_input, data_root, pool_num = args.input, args.output, args.pools
+    temp_dir = os.path.join(data_root, "temp_dir")
+    exp_name = os.path.basename(json_input).split('.')[0]
+    # JSON is UTF-8 by specification; do not depend on the locale encoding.
+    with open(json_input, "r", encoding="utf-8") as f:
+        input_data = json.load(f)
+    img_ids = []
+    groundtruths = []
+    predictions = []
+    for idx, item in enumerate(input_data):
+        # Items without an explicit id are named after their position.
+        img_ids.append(item["img_id"] if "img_id" in item else f"sample_{idx}")
+        groundtruths.append(item['gt'])
+        predictions.append(item['pred'])
+    a = time.time()
+    user_id = exp_name
+    total_color_list = gen_color_list(num=5800)
+    data_root = os.path.join(data_root, user_id)
+    input_args = []
+    for subset, latex_list in zip(['gt', 'pred'], [groundtruths, predictions]):
+        sub_temp_dir = os.path.join(temp_dir, f"{exp_name}_{subset}")
+        os.makedirs(sub_temp_dir, exist_ok=True)
+        output_path = os.path.join(data_root, subset)
+        os.makedirs(os.path.join(output_path, 'bbox'), exist_ok=True)
+        os.makedirs(os.path.join(output_path, 'vis'), exist_ok=True)
+        for idx, latex in tqdm(enumerate(latex_list), desc=f"collect {subset} latex ..."):
+            basename = img_ids[idx]
+            input_arg = latex, basename, output_path, sub_temp_dir, total_color_list
+            input_args.append(input_arg)
+    if pool_num > 1:
+        # Never start more workers than there are jobs.
+        worker_num = max(1, min(pool_num, len(input_args)))
+        print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "using processpool, pool num:", worker_num, ", job num:", len(input_args))
+        myP = Pool(worker_num)
+        pending = [myP.apply_async(latex2bbox_color, args=(input_arg,)) for input_arg in input_args]
+        myP.close()
+        myP.join()
+        # apply_async keeps worker exceptions inside the result object;
+        # fetch every result so failures are reported instead of vanishing.
+        for input_arg, result in zip(input_args, pending):
+            try:
+                result.get()
+            except Exception as e:
+                print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), f"WARNING, render failed: {input_arg[1]} ({input_arg[2]}); {e}")
+    else:
+        for input_arg in input_args:
+            latex2bbox_color(input_arg)
+    b = time.time()
+    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "extract bbox done, time cost:", round(b-a, 3), "s")
+    for subset in ['gt', 'pred']:
+        # Cleanup is best effort; a missing temp dir must not abort the scoring.
+        shutil.rmtree(os.path.join(temp_dir, f"{exp_name}_{subset}"), ignore_errors=True)
+    c = time.time()
+    metrics_res, metric_res_path, match_vis_dir = evaluation(args.output, exp_name)
+    d = time.time()
+    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "calculate metrics done, time cost:", round(d-c, 3), "s")
+    print(f"=> process done, mean f1 score: {metrics_res['mean_score']}.")
+    print(f"=> more details of metrics are saved in `{metric_res_path}`")
+    print(f"=> visulization images are saved under `{match_vis_dir}`")
\ No newline at end of file
--- a/cdm/modules/latex2bbox_color.py
+++ b/cdm/modules/latex2bbox_color.py
+import os
+import re
+import cv2
+import json
+import shutil
+import logging
+import subprocess
+import numpy as np
+from threading import Timer
+from PIL import Image, ImageDraw
+from modules.latex_processor import (
+    normalize_latex,
+    token_add_color_RGB,
+    clean_latex
+)
+from modules.tokenize_latex.tokenize_latex import tokenize_latex
+# LaTeX document templates used to render one formula/table per PDF page.
+# Each template is filled in two steps: the "<PaperSize>" marker is replaced by
+# the ISO A-series number, then the colored LaTeX is inserted with "%"
+# formatting at the single "%s". Because of that "%" formatting, every literal
+# LaTeX "%" inside a template has to be written as "%%".
+#
+# Template for inputs that contain "tabular" (loads multirow in addition).
+tabular_template = r"""
+\documentclass[12pt]{article}
+\usepackage[landscape]{geometry}
+\usepackage{geometry}
+\geometry{a<PaperSize>paper,scale=0.98}
+\pagestyle{empty}
+\usepackage{booktabs}
+\usepackage{multirow}
+\usepackage{amssymb}
+\usepackage{upgreek}
+\usepackage{amsmath}
+\usepackage{xcolor}
+\begin{document}
+\makeatletter
+\renewcommand*{\@textcolor}[3]{%%
+  \protect\leavevmode
+  \begingroup
+    \color#1{#2}#3%%
+  \endgroup
+}
+\makeatother
+\begin{displaymath}
+%s
+\end{displaymath}
+\end{document}
+"""
+# Default template for formulas without Chinese characters.
+formular_template = r"""
+\documentclass[12pt]{article}
+\usepackage[landscape]{geometry}
+\usepackage{geometry}
+\geometry{a<PaperSize>paper,scale=0.98}
+\pagestyle{empty}
+\usepackage{booktabs}
+\usepackage{amsmath}
+\usepackage{upgreek}
+\usepackage{amssymb}
+\usepackage{xcolor}
+\begin{document}
+\makeatletter
+\renewcommand*{\@textcolor}[3]{%%
+  \protect\leavevmode
+  \begingroup
+    \color#1{#2}#3%%
+  \endgroup
+}
+\makeatother
+\begin{displaymath}
+%s
+\end{displaymath}
+\end{document}
+"""
+# Template for formulas that contain Chinese characters: same as above, but the
+# math is wrapped in a CJK environment (UTF8 encoding, "gkai" font).
+formular_template_zh = r"""
+\documentclass[12pt]{article}
+\usepackage[landscape]{geometry}
+\usepackage{geometry}
+\geometry{a<PaperSize>paper,scale=0.98}
+\pagestyle{empty}
+\usepackage{booktabs}
+\usepackage{amsmath}
+\usepackage{upgreek}
+\usepackage{CJK}
+\usepackage{amssymb}
+\usepackage{xcolor}
+\begin{document}
+\makeatletter
+\renewcommand*{\@textcolor}[3]{%%
+  \protect\leavevmode
+  \begingroup
+    \color#1{#2}#3%%
+  \endgroup
+}
+\makeatother
+\begin{CJK}{UTF8}{gkai}
+\begin{displaymath}
+%s
+\end{displaymath}
+\end{CJK}
+\end{document}
+"""
+def run_cmd(cmd, timeout_sec=30):
+    """Run ``cmd`` through the shell and kill the spawned process after ``timeout_sec`` seconds.
+
+    Blocks until the process has ended (by itself or through the kill).
+    Returns None.
+    """
+    proc = subprocess.Popen(cmd, shell=True)
+    # Watchdog thread that kills the process once the time budget is used up.
+    watchdog = Timer(timeout_sec, proc.kill)
+    try:
+        watchdog.start()
+        proc.communicate()
+    finally:
+        watchdog.cancel()
+def convert_pdf2img(pdf_filename, png_filename):
+    """Rasterise ``pdf_filename`` into ``png_filename`` with ImageMagick (200 dpi).
+
+    The command is passed as an argument list without a shell, so paths that
+    contain spaces or shell metacharacters are handled correctly. The exit
+    status is not checked; a missing ``magick`` executable is logged.
+    """
+    cmd = ["magick", "-density", "200", "-quality", "100", pdf_filename, png_filename]
+    try:
+        subprocess.run(cmd, check=False)
+    except OSError as e:
+        # Same outcome as a failed os.system call: no image is produced.
+        logging.info(f"ERROR, Convert pdf failed: {pdf_filename}; {e}.")
+def crop_image(image_path, pad=8):
+    """Crop the image file in place to its non-white content plus ``pad`` pixels.
+
+    "Non-white" means a grayscale value other than 255. An entirely white
+    image falls back to the content box (0, 0, 10, 10). The result is saved
+    as RGB over the original file.
+    """
+    gray = np.asarray(Image.open(image_path).convert("L"), dtype=np.uint8)
+    rows, cols = np.where(gray != 255)
+    if len(rows) == 0:
+        x_min, y_min, x_max, y_max = 0, 0, 10, 10
+    else:
+        x_min, x_max = np.min(cols), np.max(cols)
+        y_min, y_max = np.min(rows), np.max(rows)
+    crop_box = (x_min-pad, y_min-pad, x_max+pad, y_max+pad)
+    Image.open(image_path).convert("RGB").crop(crop_box).save(image_path)
+def extrac_bbox_from_color_image(image_path, color_list):
+    """Locate the pixels of every color in ``color_list`` and return one bbox per color.
+
+    Each entry of the result is ``[x_min-1, y_min-1, x_max+1, y_max+1]`` for the
+    pixels that match the color exactly, or ``[]`` when the color does not
+    occur. Afterwards the image file is overwritten with a black/white version
+    (pure white stays white, everything else becomes black).
+    """
+    pixels = cv2.imread(image_path)
+    bbox_list = []
+    for red, green, blue in color_list:
+        # The array returned by cv2.imread is compared in B, G, R order.
+        bgr = np.array([blue, green, red], dtype=np.uint8)
+        hits = np.argwhere(np.all(pixels == bgr, axis=2))
+        if hits.size == 0:
+            bbox_list.append([])
+            continue
+        ys, xs = hits[:, 0], hits[:, 1]
+        bbox_list.append([int(xs.min()-1), int(ys.min()-1), int(xs.max()+1), int(ys.max()+1)])
+    gray = Image.open(image_path).convert("RGB").convert("L")
+    binary = gray.point(lambda x: 255 if x == 255 else 0, '1')
+    binary.convert("RGB").save(image_path)
+    return bbox_list
+def contains_chinese(text):
+    """Return True when ``text`` contains at least one Chinese character.
+
+    Covers CJK Unified Ideographs (U+4E00-U+9FFF) and Extension A
+    (U+3400-U+4DBF), the same ranges that ``wrap_chinese_in_text`` wraps, so
+    the two helpers agree on what counts as Chinese.
+    """
+    return re.search(r'[\u4e00-\u9fff\u3400-\u4dbf]', text) is not None
+def wrap_chinese_in_text(latex_text):
+    """Wrap every run of Chinese characters in a LaTeX text command.
+
+    A run is left untouched when it is already directly enclosed, i.e. the six
+    characters before it are the opening of a text command and the character
+    after it is the closing brace.
+    """
+    # One or more consecutive Chinese characters.
+    run_pattern = r'[\u4e00-\u9fff\u3400-\u4dbf]' + '+'
+    def _wrap(match):
+        run = match.group(0)
+        begin, end = match.start(), match.end()
+        # Look at the text right before and right after the run.
+        head = latex_text[max(0, begin-6):begin]
+        tail = latex_text[end:min(len(latex_text), end+1)]
+        already_wrapped = head.endswith('\\text{') and tail.startswith('}')
+        return run if already_wrapped else f'\\text{{{run}}}'
+    return re.sub(run_pattern, _wrap, latex_text)
+def latex2bbox_color(input_arg):
+    """Render one LaTeX string with a distinct color per token and extract the token boxes.
+
+    Args:
+        input_arg: tuple ``(latex, basename, output_path, temp_dir, total_color_list)``.
+            ``total_color_list`` supplies one RGB tuple per token position.
+
+    Writes below ``output_path``:
+        ``bbox/<basename>.jsonl``    one ``{"bbox", "token"}`` record per token,
+        ``vis/<basename>_base.png``  the cropped, binarised rendering,
+        ``vis/<basename>.png``       the rendering with boxes and tokens drawn.
+
+    Returns None. Nothing is done when all three outputs already exist;
+    preprocessing and compile failures are logged and leave no outputs.
+    """
+    import shlex
+
+    latex, basename, output_path, temp_dir, total_color_list = input_arg
+    if "tabular" in latex:
+        template = tabular_template
+    else:
+        if contains_chinese(latex):
+            template = formular_template_zh
+            # Full-width punctuation is replaced by its ASCII counterpart.
+            latex = latex.replace("，", ", ").replace("：", ": ").replace("；", "; ")
+            latex = wrap_chinese_in_text(latex)
+        else:
+            template = formular_template
+    output_bbox_path = os.path.join(output_path, 'bbox', basename+'.jsonl')
+    output_vis_path = os.path.join(output_path, 'vis', basename+'.png')
+    output_base_path = os.path.join(output_path, 'vis', basename+'_base.png')
+    if os.path.exists(output_bbox_path) and os.path.exists(output_vis_path) and os.path.exists(output_base_path):
+        return
+    try:
+        latex = latex.replace("\n", " ")
+        # Protect escaped percent signs from the tokenizer; they are restored
+        # below ("\\%" is the same two characters the former "\%" produced,
+        # without the invalid-escape warning).
+        latex = latex.replace("\\%", "<PERCENTAGETOKEN>")
+        ret, new_latex = tokenize_latex(latex, middle_file=os.path.join(temp_dir, basename+'.txt'))
+        if not(ret and new_latex):
+            log = f"ERROR, Tokenize latex failed: {basename}."
+            logging.info(log)
+            new_latex = latex
+        if contains_chinese(new_latex):
+            new_latex = new_latex.replace("\\mathrm", "\\text")
+        new_latex = new_latex.replace("< P E R C E N T A G E T O K E N >", "\\%")
+        latex = normalize_latex(new_latex)
+        token_list = []
+        l_split = latex.strip().split(' ')
+        color_list = total_color_list[0:len(l_split)]
+        idx = 0
+        while idx < len(l_split):
+            l_split, idx, token_list = token_add_color_RGB(l_split, idx, token_list)
+        rgb_latex = " ".join(l_split)
+        # Replace the "<color_i>" placeholders by the actual RGB triples.
+        for idx, color in enumerate(color_list):
+            R, G, B = color
+            rgb_latex = rgb_latex.replace(f"<color_{idx}>", f"{R},{G},{B}")
+        # Longer inputs get a larger page (A3 > A4 > A5).
+        if len(token_list) > 1300:
+            paper_size = 3
+        elif len(token_list) > 600:
+            paper_size = 4
+        else:
+            paper_size = 5
+        final_latex = template.replace("<PaperSize>", str(paper_size)) % rgb_latex
+    except Exception as e:
+        log = f"ERROR, Preprocess latex failed: {basename}; {e}."
+        logging.info(log)
+        return
+    pre_name = output_path.replace('/', '_').replace('.','_') + '_' + basename
+    tex_filename = os.path.join(temp_dir, pre_name+'.tex')
+    log_filename = os.path.join(temp_dir, pre_name+'.log')
+    aux_filename = os.path.join(temp_dir, pre_name+'.aux')
+    # The CJK template declares UTF8 input, so write the source as UTF-8
+    # regardless of the locale encoding.
+    with open(tex_filename, "w", encoding="utf-8") as w:
+        print(final_latex, file=w)
+    # Quote the paths: basename comes from the input file and may contain
+    # spaces or shell metacharacters.
+    run_cmd(f"pdflatex -interaction=nonstopmode -output-directory={shlex.quote(temp_dir)} {shlex.quote(tex_filename)} >/dev/null")
+    # Best-effort cleanup; one missing file must not keep the others around.
+    for leftover in (tex_filename, log_filename, aux_filename):
+        try:
+            os.remove(leftover)
+        except OSError:
+            pass
+    pdf_filename = tex_filename[:-4]+'.pdf'
+    if not os.path.exists(pdf_filename):
+        log = f"ERROR, Compile pdf failed: {pdf_filename}"
+        logging.info(log)
+    else:
+        convert_pdf2img(pdf_filename, output_base_path)
+        os.remove(pdf_filename)
+        crop_image(output_base_path)
+        bbox_list = extrac_bbox_from_color_image(output_base_path, color_list)
+        vis = Image.open(output_base_path)
+        draw = ImageDraw.Draw(vis)
+        with open(output_bbox_path, 'w', encoding='utf-8') as f:
+            for token, box in zip(token_list, bbox_list):
+                item = {
+                    "bbox": box,
+                    "token": token
+                }
+                f.write(json.dumps(item, ensure_ascii=False)+'\n')
+                if not box:
+                    continue
+                x_min, y_min, x_max, y_max = box
+                draw.rectangle([x_min, y_min, x_max, y_max], fill=None, outline=(0,250,0), width=1)
+                try:
+                    draw.text((x_min, y_min), token, (250,0,0))
+                except Exception:
+                    # The label is only a debugging aid; skip tokens that
+                    # cannot be drawn.
+                    pass
+        vis.save(output_vis_path)
--- a/cdm/modules/latex_processor.py
+++ b/cdm/modules/latex_processor.py
--- a/cdm/modules/latex_render_percentage.py
+++ b/cdm/modules/latex_render_percentage.py
+import re
+import os
+import json
+import time
+import shutil
+import random
+import argparse
+import subprocess
+import numpy as np
+from tqdm import tqdm
+from multiprocessing import Pool
+# LaTeX document (landscape A5) used to test whether a formula compiles.
+# The formula is inserted with "%" formatting at the single "%s", which is why
+# every literal LaTeX "%" is written as "%%".
+formular_template = r"""
+\documentclass[12pt]{article}
+\usepackage[landscape]{geometry}
+\usepackage{geometry}
+\geometry{a5paper,scale=0.98}
+\pagestyle{empty}
+\usepackage{booktabs}
+\usepackage{amsmath}
+\usepackage{amssymb}
+\usepackage{xcolor}
+\begin{document}
+\makeatletter
+\renewcommand*{\@textcolor}[3]{%%
+  \protect\leavevmode
+  \begingroup
+    \color#1{#2}#3%%
+  \endgroup
+}
+\makeatother
+\begin{displaymath}
+%s
+\end{displaymath}
+\end{document}
+"""
+def run_shell_cmd(cmd, max_time=15):
+    """Run ``cmd`` through the shell, giving it at most ``max_time`` seconds.
+
+    Returns:
+        True when the process ended by itself within the time limit (whatever
+        its exit status), False when it had to be killed.
+
+    The previous polling loop tested ``if child.poll():``; an exit status of 0
+    is falsy, so a successful command was only noticed after the full timeout
+    and was then reported as a failure.
+    """
+    child = subprocess.Popen(cmd, shell=True)
+    try:
+        child.wait(timeout=max_time)
+    except subprocess.TimeoutExpired:
+        child.kill()
+        # Reap the killed process so it does not linger as a zombie.
+        child.wait()
+        return False
+    return True
+def render_latex(latex_code, basename, latex_dir, pdf_dir):
+    """Write ``latex_code`` into the formula template and compile it with pdflatex.
+
+    The ``.tex`` file goes to ``latex_dir`` and pdflatex writes into
+    ``pdf_dir``. Returns the expected PDF path, whether or not the
+    compilation succeeded.
+    """
+    pdf_path = os.path.join(pdf_dir, basename + ".pdf")
+    latex_path = os.path.join(latex_dir, basename + ".tex")
+    document = formular_template % latex_code
+    with open(latex_path, "w") as tex_file:
+        tex_file.write(document)
+    run_shell_cmd(
+        f"pdflatex -interaction=nonstopmode -output-directory={pdf_dir} -output-format=pdf {latex_path} >/dev/null"
+    )
+    return pdf_path
+if __name__ == "__main__":
+    # Compile every formula of the input file and record in a copy of the
+    # data ("renderable": 1/0) whether pdflatex produced a PDF for it.
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--input', '-i', type=str, default='data/pred_results/test.json')
+    parser.add_argument('--clean', action='store_true', default=False)
+    parser.add_argument('--gt', action='store_true', default=False)
+    args = parser.parse_args()
+    if args.gt:
+        output_path = os.path.join("output", 'gt.json')
+        load_key = 'gt'
+    else:
+        load_key = 'pred'
+        output_path = os.path.join("output", os.path.basename(args.input))
+    temp_dir = "render_temp_dir"
+    # Start from an empty scratch directory; it may not exist yet.
+    shutil.rmtree(temp_dir, ignore_errors=True)
+    latex_dir = os.path.join(temp_dir, "texes")
+    pdf_dir = os.path.join(temp_dir, "pdfs")
+    os.makedirs(latex_dir, exist_ok=True)
+    os.makedirs(pdf_dir, exist_ok=True)
+    # JSON is UTF-8 by specification; do not depend on the locale encoding.
+    with open(args.input, "r", encoding="utf-8") as f:
+        input_data = json.load(f)
+    # Never start more workers than there are formulas (upper bound 200).
+    myP = Pool(min(200, max(1, len(input_data))))
+    for idx, item in enumerate(input_data):
+        basename = f"sample_{idx}"
+        myP.apply_async(render_latex, args=(item[load_key], basename, latex_dir, pdf_dir))
+    myP.close()
+    print("processing, may take some times.")
+    myP.join()
+    success_num = 0
+    total_num = 0
+    for idx, item in enumerate(input_data):
+        basename = f"sample_{idx}"
+        total_num += 1
+        pdf_path = os.path.join(pdf_dir, basename + ".pdf")
+        if os.path.exists(pdf_path):
+            success_num += 1
+            item['renderable'] = 1
+        else:
+            item['renderable'] = 0
+    print("total num:", total_num, "render success num:", success_num)
+    # The "output" directory is not created anywhere else in this script.
+    os.makedirs(os.path.dirname(output_path), exist_ok=True)
+    with open(output_path, "w") as f:
+        f.write(json.dumps(input_data, indent=2))
+    if args.clean:
+        shutil.rmtree(temp_dir, ignore_errors=True)
\ No newline at end of file
--- a/cdm/modules/tokenize_latex/preprocess_formula.js
+++ b/cdm/modules/tokenize_latex/preprocess_formula.js
+const path = require('path');
+var katex = require(path.join(__dirname,"third_party/katex/katex.js"))
+options = require(path.join(__dirname,"third_party/katex/src/Options.js"))
+var readline = require('readline');
+// Read formulas line by line from stdin; one result line is printed to stdout
+// per input line.
+var rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+    terminal: false
+});
+// Per-line handler: clean the formula, then either tokenize it (argv[2] ==
+// "tokenize") or normalize it by re-rendering the parsed tree.
+rl.on('line', function(line){
+    // NOTE(review): `a` is an implicit global that is not read in the visible code.
+    a = line
+    // Drop a leading "%", then cut everything from the first remaining "%".
+    if (line[0] == "%") {
+        line = line.substr(1, line.length - 1);
+    }
+    line = line.split('%')[0];
+    // Replace every literal "\~" by a space.
+    line = line.split('\\~').join(' ');
+    // String.replace without the g flag only replaces the first match, so the
+    // loop removes up to 300 occurrences of "\>", "$" and "\label{...}".
+    for (var i = 0; i < 300; i++) {
+        line = line.replace(/\\>/, " ");
+        line = line.replace('$', ' ');
+        line = line.replace(/\\label{.*?}/, "");
+    }
+    // Outside of matrix/cases/array/begin constructs, a double backslash is
+    // turned into "\,".
+    if (line.indexOf("matrix") == -1 && line.indexOf("cases")==-1 &&
+        line.indexOf("array")==-1 && line.indexOf("begin")==-1)  {
+        for (var i = 0; i < 300; i++) {
+            line = line.replace(/\\\\/, "\\,");
+        }
+    }
+    line = line + " "
+    // global_str is tokenized version (build in parser.js)
+    // norm_str is normalized version build by renderer below.
+    try {
+        if (process.argv[2] == "tokenize") {
+            var tree = katex.__parse(line, {});
+            console.log(global_str.replace(/\\label { .*? }/, ""));
+        } else {
+            // Rewrite the "\rm" spellings to "\mathrm{" before parsing.
+            for (var i = 0; i < 300; ++i) {
+                line = line.replace(/{\\rm/, "\\mathrm{");
+                line = line.replace(/{ \\rm/, "\\mathrm{");
+                line = line.replace(/\\rm{/, "\\mathrm{");
+            }
+            var tree = katex.__parse(line, {});
+            buildExpression(tree, new options({}));            
+            // Turn the "SSSSSS" marker (also in its spaced form) into "$".
+            for (var i = 0; i < 300; ++i) {
+                norm_str = norm_str.replace('SSSSSS', '$');
+                norm_str = norm_str.replace(' S S S S S S', '$');
+            }
+            console.log(norm_str.replace(/\\label { .*? }/, ""));
+        }
+    } catch (e) {
+        // On failure, report on stderr and print an empty line to stdout.
+        console.error(line);
+        console.error(norm_str);
+        console.error(e);
+        console.log();
+    }
+    // Reset the accumulators for the next line.
+    global_str = ""
+    norm_str = ""
+})
+// This is a LaTeX AST to LaTeX Renderer (modified version of KaTeX AST-> MathML).
+// norm_str accumulates the normalized output; every handler appends to it.
+norm_str = ""
+// Maps a parse-node type to the handler that renders it.
+var groupTypes = {};
+// Ordinary math symbol. Under the "mathrm" font the value is emitted character
+// by character, each followed by a space.
+groupTypes.mathord = function(group, options) {
+    if (options.font == "mathrm"){
+        // NOTE(review): `i` is not declared here and becomes an implicit global.
+        for (i = 0; i < group.value.length; ++i ) {
+            if (group.value[i] == " ") {
+                // NOTE(review): in a JS string literal "\;" is just ";", so this
+                // appends "; " - confirm whether "\\; " (LaTeX thick space) was meant.
+                norm_str = norm_str + group.value[i] + "\; ";
+            } else {
+                norm_str = norm_str + group.value[i] + " ";
+            }
+        }
+    } else {
+        norm_str = norm_str + group.value + " ";
+    }
+};
+// The handlers below all render a node the same way: its value followed by
+// one space.
+groupTypes.textord = function(group, options) {
+    norm_str += group.value + " ";
+};
+groupTypes.bin = function(group) {
+    norm_str += group.value + " ";
+};
+groupTypes.rel = function(group) {
+    norm_str += group.value + " ";
+};
+groupTypes.open = function(group) {
+    norm_str += group.value + " ";
+};
+groupTypes.close = function(group) {
+    norm_str += group.value + " ";
+};
+groupTypes.inner = function(group) {
+    norm_str += group.value + " ";
+};
+groupTypes.punct = function(group) {
+    norm_str += group.value + " ";
+};
+// Braced group: render the children between "{ " and "} ".
+groupTypes.ordgroup = function(group, options) {
+    norm_str += "{ ";
+    buildExpression(group.value, options);
+    norm_str += "} ";
+};
+// Text group: rendered as "\mathrm { ... } ".
+groupTypes.text = function(group, options) {
+    norm_str += "\\mathrm { ";
+    buildExpression(group.value.body, options);
+    norm_str += "} ";
+};
+// Color group: renders the children, then wraps the result in an "mstyle"
+// node carrying the color.
+// NOTE(review): mathMLTree is not defined in the visible part of this file;
+// this looks like a leftover of the KaTeX MathML builder - confirm it is
+// reachable without a ReferenceError.
+groupTypes.color = function(group, options) {
+    var inner = buildExpression(group.value.value, options);
+    var node = new mathMLTree.MathNode("mstyle", inner);
+    node.setAttribute("mathcolor", group.value.color);
+    return node;
+};
+groupTypes.supsub = function(group, options) {
+    buildGroup(group.value.base, options);
+    if (group.value.sub) {
+        norm_str = norm_str + "_ ";
+        if (group.value.sub.type != 'ordgroup') {
+            norm_str = norm_str + " { ";
+            buildGroup(group.value.sub, options);
+            norm_str = norm_str + "} ";
+        } else {
+            buildGroup(group.value.sub, options);
+        }
+    }
+    if (group.value.sup) {
+        norm_str = norm_str + "^ ";
+        if (group.value.sup.type != 'ordgroup') {
+            norm_str = norm_str + " { ";
+            buildGroup(group.value.sup, options);
+            norm_str = norm_str + "} ";
+        } else {
+            buildGroup(group.value.sup, options);
+        }
+    }
+};
+groupTypes.genfrac = function(group, options) {
+    if (!group.value.hasBarLine) {
+        norm_str = norm_str + "\\binom ";
+    } else {
+        norm_str = norm_str + "\\frac ";
+    }
+    buildGroup(group.value.numer, options);
+    buildGroup(group.value.denom, options);
+};
+groupTypes.array = function(group, options) {
+    norm_str = norm_str + "\\begin{array} { ";
+    if (group.value.cols) {
+        group.value.cols.map(function(start) {
+            if (start && start.align) {
+                norm_str = norm_str + start.align + " ";}});
+    } else {
+        group.value.body[0].map(function(start) {
+            norm_str = norm_str + "l ";
+        } );
+    }
+    norm_str = norm_str + "} ";
+    group.value.body.map(function(row) {
+        if (row.some(cell => cell.value.length > 0)) { // orginal code: if (row[0].value.length > 0)
+            out = row.map(function(cell) {
+                buildGroup(cell, options);
+                if (norm_str.length > 4 
+                    && norm_str.substring(norm_str.length-4, norm_str.length) == "{ } ") {
+                    norm_str = norm_str.substring(0, norm_str.length-4) ;
+                }
+                norm_str = norm_str + "& ";
+            });
+            norm_str = norm_str.substring(0, norm_str.length-2) + "\\\\ ";
+        }
+    }); 
+    norm_str = norm_str + "\\end{array} ";
+};
+groupTypes.sqrt = function(group, options) {
+    var node;
+    if (group.value.index) {
+        norm_str = norm_str + "\\sqrt [ ";
+        buildExpression(group.value.index.value, options);
+        norm_str = norm_str + "] ";
+        buildGroup(group.value.body, options);
+    } else {
+        norm_str = norm_str + "\\sqrt ";
+        buildGroup(group.value.body, options);
+    }
+};
+groupTypes.leftright = function(group, options) {
+    norm_str = norm_str + "\\left" + group.value.left + " ";
+    buildExpression(group.value.body, options);
+    norm_str = norm_str + "\\right" + group.value.right + " ";
+};
+groupTypes.accent = function(group, options) {
+    if (group.value.base.type != 'ordgroup') {
+        norm_str = norm_str + group.value.accent + " { ";
+        buildGroup(group.value.base, options);
+        norm_str = norm_str + "} ";
+    } else {
+        norm_str = norm_str + group.value.accent + " ";
+        buildGroup(group.value.base, options);
+    }
+};
+groupTypes.spacing = function(group) {
+    var node;
+    if (group.value == " ") {
+        norm_str = norm_str + "~ ";
+    } else {
+        norm_str = norm_str + group.value + " ";
+    }
+    return node;
+};
+groupTypes.op = function(group) {
+    var node;
+    // TODO(emily): handle big operators using the `largeop` attribute
+    if (group.value.symbol) {
+        // This is a symbol. Just add the symbol.
+        norm_str = norm_str + group.value.body + " ";
+    } else {
+        if (group.value.limits == false) {
+            norm_str = norm_str + "\\\operatorname { ";
+        } else {
+            norm_str = norm_str + "\\\operatorname* { ";
+        }
+        for (i = 1; i < group.value.body.length; ++i ) {
+            norm_str = norm_str + group.value.body[i] + " ";
+        }
+        norm_str = norm_str + "} ";
+    }
+};
+groupTypes.katex = function(group) {
+    var node = new mathMLTree.MathNode(
+        "mtext", [new mathMLTree.TextNode("KaTeX")]);
+    return node;
+};
+groupTypes.font = function(group, options) {
+    var font = group.value.font;
+    if (font == "mbox" || font == "hbox") {
+        font = "mathrm";
+    }
+    norm_str = norm_str + "\\" + font + " ";
+    buildGroup(group.value.body, options.withFont(font));    
+};
+groupTypes.delimsizing = function(group) {
+    var children = [];
+    norm_str = norm_str + group.value.funcName + " " + group.value.value + " ";
+};
+groupTypes.styling = function(group, options) {
+    norm_str = norm_str + " " + group.value.original + " ";
+    buildExpression(group.value.value, options);
+};
+groupTypes.sizing = function(group, options) {
+    if (group.value.original == "\\rm") {
+        norm_str = norm_str + "\\mathrm { "; 
+        buildExpression(group.value.value, options.withFont("mathrm"));
+        norm_str = norm_str + "} ";
+    } else {
+        norm_str = norm_str + " " + group.value.original + " ";
+        buildExpression(group.value.value, options);
+    }
+};
+groupTypes.overline = function(group, options) {
+    norm_str = norm_str + "\\overline { ";
+    buildGroup(group.value.body, options);
+    norm_str = norm_str + "} ";
+    norm_str = norm_str;
+};
+groupTypes.underline = function(group, options) {
+    norm_str = norm_str + "\\underline { ";
+    buildGroup(group.value.body, options);
+    norm_str = norm_str + "} ";
+    norm_str = norm_str;
+};
+groupTypes.rule = function(group) {
+    norm_str = norm_str + "\\rule { "+group.value.width.number+" "+group.value.width.unit+"  } { "+group.value.height.number+" "+group.value.height.unit+ " } ";
+};
+groupTypes.llap = function(group, options) {
+    norm_str = norm_str + "\\llap ";
+    buildGroup(group.value.body, options);
+};
+groupTypes.rlap = function(group, options) {
+    norm_str = norm_str + "\\rlap ";
+    buildGroup(group.value.body, options);
+};
+groupTypes.phantom = function(group, options, prev) {
+    norm_str = norm_str + "\\phantom { ";
+    buildExpression(group.value.value, options);
+    norm_str = norm_str + "} ";
+};
+/**
+ * Takes a list of nodes, builds them, and returns a list of the generated
+ * MathML nodes. A little simpler than the HTML version because we don't do any
+ * previous-node handling.
+ */
+var buildExpression = function(expression, options) {
+    var groups = [];
+    for (var i = 0; i < expression.length; i++) {
+        var group = expression[i];
+        buildGroup(group, options);
+    }
+    // console.log(norm_str);
+    // return groups;
+};
+/**
+ * Takes a group from the parser and calls the appropriate groupTypes function
+ * on it to produce a MathML node.
+ */
+var buildGroup = function(group, options) {
+    if (groupTypes[group.type]) {
+        groupTypes[group.type](group, options);
+    } else {
+        throw new ParseError(
+            "Got group of unknown type: '" + group.type + "'");
+    }
+};
--- a/cdm/modules/tokenize_latex/preprocess_tabular.js
+++ b/cdm/modules/tokenize_latex/preprocess_tabular.js
+const path = require('path');
+var katex = require(path.join(__dirname,"third_party/katex/katex.js"))
+options = require(path.join(__dirname,"third_party/katex/src/Options.js"))
+var readline = require('readline');
+// Line reader over stdin. terminal: false makes readline treat the streams as
+// plain (non-TTY) streams.
+var rl = readline.createInterface({
+    input: process.stdin,
+    output: process.stdout,
+    terminal: false
+});
+// Handles one line of stdin: cleans up the LaTeX text, parses it with KaTeX
+// and writes one console.log line (an empty one when anything throws).
+// With process.argv[2] == "tokenize" the tokenized string global_str is
+// printed; otherwise the tree is rendered into norm_str and that is printed.
+rl.on('line', function(line){
+    // NOTE(review): `a` is an implicit global that is never read in this file.
+    a = line
+    // A leading "%" is dropped so the rest of the line is still processed.
+    if (line[0] == "%") {
+        line = line.substr(1, line.length - 1);
+    }
+    // line = line.split('%')[0];
+    line = line.split('\\~').join(' ');
+    // The regexes are not global, so each call replaces a single match; the
+    // loop therefore handles at most 300 occurrences of each pattern.
+    for (var i = 0; i < 300; i++) {
+        line = line.replace(/\\>/, " ");
+        // line = line.replace('$', ' ');
+        line = line.replace(/\\label{.*?}/, "");
+    }
+    // Lines that mention none of matrix/cases/array/begin get their "\\"
+    // sequences replaced by "\," (again at most 300 of them).
+    if (line.indexOf("matrix") == -1 && line.indexOf("cases")==-1 &&
+        line.indexOf("array")==-1 && line.indexOf("begin")==-1)  {
+        for (var i = 0; i < 300; i++) {
+            line = line.replace(/\\\\/, "\\,");
+        }
+    }
+    line = line + " "
+    // global_str is tokenized version (build in parser.js)
+    // norm_str is normalized version build by renderer below.
+    try {
+        if (process.argv[2] == "tokenize") {
+            var tree = katex.__parse(line, {});
+            console.log(global_str.replace(/\\label { .*? }/, ""));
+        } else {
+            // Rewrite "{\rm", "{ \rm" and "\rm{" to "\mathrm{" before parsing.
+            for (var i = 0; i < 300; ++i) {
+                line = line.replace(/{\\rm/, "\\mathrm{");
+                line = line.replace(/{ \\rm/, "\\mathrm{");
+                line = line.replace(/\\rm{/, "\\mathrm{");
+            }
+            var tree = katex.__parse(line, {});
+            buildExpression(tree, new options({}));            
+            // Turn "SSSSSS" / " S S S S S S" back into "$" (presumably a
+            // placeholder inserted upstream -- verify against the caller).
+            for (var i = 0; i < 300; ++i) {
+                norm_str = norm_str.replace('SSSSSS', '$');
+                norm_str = norm_str.replace(' S S S S S S', '$');
+            }
+            console.log(norm_str.replace(/\\label { .*? }/, ""));
+        }
+    } catch (e) {
+        // Diagnostics go to stderr; stdout still gets one (empty) line.
+        console.error(line);
+        console.error(norm_str);
+        console.error(e);
+        console.log("");
+    }
+    // Reset both accumulators for the next line.
+    global_str = ""
+    norm_str = ""
+})
+// This is a LaTeX AST to LaTeX Renderer (modified version of KaTeX AST-> MathML).
+// norm_str accumulates the normalized LaTeX text: every handler below appends
+// to it and the line handler above prints and then clears it. It is assigned
+// without var/let/const, so it is an implicit global.
+norm_str = ""
+// Dispatch table used by buildGroup: node type name -> handler function.
+var groupTypes = {};
+groupTypes.mathord = function(group, options) {
+    if (options.font == "mathrm"){
+        for (i = 0; i < group.value.length; ++i ) {
+            if (group.value[i] == " ") {
+                norm_str = norm_str + group.value[i] + "\; ";
+            } else {
+                norm_str = norm_str + group.value[i] + " ";
+            }
+        }
+    } else {
+        norm_str = norm_str + group.value + " ";
+    }
+};
+groupTypes.textord = function(group, options) {
+    norm_str = norm_str + group.value + " ";
+};
+groupTypes.bin = function(group) {
+    norm_str = norm_str + group.value + " ";
+};
+groupTypes.rel = function(group) {
+    norm_str = norm_str + group.value + " ";
+};
+groupTypes.open = function(group) {
+    norm_str = norm_str + group.value + " ";
+};
+groupTypes.close = function(group) {
+    norm_str = norm_str + group.value + " ";
+};
+groupTypes.inner = function(group) {
+    norm_str = norm_str + group.value + " ";
+};
+groupTypes.punct = function(group) {
+    norm_str = norm_str + group.value + " ";
+};
+groupTypes.ordgroup = function(group, options) {
+    norm_str = norm_str + "{ ";
+    buildExpression(group.value, options);
+    norm_str = norm_str +  "} ";
+};
+groupTypes.text = function(group, options) {
+    norm_str = norm_str + "\\mathrm { ";
+    buildExpression(group.value.body, options);
+    norm_str = norm_str + "} ";
+};
+groupTypes.color = function(group, options) {
+    var inner = buildExpression(group.value.value, options);
+    var node = new mathMLTree.MathNode("mstyle", inner);
+    node.setAttribute("mathcolor", group.value.color);
+    return node;
+};
+groupTypes.supsub = function(group, options) {
+    buildGroup(group.value.base, options);
+    if (group.value.sub) {
+        norm_str = norm_str + "_ ";
+        if (group.value.sub.type != 'ordgroup') {
+            norm_str = norm_str + " { ";
+            buildGroup(group.value.sub, options);
+            norm_str = norm_str + "} ";
+        } else {
+            buildGroup(group.value.sub, options);
+        }
+    }
+    if (group.value.sup) {
+        norm_str = norm_str + "^ ";
+        if (group.value.sup.type != 'ordgroup') {
+            norm_str = norm_str + " { ";
+            buildGroup(group.value.sup, options);
+            norm_str = norm_str + "} ";
+        } else {
+            buildGroup(group.value.sup, options);
+        }
+    }
+};
+groupTypes.genfrac = function(group, options) {
+    if (!group.value.hasBarLine) {
+        norm_str = norm_str + "\\binom ";
+    } else {
+        norm_str = norm_str + "\\frac ";
+    }
+    buildGroup(group.value.numer, options);
+    buildGroup(group.value.denom, options);
+};
+groupTypes.array = function(group, options) {
+    norm_str = norm_str + "\\begin{" + group.value.style + "} ";
+    if (group.value.style == "array" || group.value.style == "tabular" || group.value.style == "tabularx") {
+        norm_str = norm_str + "{ ";
+        if (group.value.cols) {
+            group.value.cols.map(function(start) {
+                if (start) {
+                    if (start.type == "align") {
+                        norm_str = norm_str + start.align + " ";
+                    } else if (start.type == "separator") {
+                        norm_str = norm_str + start.separator + " ";
+                    }
+                }
+            });
+        } else {
+            group.value.body[0].map(function(start) {
+                norm_str = norm_str + "c ";
+            } );
+        }
+        norm_str = norm_str + "} ";
+    }
+    group.value.body.map(function(row) {
+        if (row.length > 1 || row[0].value.length > 0) {
+            if (row[0].value[0] && row[0].value[0].value == "\\hline") {
+                norm_str = norm_str + "\\hline ";
+                row[0].value = row[0].value.slice(1);
+            }
+            out = row.map(function(cell) {
+                buildGroup(cell, options);
+                norm_str = norm_str + "& ";
+            });
+            norm_str = norm_str.substring(0, norm_str.length-2) + "\\\\ ";
+        }
+    }); 
+    norm_str = norm_str + "\\end{" + group.value.style + "} ";
+};
+groupTypes.sqrt = function(group, options) {
+    var node;
+    if (group.value.index) {
+        norm_str = norm_str + "\\sqrt [ " + group.value.index + " ] ";
+        buildGroup(group.value.body, options);
+    } else {
+        norm_str = norm_str + "\\sqrt ";
+        buildGroup(group.value.body, options);
+    }
+};
+groupTypes.leftright = function(group, options) {
+    norm_str = norm_str + "\\left" + group.value.left + " ";
+    buildExpression(group.value.body, options);
+    norm_str = norm_str + "\\right" + group.value.right + " ";
+};
+groupTypes.accent = function(group, options) {
+    if (group.value.base.type != 'ordgroup') {
+        norm_str = norm_str + group.value.accent + " { ";
+        buildGroup(group.value.base, options);
+        norm_str = norm_str + "} ";
+    } else {
+        norm_str = norm_str + group.value.accent + " ";
+        buildGroup(group.value.base, options);
+    }
+};
+groupTypes.spacing = function(group) {
+    var node;
+    if (group.value == " ") {
+        norm_str = norm_str + "~ ";
+    } else {
+        norm_str = norm_str + group.value + " ";
+    }
+    return node;
+};
+groupTypes.op = function(group) {
+    var node;
+    // TODO(emily): handle big operators using the `largeop` attribute
+    if (group.value.symbol) {
+        // This is a symbol. Just add the symbol.
+        norm_str = norm_str + group.value.body + " ";
+    } else {
+        if (group.value.limits == false) {
+            norm_str = norm_str + "\\\operatorname { ";
+        } else {
+            norm_str = norm_str + "\\\operatorname* { ";
+        }
+        for (i = 1; i < group.value.body.length; ++i ) {
+            norm_str = norm_str + group.value.body[i] + " ";
+        }
+        norm_str = norm_str + "} ";
+    }
+};
+groupTypes.katex = function(group) {
+    var node = new mathMLTree.MathNode(
+        "mtext", [new mathMLTree.TextNode("KaTeX")]);
+    return node;
+};
+groupTypes.font = function(group, options) {
+    var font = group.value.font;
+    if (font == "mbox" || font == "hbox") {
+        font = "mathrm";
+    }
+    norm_str = norm_str + "\\" + font + " ";
+    buildGroup(group.value.body, options.withFont(font));    
+};
+groupTypes.delimsizing = function(group) {
+    var children = [];
+    norm_str = norm_str + group.value.funcName + " " + group.value.value + " ";
+};
+groupTypes.styling = function(group, options) {
+    norm_str = norm_str + " " + group.value.original + " ";
+    buildExpression(group.value.value, options);
+};
+groupTypes.sizing = function(group, options) {
+    if (group.value.original == "\\rm") {
+        norm_str = norm_str + "\\mathrm { "; 
+        buildExpression(group.value.value, options.withFont("mathrm"));
+        norm_str = norm_str + "} ";
+    } else {
+        norm_str = norm_str + " " + group.value.original + " ";
+        buildExpression(group.value.value, options);
+    }
+};
+groupTypes.overline = function(group, options) {
+    norm_str = norm_str + "\\overline { ";
+    buildGroup(group.value.body, options);
+    norm_str = norm_str + "} ";
+    norm_str = norm_str;
+};
+groupTypes.underline = function(group, options) {
+    norm_str = norm_str + "\\underline { ";
+    buildGroup(group.value.body, options);
+    norm_str = norm_str + "} ";
+    norm_str = norm_str;
+};
+groupTypes.rule = function(group) {
+    norm_str = norm_str + "\\rule { "+group.value.width.number+" "+group.value.width.unit+"  } { "+group.value.height.number+" "+group.value.height.unit+ " } ";
+};
+groupTypes.llap = function(group, options) {
+    norm_str = norm_str + "\\llap ";
+    buildGroup(group.value.body, options);
+};
+groupTypes.rlap = function(group, options) {
+    norm_str = norm_str + "\\rlap ";
+    buildGroup(group.value.body, options);
+};
+groupTypes.phantom = function(group, options, prev) {
+    norm_str = norm_str + "\\phantom { ";
+    buildExpression(group.value.value, options);
+    norm_str = norm_str + "} ";
+};
+/**
+ * Takes a list of nodes, builds them, and returns a list of the generated
+ * MathML nodes. A little simpler than the HTML version because we don't do any
+ * previous-node handling.
+ */
+var buildExpression = function(expression, options) {
+    var groups = [];
+    for (var i = 0; i < expression.length; i++) {
+        var group = expression[i];
+        buildGroup(group, options);
+    }
+    // console.log(norm_str);
+    // return groups;
+};
+/**
+ * Takes a group from the parser and calls the appropriate groupTypes function
+ * on it to produce a MathML node.
+ */
+var buildGroup = function(group, options) {
+    if (groupTypes[group.type]) {
+        groupTypes[group.type](group, options);
+    } else {
+        throw new ParseError(
+            "Got group of unknown type: '" + group.type + "'");
+    }
+};
--- a/cdm/modules/tokenize_latex/third_party/README.md
+++ b/cdm/modules/tokenize_latex/third_party/README.md
+Directly taken from https://github.com/harvardnlp/im2markup
--- a/cdm/modules/tokenize_latex/third_party/katex/README.md
+++ b/cdm/modules/tokenize_latex/third_party/katex/README.md
+# [<img src="https://khan.github.io/KaTeX/katex-logo.svg" width="130" alt="KaTeX">](https://khan.github.io/KaTeX/) [![Build Status](https://travis-ci.org/Khan/KaTeX.svg?branch=master)](https://travis-ci.org/Khan/KaTeX)
+[![Join the chat at https://gitter.im/Khan/KaTeX](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Khan/KaTeX?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+KaTeX is a fast, easy-to-use JavaScript library for TeX math rendering on the web.
+ * **Fast:** KaTeX renders its math synchronously and doesn't need to reflow the page. See how it compares to a competitor in [this speed test](http://jsperf.com/katex-vs-mathjax/).
+ * **Print quality:** KaTeX’s layout is based on Donald Knuth’s TeX, the gold standard for math typesetting.
+ * **Self contained:** KaTeX has no dependencies and can easily be bundled with your website resources.
+ * **Server side rendering:** KaTeX produces the same output regardless of browser or environment, so you can pre-render expressions using Node.js and send them as plain HTML.
+KaTeX supports all major browsers, including Chrome, Safari, Firefox, Opera, and IE 8 - IE 11.  A list of supported commands can be found on the [wiki](https://github.com/Khan/KaTeX/wiki/Function-Support-in-KaTeX).
+## Usage
+You can [download KaTeX](https://github.com/khan/katex/releases) and host it on your server or include the `katex.min.js` and `katex.min.css` files on your page directly from a CDN:
+```html
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.5.1/katex.min.css">
+<script src="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.5.1/katex.min.js"></script>
+```
+#### In-browser rendering
+Call `katex.render` with a TeX expression and a DOM element to render into:
+```js
+katex.render("c = \\pm\\sqrt{a^2 + b^2}", element);
+```
+If KaTeX can't parse the expression, it throws a `katex.ParseError` error.
+#### Server side rendering or rendering to a string
+To generate HTML on the server or to generate an HTML string of the rendered math, you can use `katex.renderToString`:
+```js
+var html = katex.renderToString("c = \\pm\\sqrt{a^2 + b^2}");
+// '<span class="katex">...</span>'
+```
+Make sure to include the CSS and font files, but there is no need to include the JavaScript. Like `render`, `renderToString` throws if it can't parse the expression.
+#### Rendering options
+You can provide an object of options as the last argument to `katex.render` and `katex.renderToString`. Available options are:
+- `displayMode`: `boolean`. If `true` the math will be rendered in display mode, which will put the math in display style (so `\int` and `\sum` are large, for example), and will center the math on the page on its own line. If `false` the math will be rendered in inline mode. (default: `false`)
+- `throwOnError`: `boolean`. If `true`, KaTeX will throw a `ParseError` when it encounters an unsupported command. If `false`, KaTeX will render the unsupported command as text in the color given by `errorColor`. (default: `true`)
+- `errorColor`: `string`. A color string given in the format `"#XXX"` or `"#XXXXXX"`. This option determines the color which unsupported commands are rendered in. (default: `#cc0000`)
+For example:
+```js
+katex.render("c = \\pm\\sqrt{a^2 + b^2}", element, { displayMode: true });
+```
+#### Automatic rendering of math on a page
+Math on the page can be automatically rendered using the auto-render extension. See [the Auto-render README](contrib/auto-render/README.md) for more information.
+## Contributing
+See [CONTRIBUTING.md](CONTRIBUTING.md)
+## License
+KaTeX is licensed under the [MIT License](http://opensource.org/licenses/MIT).
--- a/cdm/modules/tokenize_latex/third_party/katex/cli.js
+++ b/cdm/modules/tokenize_latex/third_party/katex/cli.js
+#!/usr/bin/env node
+// Simple CLI for KaTeX.
+// Reads TeX from stdin, outputs HTML to stdout.
+/* eslint no-console:0 */
+var katex = require("./");
+var input = "";
+// Skip the first two args, which are just "node" and "cli.js"
+var args = process.argv.slice(2);
+if (args.indexOf("--help") !== -1) {
+    console.log(process.argv[0] + " " + process.argv[1] +
+                " [ --help ]" +
+                " [ --display-mode ]");
+    console.log("\n" +
+                "Options:");
+    console.log("  --help            Display this help message");
+    console.log("  --display-mode    Render in display mode (not inline mode)");
+    process.exit();
+}
+process.stdin.on("data", function(chunk) {
+    input += chunk.toString();
+});
+process.stdin.on("end", function() {
+    var options = { displayMode: args.indexOf("--display-mode") !== -1 };
+    var output = katex.renderToString(input, options);
+    console.log(output);
+});
--- a/cdm/modules/tokenize_latex/third_party/katex/katex.js
+++ b/cdm/modules/tokenize_latex/third_party/katex/katex.js
+/* eslint no-console:0 */
+/**
+ * This is the main entry point for KaTeX. Here, we expose functions for
+ * rendering expressions either to DOM nodes or to markup strings.
+ *
+ * We also expose the ParseError class to check if errors thrown from KaTeX are
+ * errors in the expression, or errors in javascript handling.
+ */
+var ParseError = require("./src/ParseError");
+var Settings = require("./src/Settings");
+var buildTree = require("./src/buildTree");
+var parseTree = require("./src/parseTree");
+var utils = require("./src/utils");
+/**
+ * Parse and build an expression, and place that expression in the DOM node
+ * given.
+ */
+var render = function(expression, baseNode, options) {
+    utils.clearNode(baseNode);
+    var settings = new Settings(options);
+    var tree = parseTree(expression, settings);
+    var node = buildTree(tree, expression, settings).toNode();
+    baseNode.appendChild(node);
+};
+// KaTeX's styles don't work properly in quirks mode. Print out an error, and
+// disable rendering.
+if (typeof document !== "undefined") {
+    if (document.compatMode !== "CSS1Compat") {
+        typeof console !== "undefined" && console.warn(
+            "Warning: KaTeX doesn't work in quirks mode. Make sure your " +
+                "website has a suitable doctype.");
+        render = function() {
+            throw new ParseError("KaTeX doesn't work in quirks mode.");
+        };
+    }
+}
+/**
+ * Parse and build an expression, and return the markup for that.
+ */
+var renderToString = function(expression, options) {
+    var settings = new Settings(options);
+    var tree = parseTree(expression, settings);
+    return buildTree(tree, expression, settings).toMarkup();
+};
+/**
+ * Parse an expression and return the parse tree.
+ */
+var generateParseTree = function(expression, options) {
+    var settings = new Settings(options);
+    return parseTree(expression, settings);
+};
+// Public interface of this module: the two render functions, the raw parse
+// entry point and the ParseError class.
+module.exports = {
+    render: render,
+    renderToString: renderToString,
+    /**
+     * NOTE: This method is not currently recommended for public use.
+     * The internal tree representation is unstable and is very likely
+     * to change. Use at your own risk.
+     */
+    __parse: generateParseTree,
+    ParseError: ParseError,
+};
--- a/cdm/modules/tokenize_latex/third_party/katex/package.json
+++ b/cdm/modules/tokenize_latex/third_party/katex/package.json
+{
+  "_args": [
+    [
+      "katex",
+      "/home/srush/Projects/im2latex"
+    ]
+  ],
+  "_from": "katex@latest",
+  "_id": "katex@0.6.0",
+  "_inCache": true,
+  "_installable": true,
+  "_location": "/katex",
+  "_nodeVersion": "4.2.1",
+  "_npmOperationalInternal": {
+    "host": "packages-12-west.internal.npmjs.com",
+    "tmp": "tmp/katex-0.6.0.tgz_1460769444991_0.38667152682319283"
+  },
+  "_npmUser": {
+    "email": "kevinb7@gmail.com",
+    "name": "kevinbarabash"
+  },
+  "_npmVersion": "2.15.2",
+  "_phantomChildren": {},
+  "_requested": {
+    "name": "katex",
+    "raw": "katex",
+    "rawSpec": "",
+    "scope": null,
+    "spec": "latest",
+    "type": "tag"
+  },
+  "_requiredBy": [
+    "#USER"
+  ],
+  "_resolved": "https://registry.npmjs.org/katex/-/katex-0.6.0.tgz",
+  "_shasum": "12418e09121c05c92041b6b3b9fb6bab213cb6f3",
+  "_shrinkwrap": null,
+  "_spec": "katex",
+  "_where": "/home/srush/Projects/im2latex",
+  "bin": {
+    "katex": "cli.js"
+  },
+  "bugs": {
+    "url": "https://github.com/Khan/KaTeX/issues"
+  },
+  "dependencies": {
+    "match-at": "^0.1.0"
+  },
+  "description": "Fast math typesetting for the web.",
+  "devDependencies": {
+    "browserify": "^10.2.4",
+    "clean-css": "~2.2.15",
+    "eslint": "^1.10.2",
+    "express": "~3.3.3",
+    "glob": "^5.0.15",
+    "jasmine": "^2.3.2",
+    "jasmine-core": "^2.3.4",
+    "js-yaml": "^3.3.1",
+    "jspngopt": "^0.1.0",
+    "less": "~1.7.5",
+    "nomnom": "^1.8.1",
+    "pako": "0.2.7",
+    "selenium-webdriver": "^2.46.1",
+    "uglify-js": "~2.4.15"
+  },
+  "directories": {},
+  "dist": {
+    "shasum": "12418e09121c05c92041b6b3b9fb6bab213cb6f3",
+    "tarball": "https://registry.npmjs.org/katex/-/katex-0.6.0.tgz"
+  },
+  "files": [
+    "cli.js",
+    "dist/",
+    "katex.js",
+    "src/"
+  ],
+  "gitHead": "b94fc6534d5c23f944906a52a592bee4e0090665",
+  "homepage": "https://github.com/Khan/KaTeX#readme",
+  "license": "MIT",
+  "main": "katex.js",
+  "maintainers": [
+    {
+      "name": "kevinbarabash",
+      "email": "kevinb7@gmail.com"
+    },
+    {
+      "name": "spicyj",
+      "email": "ben@benalpert.com"
+    },
+    {
+      "name": "xymostech",
+      "email": "xymostech@gmail.com"
+    }
+  ],
+  "name": "katex",
+  "optionalDependencies": {},
+  "readme": "ERROR: No README data found!",
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/Khan/KaTeX.git"
+  },
+  "scripts": {
+    "prepublish": "make dist",
+    "start": "node server.js",
+    "test": "make lint test"
+  },
+  "version": "0.6.0"
+}
--- a/cdm/modules/tokenize_latex/third_party/katex/src/Lexer.js
+++ b/cdm/modules/tokenize_latex/third_party/katex/src/Lexer.js
+/**
+ * The Lexer class handles tokenizing the input in various ways. Since our
+ * parser expects us to be able to backtrack, the lexer allows lexing from any
+ * given starting point.
+ *
+ * Its main exposed function is the `lex` function, which takes a position to
+ * lex from and a type of token to lex. It defers to the appropriate `_innerLex`
+ * function.
+ *
+ * The various `_innerLex` functions perform the actual lexing of different
+ * kinds.
+ */
+var matchAt = require("../../match-at");
+var ParseError = require("./ParseError");
+// The main lexer class
+// Only stores the input string; the lexing methods read it starting at a
+// position supplied by the caller.
+function Lexer(input) {
+    this._input = input;
+}
+// The resulting token returned from `lex`.
+// text:     the token text
+// data:     extra payload; the lexing functions below pass null except for
+//           sizes, which pass {number, unit}
+// position: the input offset just past the token
+function Token(text, data, position) {
+    this.text = text;
+    this.data = data;
+    this.position = position;
+}
+/* The following tokenRegex
+ * - matches typical whitespace (but not NBSP etc.) using its first group
+ * - matches symbol combinations which result in a single output character
+ * - does not match any control character \x00-\x1f except whitespace
+ * - does not match a bare backslash
+ * - matches any ASCII character except those just mentioned
+ * - does not match the BMP private use area \uE000-\uF8FF
+ * - does not match bare surrogate code units
+ * - matches any BMP character except for those just described
+ * - matches any valid Unicode surrogate pair
+ * - matches a backslash followed by one or more letters
+ * - matches a backslash followed by any BMP character, including newline
+ * Just because the Lexer matches something doesn't mean it's valid input:
+ * If there is no matching function or symbol definition, the Parser will
+ * still reject the input.
+ *
+ * Capture group 1 holds a whitespace run; capture group 2 holds any other
+ * token. Exactly one of the two is set on a successful match.
+ */
+var tokenRegex = new RegExp(
+    "([ \r\n\t]+)|(" +                                // whitespace
+    "---?" +                                          // special combinations
+    "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
+    "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
+    "|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" +           // function name
+    ")"
+);
+// Matches a (possibly empty) run of whitespace.
+var whitespaceRegex = /\s*/;
+/**
+ * This function lexes a single normal token. It takes a position and
+ * whether it should completely ignore whitespace or not.
+ */
+Lexer.prototype._innerLex = function(pos, ignoreWhitespace) {
+    var input = this._input;
+    if (pos === input.length) {
+        return new Token("EOF", null, pos);
+    }
+    var match = matchAt(tokenRegex, input, pos);
+    if (match === null) {
+        throw new ParseError(
+            "Unexpected character: '" + input[pos] + "'",
+            this, pos);
+    } else if (match[2]) { // matched non-whitespace
+        return new Token(match[2], null, pos + match[2].length);
+    } else if (ignoreWhitespace) {
+        return this._innerLex(pos + match[1].length, true);
+    } else { // concatenate whitespace to a single space
+        return new Token(" ", null, pos + match[1].length);
+    }
+};
+// A regex to match a CSS color (like #ffffff or BlueViolet)
+var cssColor = /#[a-z0-9]+|[a-z]+/i;
+/**
+ * This function lexes a CSS color.
+ */
+Lexer.prototype._innerLexColor = function(pos) {
+    var input = this._input;
+    // Ignore whitespace
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+    pos += whitespace.length;
+    var match;
+    if ((match = matchAt(cssColor, input, pos))) {
+        // If we look like a color, return a color
+        return new Token(match[0], null, pos + match[0].length);
+    } else {
+        throw new ParseError("Invalid color", this, pos);
+    }
+};
+// A regex to match a dimension. Dimensions look like
+// "1.2em" or ".4pt" or "1 ex"
+var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;
+/**
+ * This function lexes a dimension.
+ */
+Lexer.prototype._innerLexSize = function(pos) {
+    var input = this._input;
+    // Ignore whitespace
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+    pos += whitespace.length;
+    var match;
+    if ((match = matchAt(sizeRegex, input, pos))) {
+        var unit = match[3];
+        // We only currently handle "em" and "ex" units
+        // if (unit !== "em" && unit !== "ex") {
+        //     throw new ParseError("Invalid unit: '" + unit + "'", this, pos);
+        // }
+        return new Token(match[0], {
+            number: +(match[1] + match[2]),
+            unit: unit,
+        }, pos + match[0].length);
+    }
+    throw new ParseError("Invalid size", this, pos);
+};
+/**
+ * This function lexes a string of whitespace.
+ */
+Lexer.prototype._innerLexWhitespace = function(pos) {
+    var input = this._input;
+    var whitespace = matchAt(whitespaceRegex, input, pos)[0];
+    pos += whitespace.length;
+    return new Token(whitespace[0], null, pos);
+};
+/**
+ * This function lexes a single token starting at `pos` and of the given mode.
+ * Based on the mode, we defer to one of the `_innerLex` functions.
+ */
+Lexer.prototype.lex = function(pos, mode) {
+    if (mode === "math") {
+        return this._innerLex(pos, true);
+    } else if (mode === "text") {
+        return this._innerLex(pos, false);
+    } else if (mode === "color") {
+        return this._innerLexColor(pos);
+    } else if (mode === "size") {
+        return this._innerLexSize(pos);
+    } else if (mode === "whitespace") {
+        return this._innerLexWhitespace(pos);
+    }
+};
+module.exports = Lexer; // the Lexer constructor is this module's only export
--- a/cdm/modules/tokenize_latex/third_party/katex/src/Options.js
+++ b/cdm/modules/tokenize_latex/third_party/katex/src/Options.js
+/**
+ * This file contains information about the options that the Parser carries
+ * around with it while parsing. Data is held in an `Options` object, and when
+ * recursing, a new `Options` object can be created with the `.with*` and
+ * `.reset` functions.
+ */
+/**
+ * This is the main options class. It contains the style, size, color, and font
+ * of the current parse level. It also contains the style and size of the parent
+ * parse level, so size changes can be handled efficiently.
+ *
+ * Each of the `.with*` and `.reset` functions passes its current style and size
+ * as the parentStyle and parentSize of the new options class, so parent
+ * handling is taken care of automatically.
+ */
+function Options(data) {
+    this.style = data.style;
+    this.color = data.color;
+    this.size = data.size;
+    this.phantom = data.phantom;
+    this.font = data.font;
+    if (data.parentStyle === undefined) {
+        this.parentStyle = data.style;
+    } else {
+        this.parentStyle = data.parentStyle;
+    }
+    if (data.parentSize === undefined) {
+        this.parentSize = data.size;
+    } else {
+        this.parentSize = data.parentSize;
+    }
+}
+/**
+ * Returns a new options object with the same properties as "this".  Properties
+ * from "extension" will be copied to the new options object.
+ */
+Options.prototype.extend = function(extension) {
+    var data = {
+        style: this.style,
+        size: this.size,
+        color: this.color,
+        parentStyle: this.style,
+        parentSize: this.size,
+        phantom: this.phantom,
+        font: this.font,
+    };
+    for (var key in extension) {
+        if (extension.hasOwnProperty(key)) {
+            data[key] = extension[key];
+        }
+    }
+    return new Options(data);
+};
+/**
+ * Create a new options object with the given style.
+ */
+Options.prototype.withStyle = function(style) {
+    return this.extend({
+        style: style,
+    });
+};
+/**
+ * Create a new options object with the given size.
+ */
+Options.prototype.withSize = function(size) {
+    return this.extend({
+        size: size,
+    });
+};
+/**
+ * Create a new options object with the given color.
+ */
+Options.prototype.withColor = function(color) {
+    return this.extend({
+        color: color,
+    });
+};
+/**
+ * Create a new options object with "phantom" set to true.
+ */
+Options.prototype.withPhantom = function() {
+    return this.extend({
+        phantom: true,
+    });
+};
+/**
+ * Create a new options objects with the give font.
+ */
+Options.prototype.withFont = function(font) {
+    return this.extend({
+        font: font,
+    });
+};
+/**
+ * Create a new options object with the same style, size, and color. This is
+ * used so that parent style and size changes are handled correctly.
+ */
+Options.prototype.reset = function() {
+    return this.extend({});
+};
+/**
+ * A map of color names to CSS colors.
+ * Every key carries a "katex-" prefix; every value is a CSS color string,
+ * either a "#rrggbb" hex code or, for "katex-gray", the keyword "gray".
+ * `Options.prototype.getColor` looks up `this.color` in this map and falls
+ * back to the raw color value when there is no entry for it.
+ * TODO(emily): Remove this when we have real macros
+ */
+var colorMap = {
+    "katex-blue": "#6495ed",
+    "katex-orange": "#ffa500",
+    "katex-pink": "#ff00af",
+    "katex-red": "#df0030",
+    "katex-green": "#28ae7b",
+    "katex-gray": "gray",
+    "katex-purple": "#9d38bd",
+    "katex-blueA": "#c7e9f1",
+    "katex-blueB": "#9cdceb",
+    "katex-blueC": "#58c4dd",
+    "katex-blueD": "#29abca",
+    "katex-blueE": "#1c758a",
+    "katex-tealA": "#acead7",
+    "katex-tealB": "#76ddc0",
+    "katex-tealC": "#5cd0b3",
+    "katex-tealD": "#55c1a7",
+    "katex-tealE": "#49a88f",
+    "katex-greenA": "#c9e2ae",
+    "katex-greenB": "#a6cf8c",
+    "katex-greenC": "#83c167",
+    "katex-greenD": "#77b05d",
+    "katex-greenE": "#699c52",
+    "katex-goldA": "#f7c797",
+    "katex-goldB": "#f9b775",
+    "katex-goldC": "#f0ac5f",
+    "katex-goldD": "#e1a158",
+    "katex-goldE": "#c78d46",
+    "katex-redA": "#f7a1a3",
+    "katex-redB": "#ff8080",
+    "katex-redC": "#fc6255",
+    "katex-redD": "#e65a4c",
+    "katex-redE": "#cf5044",
+    "katex-maroonA": "#ecabc1",
+    "katex-maroonB": "#ec92ab",
+    "katex-maroonC": "#c55f73",
+    "katex-maroonD": "#a24d61",
+    "katex-maroonE": "#94424f",
+    "katex-purpleA": "#caa3e8",
+    "katex-purpleB": "#b189c6",
+    "katex-purpleC": "#9a72ac",
+    "katex-purpleD": "#715582",
+    "katex-purpleE": "#644172",
+    "katex-mintA": "#f5f9e8",
+    "katex-mintB": "#edf2df",
+    "katex-mintC": "#e0e5cc",
+    "katex-grayA": "#fdfdfd",
+    "katex-grayB": "#f7f7f7",
+    "katex-grayC": "#eeeeee",
+    "katex-grayD": "#dddddd",
+    "katex-grayE": "#cccccc",
+    "katex-grayF": "#aaaaaa",
+    "katex-grayG": "#999999",
+    "katex-grayH": "#555555",
+    "katex-grayI": "#333333",
+    "katex-kaBlue": "#314453",
+    "katex-kaGreen": "#639b24",
+};
+/**
+ * Gets the CSS color of the current options object, accounting for the
+ * `colorMap`.
+ */
+Options.prototype.getColor = function() {
+    if (this.phantom) {
+        return "transparent";
+    } else {
+        return colorMap[this.color] || this.color;
+    }
+};
+module.exports = Options; // the Options constructor is this module's only export
--- a/cdm/modules/tokenize_latex/third_party/katex/src/ParseError.js
+++ b/cdm/modules/tokenize_latex/third_party/katex/src/ParseError.js
+/**
+ * This is the ParseError class, which is the main error thrown by KaTeX
+ * functions when something has gone wrong. This is used to distinguish internal
+ * errors from errors in the expression that the user provided.
+ */
+function ParseError(message, lexer, position) {
+    var error = "KaTeX parse error: " + message;
+    if (lexer !== undefined && position !== undefined) {
+        // If we have the input and a position, make the error a bit fancier
+        // Prepend some information
+        error += " at position " + position + ": ";
+        // Get the input
+        var input = lexer._input;
+        // Insert a combining underscore at the correct position
+        input = input.slice(0, position) + "\u0332" +
+            input.slice(position);
+        // Extract some context from the input and add it to the error
+        var begin = Math.max(0, position - 15);
+        var end = position + 15;
+        error += input.slice(begin, end);
+    }
+    // Some hackery to make ParseError a prototype of Error
+    // See http://stackoverflow.com/a/8460753
+    var self = new Error(error);
+    self.name = "ParseError";
+    self.__proto__ = ParseError.prototype;
+    self.position = position;
+    return self;
+}
+// More hackery
+ParseError.prototype.__proto__ = Error.prototype;
+module.exports = ParseError; // the ParseError constructor is this module's only export