Commit bffed0fe authored by dengjb's avatar dengjb
Browse files

update

parents
[
{
"img_id": "case_1",
"gt": "r = \\frac { \\alpha } { \\beta } \\vert \\sin \\beta \\left( \\sigma _ { 1 } \\pm \\sigma _ { 2 } \\right) \\vert",
"pred": "r={\\frac{\\alpha}{\\beta}}|\\sin\\beta\\left(\\sigma_{2}+\\sigma_{1}\\right)|"
},
{
"img_id": "case_2",
"gt": "y = 2z + 3x",
"pred": "y = 2x + 3z"
},
{
"img_id": "case_3",
"gt": "\\begin{array} { r l r } & { } & { \\mathbf { J } _ { L } = \\left( \\begin{array} { c c } { 0 } & { 0 } \\\\ { v _ { n } } & { 0 } \\end{array} \\right) , ~ \\mathbf { J } _ { R } = \\left( \\begin{array} { c c } { u _ { n - 1 } } & { 0 } \\\\ { 0 } & { 0 } \\end{array} \\right) , ~ } \\\\ & { } & { ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ \\mathbf { K } = \\left( \\begin{array} { c c } { V _ { n - 1 } } & { u _ { n } } \\\\ { v _ { n - 1 } } & { V _ { n } } \\end{array} \\right) , } \\end{array}",
"pred": "\\mathbf{J}_{R}={\\left(\\begin{array}{l l}{0}&{0}\\\\ {v_{n}}&{0}\\end{array}\\right)}\\,,\\ \\mathbf{J}_{L}={\\left(\\begin{array}{l l}{u_{n-1}}&{0}\\\\ {0}&{0}\\end{array}\\right)}\\,,\\mathbf{K}={\\left(\\begin{array}{l l}{V_{n-1}}&{u_{n}}\\\\ {v_{n-1}}&{V_{n}}\\end{array}\\right)}\\,,"
}
]
\ No newline at end of file
[
{
"pred": "\\begin{tabular}{cccc}Property&Value\\\\ \\hline$e^{min}_c$&0.069\\\\ $e^{min}_d$&0.074\\\\ $e^{max}_c$&0.39\\\\ $e^{max}_d$&0.365\\\\ $\\epsilon$&0.17\\\\ $i^{min}_c$&$16.0^\\circ$\\\\ $i^{min}_d$&$11.6^\\circ$\\\\ $i^{max}_c$&$20.4^\\circ$\\\\ $i^{max}_d$&$16.7^\\circ$\\\\ $\\Psi^{min}$&$27.6^\\circ$\\\\ $\\Psi^{max}$&$37.1^\\circ$\\\\ $\\beta/\\beta_{crit}$&1.075\\\\ \\end{tabular} ",
"gt": "\\begin{tabular}{cc}Property&Value\\\\\\hline$e^{min}_c$&0.069\\\\$e^{min}_d$&0.074\\\\$e^{max}_c$&0.39\\\\$e^{max}_d$&0.365\\\\$\\epsilon$&0.17\\\\$i^{min}_c$&$16.0^\\circ$\\\\$i^{min}_d$&$11.6^\\circ$\\\\$i^{max}_c$&$20.4^\\circ$\\\\$i^{max}_d$&$16.7^\\circ$\\\\$\\Psi^{min}$&$27.6^\\circ$\\\\$\\Psi^{max}$&$37.1^\\circ$\\\\$\\beta/\\beta_{crit}$&1.075\\\\ \\end{tabular}\n"
},
{
"pred": "\\begin{tabular}{l r r r r}\\hline Element&\\multicolumn{2}{c}{fully convective}&\\multicolumn{2}{c}{convective at$\\tau_{\\rm R}=3.2$}\\\\ &$\\log\\tau_{\\rm diff}$[yrs]&$\\dot{m}$[g\\,s$^{-1}$]&$\\log\\tau_{\\rm diff}$[yrs]&$\\dot{m}$[g\\,s$^{-1}$]\\\\ \\hline$12$Mg&$-0.46$&$1.7\\times10^8$&$-2.2$&$7.3\\times10^7$\\\\ $14$Si&$-0.36$&$\\leq2.5\\times10^8$&$-2.5$&$\\leq2.9\\times10^8$\\\\ $20$Ca&$-0.37$&$7.2\\times10^6$&$-2.3$&$5.0\\times10^6$\\\\ \\hline\\end{tabular} ",
"gt": "\\begin{tabular}{lrrrr}\\hline\nElement&\\multicolumn{2}{c}{fully convective}&\\multicolumn{2}{c}{convective at$\\tau_\\mathrm{R}=3.2$}\\\\ &$\\log\\tau_\\mathrm{diff}$[yrs]&$\\dot m~[\\mathrm{g\\,s^{-1}}]$&$\\log\\tau_\\mathrm{diff}$[yrs]&$\\dot m~[\\mathrm{g\\,s^{-1}}]$\\\\\\hline12Mg&$-0.46$&$1.7\\times10^8$&$-2.2$&$7.3\\times10^7$\\\\14Si&$-0.36$&$\\leq2.5\\times10^8$&$-2.5$&$\\leq2.9\\times10^8$\\\\20Ca&$-0.37$&$7.2\\times10^6$&$-2.3$&$5.0\\times10^6$\\\\\\hline\\end{tabular}\n"
}
]
\ No newline at end of file
import os
import json
from tqdm import tqdm
import argparse
def change_data_format(input_json, output_json):
    """Flatten a per-subset result file into a flat list of gt/pred records.

    ``input_json`` is loaded as a mapping whose values each carry a ``'text'``
    list; every entry of that list must provide ``image_path``, ``reference``
    and ``prediction``.  One ``{"img_id", "gt", "pred"}`` record is written to
    ``output_json`` per entry, as indented JSON.
    """
    with open(input_json, 'r') as src:
        results_by_subset = json.load(src)

    records = []
    for subset_key, subset_payload in results_by_subset.items():
        # Subset tag: the three characters just before the key's last
        # character, lower-cased.
        subset = subset_key[-4:-1].lower()
        for sample in tqdm(subset_payload['text']):
            file_name = os.path.basename(sample['image_path'])
            records.append({
                # The last four characters of the file name are dropped.
                "img_id": f"{subset}_{file_name[0:-4]}",
                "gt": sample["reference"],
                "pred": sample["prediction"],
            })

    with open(output_json, "w") as dst:
        dst.write(json.dumps(records, indent=2))
if __name__ == '__main__':
    # Command-line entry point: read --input / --output and run the converter.
    cli = argparse.ArgumentParser()
    cli.add_argument('--input', '-i', type=str)
    cli.add_argument('--output', '-o', type=str)
    options = cli.parse_args()
    print(options)
    change_data_format(options.input, options.output)
\ No newline at end of file
import sys
import os
import re
import json
import time
import shutil
import argparse
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from datetime import datetime
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
from PIL import Image, ImageDraw
from skimage.measure import ransac
from modules.latex2bbox_color import latex2bbox_color
from modules.tokenize_latex.tokenize_latex import tokenize_latex
from modules.visual_matcher import HungarianMatcher, SimpleAffineTransform
def gen_color_list(num=10, gap=15):
    """Return up to ``num`` distinct RGB tuples on a grid with step ``gap``.

    Colours are enumerated with blue varying fastest, then green, then red.
    The all-zero colour is never returned, and the result is capped at the
    number of grid points minus one.
    """
    levels = 255 // gap + 1          # grid points per channel
    total = min(num + 1, levels ** 3)
    palette = []
    # Start at 1 so that (0, 0, 0) is skipped.
    for code in range(1, total):
        red, rest = divmod(code, levels ** 2)
        green, blue = divmod(rest, levels)
        palette.append((red * gap, green * gap, blue * gap))
    return palette
def update_inliers(ori_inliers, sub_inliers):
    """Merge a RANSAC result computed on the current outliers into the full mask.

    ``sub_inliers`` has one entry per ``False`` position of ``ori_inliers``,
    in order.  A copy of ``ori_inliers`` is returned in which those positions
    are switched to ``True`` wherever ``sub_inliers`` is ``True``; positions
    already ``True`` are kept.
    """
    merged = np.copy(ori_inliers)
    cursor = 0  # index into sub_inliers
    for pos, flag in enumerate(ori_inliers):
        if flag == False:
            if sub_inliers[cursor] == True:
                merged[pos] = True
            cursor += 1
    return merged
def reshape_inliers(ori_inliers, sub_inliers):
    """Expand a mask computed on the outlier subset back to full length.

    ``sub_inliers`` has one entry per ``False`` position of ``ori_inliers``.
    The returned copy is ``True`` only where ``ori_inliers`` was ``False`` and
    the matching ``sub_inliers`` entry is ``True``; positions that were
    ``True`` in ``ori_inliers`` are cleared.

    NOTE(review): indentation was lost in the copy of the file this was
    reconstructed from; the ``else`` is read here as pairing with the outer
    test on ``ori_inliers`` -- confirm against the upstream source.
    """
    expanded = np.copy(ori_inliers)
    cursor = 0  # index into sub_inliers
    for pos, flag in enumerate(ori_inliers):
        if flag == False:
            if sub_inliers[cursor] == True:
                expanded[pos] = True
            cursor += 1
        else:
            expanded[pos] = False
    return expanded
def gen_token_order(box_list):
    """Return a deep copy of ``box_list`` with an ``order`` key on every box.

    ``order`` is the box's position divided by the number of boxes, so it lies
    in ``[0, 1)``.  The input list and its dicts are left untouched.
    """
    # `copy` is not imported at module level in this file, so import it here;
    # without it the function fails with NameError on first use.
    import copy

    ordered = copy.deepcopy(box_list)
    total = len(ordered)
    for position, box in enumerate(ordered):
        box['order'] = position / total
    return ordered
# Score predictions against ground truth by matching rendered token boxes.
#
# Reads per-image token boxes (`bbox/<name>.jsonl`) and rendered images
# (`vis/<name>_base.png`) from `<data_root>/<user_id>/gt` and `.../pred`, pairs
# boxes with HungarianMatcher, filters the pairs with RANSAC, and records
# recall / precision / F1 per image.  Match visualisations are written to
# `vis_match/` and the aggregated metrics to `metrics_res.json`, both under
# `<data_root>/<user_id>`.
# Returns the tuple (metrics_res, metric_res_path, match_vis_dir).
# NOTE(review): indentation was lost in this copy of the file, so the nesting
# described by the comments below should be confirmed against upstream.
def evaluation(data_root, user_id="test"):
data_root = os.path.join(data_root, user_id)
gt_box_dir = os.path.join(data_root, "gt")
pred_box_dir = os.path.join(data_root, "pred")
match_vis_dir = os.path.join(data_root, "vis_match")
os.makedirs(match_vis_dir, exist_ok=True)
# RANSAC settings: number of refinement rounds, points per model fit,
# residual threshold passed to ransac(), and trials per round.
max_iter = 5
min_samples = 2
residual_threshold = 20
max_trials = 50
metrics_per_img = {}
# Image names come from the GT bbox files; everything after the first "." is dropped.
gt_basename_list = [item.split(".")[0] for item in os.listdir(os.path.join(gt_box_dir, 'bbox'))]
for basename in tqdm(gt_basename_list):
gt_valid, pred_valid = True, True
# Load GT boxes, keeping only records whose 'bbox' is non-empty.
if not os.path.exists(os.path.join(gt_box_dir, 'bbox', basename+".jsonl")):
gt_valid = False
else:
with open(os.path.join(gt_box_dir, 'bbox', basename+".jsonl"), 'r') as f:
box_gt = []
for line in f:
info = json.loads(line)
if info['bbox']:
box_gt.append(info)
if not box_gt:
gt_valid = False
# Without usable GT boxes the image is left out of the metrics entirely.
if not gt_valid:
continue
# Load predicted boxes the same way.
if not os.path.exists(os.path.join(pred_box_dir, 'bbox', basename+".jsonl")):
pred_valid = False
else:
with open(os.path.join(pred_box_dir, 'bbox', basename+".jsonl"), 'r') as f:
box_pred = []
for line in f:
info = json.loads(line)
if info['bbox']:
box_pred.append(info)
if not box_pred:
pred_valid = False
# Without usable predicted boxes the image scores zero on every metric.
if not pred_valid:
metrics_per_img[basename] = {
"recall": 0,
"precision": 0,
"F1_score": 0,
}
continue
gt_img_path = os.path.join(gt_box_dir, 'vis', basename+"_base.png")
pred_img_path = os.path.join(pred_box_dir, 'vis', basename+"_base.png")
img_gt = Image.open(gt_img_path)
img_pred = Image.open(pred_img_path)
# Pair GT boxes with predicted boxes; matched_idxes holds (gt_index, pred_index) pairs.
matcher = HungarianMatcher()
matched_idxes = matcher(box_gt, box_pred, img_gt.size, img_pred.size)
src = []
dst = []
for (idx1, idx2) in matched_idxes:
x1min, y1min, x1max, y1max = box_gt[idx1]['bbox']
x2min, y2min, x2max, y2max = box_pred[idx2]['bbox']
x1_c, y1_c = float((x1min+x1max)/2), float((y1min+y1max)/2)
x2_c, y2_c = float((x2min+x2max)/2), float((y2min+y2max)/2)
# Box centres are stored as (y, x).
src.append([y1_c, x1_c])
dst.append([y2_c, x2_c])
src = np.array(src)
dst = np.array(dst)
# With min_samples or fewer pairs no RANSAC is run and every pair is accepted.
if src.shape[0] <= min_samples:
inliers = np.array([True for _ in matched_idxes])
else:
inliers = np.array([False for _ in matched_idxes])
# Each round fits a transform to the pairs that are still outliers and
# promotes the pairs that agree with it.
for i in range(max_iter):
if src[inliers==False].shape[0] <= min_samples:
break
model, inliers_1 = ransac((src[inliers==False], dst[inliers==False]), SimpleAffineTransform, min_samples=min_samples, residual_threshold=residual_threshold, max_trials=max_trials, random_state=42)
if inliers_1 is not None and inliers_1.any():
inliers = update_inliers(inliers, inliers_1)
else:
break
if len(inliers[inliers==True]) >= len(matched_idxes):
break
# Reject accepted pairs whose token cost is exactly 1.
# NOTE(review): presumably cost 1 means the two tokens differ -- verify in HungarianMatcher.
for idx, (a,b) in enumerate(matched_idxes):
if inliers[idx] == True and matcher.cost['token'][a, b] == 1:
inliers[idx] = False
final_match_num = len(inliers[inliers==True])
recall = round(final_match_num/(len(box_gt)), 3)
precision = round(final_match_num/(len(box_pred)), 3)
F1_score = round(2*final_match_num/(len(box_gt)+len(box_pred)), 3)
metrics_per_img[basename] = {
"recall": recall,
"precision": precision,
"F1_score": F1_score,
}
# Visualisation is always produced (the condition is a constant).
if True:
gap = 5
W1, H1 = img_gt.size
W2, H2 = img_pred.size
H = H1 + H2 + gap
W = max(W1, W2)
# GT render on top, a grey separator of height `gap`, predicted render below.
vis_img = Image.new('RGB', (W, H), (255, 255, 255))
vis_img.paste(img_gt, (0, 0))
vis_img.paste(Image.new('RGB', (W, gap), (120, 120, 120)), (0, H1))
vis_img.paste(img_pred, (0, H1+gap))
match_img = vis_img.copy()
match_draw = ImageDraw.Draw(match_img)
# Per-index lookup of whether the pair containing that box was accepted.
gt_matched_idx = {
a: flag
for (a,b), flag in
zip(matched_idxes, inliers)
}
pred_matched_idx = {
b: flag
for (a,b), flag in
zip(matched_idxes, inliers)
}
# Accepted boxes are outlined in green, all others in red.
for idx, box in enumerate(box_gt):
if idx in gt_matched_idx and gt_matched_idx[idx]==True:
color = "green"
else:
color = "red"
x_min, y_min, x_max, y_max = box['bbox']
match_draw.rectangle([x_min-1, y_min-1, x_max+1, y_max+1], fill=None, outline=color, width=2)
for idx, box in enumerate(box_pred):
if idx in pred_matched_idx and pred_matched_idx[idx]==True:
color = "green"
else:
color = "red"
x_min, y_min, x_max, y_max = box['bbox']
# Predicted boxes are shifted down by the GT image height plus the separator.
match_draw.rectangle([x_min-1, y_min-1+H1+gap, x_max+1, y_max+1+H1+gap], fill=None, outline=color, width=2)
vis_img.save(os.path.join(match_vis_dir, basename+"_base.png"))
match_img.save(os.path.join(match_vis_dir, basename+".png"))
# Aggregate: mean F1 over scored images, and the share of images whose F1 is exactly 1.
score_list = [val['F1_score'] for _, val in metrics_per_img.items()]
exp_list = [1 if score==1 else 0 for score in score_list]
metrics_res = {
"mean_score": round(np.mean(score_list), 3),
"exp_rate": round(np.mean(exp_list), 3),
"details": metrics_per_img
}
metric_res_path = os.path.join(data_root, "metrics_res.json")
with open(metric_res_path, "w") as f:
f.write(json.dumps(metrics_res, indent=2))
return metrics_res, metric_res_path, match_vis_dir
# Script entry point: render gt/pred LaTeX to token boxes, then score them.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# described by the comments below should be confirmed against upstream.
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--input', '-i', type=str, default="assets/example/input_example.json")
parser.add_argument('--output', '-o', type=str, default="output")
parser.add_argument('--pools', '-p', type=int, default=240)
args = parser.parse_args()
print(args)
json_input, data_root, pool_num = args.input, args.output, args.pools
temp_dir = os.path.join(data_root, "temp_dir")
# The experiment name is the input file name up to its first ".".
exp_name = os.path.basename(json_input).split('.')[0]
with open(json_input, "r") as f:
input_data = json.load(f)
img_ids = []
groundtruths = []
predictions = []
# Items without an "img_id" get a positional id of the form sample_<index>.
for idx, item in enumerate(input_data):
if "img_id" in item:
img_ids.append(item["img_id"])
else:
img_ids.append(f"sample_{idx}")
groundtruths.append(item['gt'])
predictions.append(item['pred'])
a = time.time()
user_id = exp_name
total_color_list = gen_color_list(num=5800)
data_root = os.path.join(data_root, user_id)
output_dir_info = {}
input_args = []
# Build one rendering job per (subset, sample) and create the output folders.
for subset, latex_list in zip(['gt', 'pred'], [groundtruths, predictions]):
sub_temp_dir = os.path.join(temp_dir, f"{exp_name}_{subset}")
os.makedirs(sub_temp_dir, exist_ok=True)
output_path = os.path.join(data_root, subset)
output_dir_info[output_path] = []
os.makedirs(os.path.join(output_path, 'bbox'), exist_ok=True)
os.makedirs(os.path.join(output_path, 'vis'), exist_ok=True)
for idx, latex in tqdm(enumerate(latex_list), desc=f"collect {subset} latex ..."):
basename = img_ids[idx]
input_arg = latex, basename, output_path, sub_temp_dir, total_color_list
input_args.append(input_arg)
# Run the jobs in a process pool when more than one worker is requested,
# otherwise sequentially in this process.
if pool_num > 1:
print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "using processpool, pool num:", pool_num, ", job num:", len(input_args))
myP = Pool(args.pools)
# The AsyncResult objects are discarded, so an exception raised inside a
# worker is not reported here.
for input_arg in input_args:
myP.apply_async(latex2bbox_color, args=(input_arg,))
myP.close()
myP.join()
else:
for input_arg in input_args:
latex2bbox_color(input_arg)
b = time.time()
print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "extract bbox done, time cost:", round(b-a, 3), "s")
# Remove the per-subset scratch directories created above.
for subset in ['gt', 'pred']:
shutil.rmtree(os.path.join(temp_dir, f"{exp_name}_{subset}"))
c = time.time()
# evaluation() joins the output root and the experiment name itself, so it
# receives the original --output value, not the already-joined data_root.
metrics_res, metric_res_path, match_vis_dir = evaluation(args.output, exp_name)
d = time.time()
print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "calculate metrics done, time cost:", round(d-c, 3), "s")
print(f"=> process done, mean f1 score: {metrics_res['mean_score']}.")
print(f"=> more details of metrics are saved in `{metric_res_path}`")
print(f"=> visulization images are saved under `{match_vis_dir}`")
\ No newline at end of file
import os
import re
import cv2
import json
import shutil
import logging
import subprocess
import numpy as np
from threading import Timer
from PIL import Image, ImageDraw
from modules.latex_processor import (
normalize_latex,
token_add_color_RGB,
clean_latex
)
from modules.tokenize_latex.tokenize_latex import tokenize_latex
# LaTeX document used when the input contains "tabular".
# `<PaperSize>` is substituted with the paper-size digit and `%s` with the
# colourised LaTeX in latex2bbox_color; `%%` becomes a literal `%` after the
# %-formatting step.
tabular_template = r"""
\documentclass[12pt]{article}
\usepackage[landscape]{geometry}
\usepackage{geometry}
\geometry{a<PaperSize>paper,scale=0.98}
\pagestyle{empty}
\usepackage{booktabs}
\usepackage{multirow}
\usepackage{amssymb}
\usepackage{upgreek}
\usepackage{amsmath}
\usepackage{xcolor}
\begin{document}
\makeatletter
\renewcommand*{\@textcolor}[3]{%%
\protect\leavevmode
\begingroup
\color#1{#2}#3%%
\endgroup
}
\makeatother
\begin{displaymath}
%s
\end{displaymath}
\end{document}
"""
# LaTeX document used for formulas that contain no Chinese characters.
# `<PaperSize>` is substituted with the paper-size digit and `%s` with the
# colourised LaTeX in latex2bbox_color; `%%` becomes a literal `%` after the
# %-formatting step.
formular_template = r"""
\documentclass[12pt]{article}
\usepackage[landscape]{geometry}
\usepackage{geometry}
\geometry{a<PaperSize>paper,scale=0.98}
\pagestyle{empty}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{upgreek}
\usepackage{amssymb}
\usepackage{xcolor}
\begin{document}
\makeatletter
\renewcommand*{\@textcolor}[3]{%%
\protect\leavevmode
\begingroup
\color#1{#2}#3%%
\endgroup
}
\makeatother
\begin{displaymath}
%s
\end{displaymath}
\end{document}
"""
# LaTeX document used for formulas that contain Chinese characters: same
# preamble as formular_template plus the CJK package, with the formula wrapped
# in a CJK environment.
# `<PaperSize>` is substituted with the paper-size digit and `%s` with the
# colourised LaTeX in latex2bbox_color; `%%` becomes a literal `%` after the
# %-formatting step.
formular_template_zh = r"""
\documentclass[12pt]{article}
\usepackage[landscape]{geometry}
\usepackage{geometry}
\geometry{a<PaperSize>paper,scale=0.98}
\pagestyle{empty}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{upgreek}
\usepackage{CJK}
\usepackage{amssymb}
\usepackage{xcolor}
\begin{document}
\makeatletter
\renewcommand*{\@textcolor}[3]{%%
\protect\leavevmode
\begingroup
\color#1{#2}#3%%
\endgroup
}
\makeatother
\begin{CJK}{UTF8}{gkai}
\begin{displaymath}
%s
\end{displaymath}
\end{CJK}
\end{document}
"""
def run_cmd(cmd, timeout_sec=30):
    """Run ``cmd`` through the shell and wait for it to finish.

    A watchdog timer kills the spawned process if it is still running after
    ``timeout_sec`` seconds.  Nothing is returned.
    """
    child = subprocess.Popen(cmd, shell=True)
    watchdog = Timer(timeout_sec, child.kill)
    try:
        watchdog.start()
        child.communicate()
    finally:
        # Always disarm the timer, whether or not it has fired.
        watchdog.cancel()
def convert_pdf2img(pdf_filename, png_filename):
    """Rasterise ``pdf_filename`` to ``png_filename`` with ImageMagick.

    Runs ``magick -density 200 -quality 100 <pdf> <png>`` through the shell.
    The exit status is not checked; callers detect failure by the missing
    output file.
    """
    # Quote both paths so that spaces or shell metacharacters in a file name
    # (names are derived from user-supplied image ids) cannot split or alter
    # the command.  Paths made only of safe characters are passed unchanged.
    import shlex

    cmd = "magick -density 200 -quality 100 %s %s" % (
        shlex.quote(pdf_filename),
        shlex.quote(png_filename),
    )
    os.system(cmd)
def crop_image(image_path, pad=8):
    """Crop the image at ``image_path`` to its non-white content, in place.

    The bounding box of all pixels whose grey value is not 255 is grown by
    ``pad`` on every side and the RGB image is cropped to it and saved back to
    the same path.  If every pixel is white, the box (0, 0)-(10, 10) is used
    before padding.
    """
    gray = np.asarray(Image.open(image_path).convert("L"), dtype=np.uint8)
    rows, cols = np.where(gray != 255)
    if len(rows) == 0:
        top, bottom, left, right = 0, 10, 0, 10
    else:
        top, bottom = np.min(rows), np.max(rows)
        left, right = np.min(cols), np.max(cols)
    crop_box = (left - pad, top - pad, right + pad, bottom + pad)
    cropped = Image.open(image_path).convert("RGB").crop(crop_box)
    cropped.save(image_path)
def extrac_bbox_from_color_image(image_path, color_list):
    """Locate each colour of ``color_list`` in the image and binarise the image.

    Returns one entry per colour, in order: ``[x_min, y_min, x_max, y_max]``
    (the tight pixel extent grown by one pixel on each side) or ``[]`` when
    the colour does not occur.  Afterwards the file at ``image_path`` is
    overwritten with a version in which pure-white pixels stay white and all
    other pixels become black.
    """
    pixels = cv2.imread(image_path)
    bbox_list = []
    for red, green, blue in color_list:
        # Compare in blue-green-red channel order, matching the array
        # returned by cv2.imread.
        wanted = np.array([blue, green, red], dtype=np.uint8)
        hits = np.argwhere(np.all(pixels == wanted, axis=2))
        if hits.size == 0:
            bbox_list.append([])
            continue
        ys, xs = hits[:, 0], hits[:, 1]
        bbox_list.append([
            int(xs.min() - 1),
            int(ys.min() - 1),
            int(xs.max() + 1),
            int(ys.max() + 1),
        ])

    gray = Image.open(image_path).convert("RGB").convert("L")
    binary = gray.point(lambda value: 255 if value == 255 else 0, '1')
    binary.convert("RGB").save(image_path)
    return bbox_list
def contains_chinese(text):
    """Return True if ``text`` contains at least one Chinese character.

    The character class covers U+4E00-U+9FFF and U+3400-U+4DBF, the same
    ranges that ``wrap_chinese_in_text`` wraps in ``\\text{}``, so any text
    that function would rewrite is also detected here.
    """
    return re.search(r'[\u4e00-\u9fff\u3400-\u4dbf]', text) is not None
def wrap_chinese_in_text(latex_text):
    """Wrap every run of consecutive Chinese characters in ``\\text{...}``.

    A run is left alone only when it is immediately preceded by ``\\text{``
    and immediately followed by ``}`` in the original string.
    """
    open_tag = '\\text{'

    def _wrap(match):
        run = match.group(0)
        begin, finish = match.span()
        # Inspect the six characters before the run and the one after it to
        # see whether it is already the sole content of a \text{} group.
        prefix = latex_text[max(0, begin - 6):begin]
        suffix = latex_text[finish:min(len(latex_text), finish + 1)]
        already_wrapped = prefix.endswith(open_tag) and suffix.startswith('}')
        return run if already_wrapped else f'\\text{{{run}}}'

    # One or more consecutive Chinese characters form a single run.
    return re.sub(r'[\u4e00-\u9fff\u3400-\u4dbf]' + '+', _wrap, latex_text)
def latex2bbox_color(input_arg):
    """Render one LaTeX string with per-token colours and extract token boxes.

    ``input_arg`` is the 5-tuple ``(latex, basename, output_path, temp_dir,
    total_color_list)``.  Three files are written under ``output_path``:

    * ``bbox/<basename>.jsonl`` -- one ``{"bbox", "token"}`` record per token;
    * ``vis/<basename>_base.png`` -- the cropped, binarised render;
    * ``vis/<basename>.png`` -- the render with boxes and token labels drawn.

    If all three already exist the call does nothing.  Preprocessing and
    compilation failures are logged and the function returns early; the return
    value is always ``None``.
    """
    latex, basename, output_path, temp_dir, total_color_list = input_arg
    if "tabular" in latex:
        template = tabular_template
    else:
        if contains_chinese(latex):
            template = formular_template_zh
            latex = latex.replace(",", ", ").replace(":", ": ").replace(";", "; ")
            latex = wrap_chinese_in_text(latex)
        else:
            template = formular_template

    output_bbox_path = os.path.join(output_path, 'bbox', basename+'.jsonl')
    output_vis_path = os.path.join(output_path, 'vis', basename+'.png')
    output_base_path = os.path.join(output_path, 'vis', basename+'_base.png')

    # Skip samples that were fully processed by an earlier run.
    if os.path.exists(output_bbox_path) and os.path.exists(output_vis_path) and os.path.exists(output_base_path):
        return

    try:
        latex = latex.replace("\n", " ")
        # Protect escaped percent signs from the tokenizer with a placeholder.
        # ("\\%" is the two characters backslash + percent; the previous
        # spelling "\%" relied on an invalid escape sequence.)
        latex = latex.replace("\\%", "<PERCENTAGETOKEN>")
        ret, new_latex = tokenize_latex(latex, middle_file=os.path.join(temp_dir, basename+'.txt'))
        if not(ret and new_latex):
            log = f"ERROR, Tokenize latex failed: {basename}."
            logging.info(log)
            # Fall back to the untokenized input.
            new_latex = latex
        if contains_chinese(new_latex):
            new_latex = new_latex.replace("\\mathrm", "\\text")
        # The tokenizer output has the placeholder split into single
        # characters; turn it back into an escaped percent sign.
        new_latex = new_latex.replace("< P E R C E N T A G E T O K E N >", "\\%")
        latex = normalize_latex(new_latex)
        token_list = []
        l_split = latex.strip().split(' ')
        color_list = total_color_list[0:len(l_split)]
        idx = 0
        while idx < len(l_split):
            l_split, idx, token_list = token_add_color_RGB(l_split, idx, token_list)

        rgb_latex = " ".join(l_split)
        # Substitute every <color_N> placeholder with the N-th RGB triple.
        for idx, color in enumerate(color_list):
            R, G, B = color
            rgb_latex = rgb_latex.replace(f"<color_{idx}>", f"{R},{G},{B}")

        # Longer formulas are typeset on larger paper (A3 > A4 > A5).
        if len(token_list) > 1300:
            paper_size = 3
        elif len(token_list) > 600:
            paper_size = 4
        else:
            paper_size = 5
        final_latex = template.replace("<PaperSize>", str(paper_size)) % rgb_latex
    except Exception as e:
        log = f"ERROR, Preprocess latex failed: {basename}; {e}."
        logging.info(log)
        return

    pre_name = output_path.replace('/', '_').replace('.','_') + '_' + basename
    tex_filename = os.path.join(temp_dir, pre_name+'.tex')
    log_filename = os.path.join(temp_dir, pre_name+'.log')
    aux_filename = os.path.join(temp_dir, pre_name+'.aux')
    with open(tex_filename, "w") as w:
        print(final_latex, file=w)
    run_cmd(f"pdflatex -interaction=nonstopmode -output-directory={temp_dir} {tex_filename} >/dev/null")
    # Best-effort cleanup: a missing intermediate file is not an error.
    try:
        os.remove(tex_filename)
        os.remove(log_filename)
        os.remove(aux_filename)
    except OSError:
        pass

    pdf_filename = tex_filename[:-4]+'.pdf'
    if not os.path.exists(pdf_filename):
        log = f"ERROR, Compile pdf failed: {pdf_filename}"
        logging.info(log)
    else:
        convert_pdf2img(pdf_filename, output_base_path)
        os.remove(pdf_filename)
        crop_image(output_base_path)
        bbox_list = extrac_bbox_from_color_image(output_base_path, color_list)
        vis = Image.open(output_base_path)
        draw = ImageDraw.Draw(vis)
        with open(output_bbox_path, 'w', encoding='utf-8') as f:
            for token, box in zip(token_list, bbox_list):
                item = {
                    "bbox": box,
                    "token": token
                }
                f.write(json.dumps(item, ensure_ascii=False)+'\n')
                # Tokens whose colour was not found are recorded but not drawn.
                if not box:
                    continue
                x_min, y_min, x_max, y_max = box
                draw.rectangle([x_min, y_min, x_max, y_max], fill=None, outline=(0,250,0), width=1)
                # Labelling is cosmetic; a token the font cannot draw must
                # not abort the export.
                try:
                    draw.text((x_min, y_min), token, (250,0,0))
                except Exception:
                    pass
        vis.save(output_vis_path)
This diff is collapsed.
import re
import os
import json
import time
import shutil
import random
import argparse
import subprocess
import numpy as np
from tqdm import tqdm
from multiprocessing import Pool
# LaTeX document used by render_latex to check whether a formula compiles.
# The paper size is fixed to A5; `%s` receives the formula and `%%` becomes a
# literal `%` after the %-formatting step.
formular_template = r"""
\documentclass[12pt]{article}
\usepackage[landscape]{geometry}
\usepackage{geometry}
\geometry{a5paper,scale=0.98}
\pagestyle{empty}
\usepackage{booktabs}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{xcolor}
\begin{document}
\makeatletter
\renewcommand*{\@textcolor}[3]{%%
\protect\leavevmode
\begingroup
\color#1{#2}#3%%
\endgroup
}
\makeatother
\begin{displaymath}
%s
\end{displaymath}
\end{document}
"""
def run_shell_cmd(cmd, max_time=15):
    """Run ``cmd`` through the shell, giving it at most ``max_time`` seconds.

    Returns ``True`` if the command finished within the limit (whatever its
    exit status) and ``False`` if it had to be killed.

    The previous implementation tested ``child.poll()`` for truthiness, which
    treats exit status 0 the same as "still running": successful commands
    waited out the whole limit and were reported as failures.
    """
    child = subprocess.Popen(cmd, shell=True)
    try:
        child.wait(timeout=max_time)
    except subprocess.TimeoutExpired:
        child.kill()
        child.wait()  # reap the killed process
        return False
    return True
def render_latex(latex_code, basename, latex_dir, pdf_dir):
    """Write ``latex_code`` into the template and compile it with pdflatex.

    The ``.tex`` file goes to ``latex_dir`` and pdflatex is told to put its
    output in ``pdf_dir``.  Returns the expected PDF path; the file exists
    only if compilation produced it.
    """
    tex_file = os.path.join(latex_dir, basename + ".tex")
    with open(tex_file, "w") as handle:
        handle.write(formular_template % latex_code)
    run_shell_cmd(
        f"pdflatex -interaction=nonstopmode -output-directory={pdf_dir} -output-format=pdf {tex_file} >/dev/null"
    )
    return os.path.join(pdf_dir, basename + ".pdf")
if __name__ == "__main__":
    # Script entry point: compile every formula of the input file and record
    # in a `renderable` field (1 / 0) whether a PDF was produced.
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', '-i', type=str, default='data/pred_results/test.json')
    parser.add_argument('--clean', action='store_true', default=False)
    parser.add_argument('--gt', action='store_true', default=False)
    args = parser.parse_args()

    # --gt checks the ground-truth strings, otherwise the predictions.
    output_dir = "output"
    if args.gt:
        output_path = os.path.join(output_dir, 'gt.json')
        load_key = 'gt'
    else:
        load_key = 'pred'
        output_path = os.path.join(output_dir, os.path.basename(args.input))
    # Create the output directory up front so the final write cannot fail on
    # a missing directory after all formulas have been rendered.
    os.makedirs(output_dir, exist_ok=True)

    # Start from an empty scratch directory; it may not exist yet.
    temp_dir = "render_temp_dir"
    shutil.rmtree(temp_dir, ignore_errors=True)
    latex_dir = os.path.join(temp_dir, "texes")
    pdf_dir = os.path.join(temp_dir, "pdfs")
    os.makedirs(latex_dir, exist_ok=True)
    os.makedirs(pdf_dir, exist_ok=True)

    with open(args.input, "r") as f:
        input_data = json.load(f)

    myP = Pool(200)
    for idx, item in enumerate(input_data):
        basename = f"sample_{idx}"
        myP.apply_async(render_latex, args=(item[load_key], basename, latex_dir, pdf_dir))
    myP.close()
    print("processing, may take some times.")
    myP.join()

    # A sample counts as renderable when its PDF exists.
    success_num = 0
    total_num = 0
    for idx, item in enumerate(input_data):
        basename = f"sample_{idx}"
        total_num += 1
        pdf_path = os.path.join(pdf_dir, basename + ".pdf")
        if os.path.exists(pdf_path):
            success_num += 1
            item['renderable'] = 1
        else:
            item['renderable'] = 0
    print("total num:", total_num, "render success num:", success_num)

    with open(output_path, "w") as f:
        f.write(json.dumps(input_data, indent=2))

    if args.clean:
        shutil.rmtree(temp_dir, ignore_errors=True)
\ No newline at end of file
const path = require('path');
var katex = require(path.join(__dirname,"third_party/katex/katex.js"))
options = require(path.join(__dirname,"third_party/katex/src/Options.js"))
var readline = require('readline');
// Read LaTeX formulas from stdin, one per line, and print one result line per
// input line to stdout.
var rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
terminal: false
});
// Per-line handler: clean the line, parse it with KaTeX, and print either the
// tokenized string (when argv[2] is "tokenize") or the normalized string.
rl.on('line', function(line){
a = line
// Drop a leading "%", then everything from the next "%" onwards.
if (line[0] == "%") {
line = line.substr(1, line.length - 1);
}
line = line.split('%')[0];
line = line.split('\\~').join(' ');
// Each replace() call handles only the first occurrence, so the loop removes
// at most 300 occurrences of "\>", "$" and "\label{...}".
for (var i = 0; i < 300; i++) {
line = line.replace(/\\>/, " ");
line = line.replace('$', ' ');
line = line.replace(/\\label{.*?}/, "");
}
// Outside of matrix/cases/array/begin contexts, turn line breaks ("\\") into
// thin spaces ("\,").
if (line.indexOf("matrix") == -1 && line.indexOf("cases")==-1 &&
line.indexOf("array")==-1 && line.indexOf("begin")==-1) {
for (var i = 0; i < 300; i++) {
line = line.replace(/\\\\/, "\\,");
}
}
line = line + " "
// global_str is the tokenized version (built in parser.js).
// norm_str is the normalized version, built by the renderer below.
try {
if (process.argv[2] == "tokenize") {
var tree = katex.__parse(line, {});
console.log(global_str.replace(/\\label { .*? }/, ""));
} else {
// Rewrite the "{\rm", "{ \rm" and "\rm{" spellings to "\mathrm{" before parsing.
for (var i = 0; i < 300; ++i) {
line = line.replace(/{\\rm/, "\\mathrm{");
line = line.replace(/{ \\rm/, "\\mathrm{");
line = line.replace(/\\rm{/, "\\mathrm{");
}
var tree = katex.__parse(line, {});
buildExpression(tree, new options({}));
// Turn the "SSSSSS" placeholder (also in its spaced-out form) back into "$".
for (var i = 0; i < 300; ++i) {
norm_str = norm_str.replace('SSSSSS', '$');
norm_str = norm_str.replace(' S S S S S S', '$');
}
console.log(norm_str.replace(/\\label { .*? }/, ""));
}
} catch (e) {
// On failure, report to stderr and still print an empty line to stdout so
// that output lines stay aligned with input lines.
console.error(line);
console.error(norm_str);
console.error(e);
console.log();
}
// Reset the accumulators for the next line.
global_str = ""
norm_str = ""
})
// LaTeX AST -> LaTeX renderer (a modified version of KaTeX's AST -> MathML builder).
// norm_str accumulates the normalized output for the current line; it is an
// implicit global shared by every handler below.
norm_str = ""
// Maps a parse-node type to the function that appends its LaTeX to norm_str.
var groupTypes = {};
// Emit an ordinary symbol. Under the "mathrm" font the value is emitted one
// character at a time, each followed by a space.
groupTypes.mathord = function(group, options) {
    if (options.font == "mathrm") {
        // The counter is declared locally; it used to leak into the global scope.
        for (var i = 0; i < group.value.length; ++i) {
            if (group.value[i] == " ") {
                // NOTE(review): "\; " is not a recognised escape, so JavaScript
                // reads it as the two characters "; ". If a LaTeX "\;" was
                // intended, the literal would need to be "\\; " -- confirm.
                norm_str = norm_str + group.value[i] + "\; ";
            } else {
                norm_str = norm_str + group.value[i] + " ";
            }
        }
    } else {
        norm_str = norm_str + group.value + " ";
    }
};
// The handlers below all emit the node's value followed by a single space.

// Text-mode ordinary symbol.
groupTypes.textord = function(group, options) {
    norm_str += group.value + " ";
};

// Binary operator.
groupTypes.bin = function(group) {
    norm_str += group.value + " ";
};

// Relation.
groupTypes.rel = function(group) {
    norm_str += group.value + " ";
};

// Opening delimiter.
groupTypes.open = function(group) {
    norm_str += group.value + " ";
};

// Closing delimiter.
groupTypes.close = function(group) {
    norm_str += group.value + " ";
};

// Inner atom.
groupTypes.inner = function(group) {
    norm_str += group.value + " ";
};

// Punctuation.
groupTypes.punct = function(group) {
    norm_str += group.value + " ";
};
// Braced group: "{ " + children + "} ".
groupTypes.ordgroup = function(group, options) {
    norm_str += "{ ";
    buildExpression(group.value, options);
    norm_str += "} ";
};

// Text group: emitted as "\mathrm { " + children + "} ".
groupTypes.text = function(group, options) {
    norm_str += "\\mathrm { ";
    buildExpression(group.value.body, options);
    norm_str += "} ";
};
// Colour group. The children are rendered into norm_str by buildExpression,
// but the colour itself is not written to norm_str.
// NOTE(review): `mathMLTree` is not defined or required anywhere in the
// visible part of this file, so reaching the `new mathMLTree.MathNode` line
// would throw a ReferenceError -- this looks like a leftover from the KaTeX
// MathML builder; confirm and replace with real LaTeX output if colour groups
// can occur.
groupTypes.color = function(group, options) {
var inner = buildExpression(group.value.value, options);
var node = new mathMLTree.MathNode("mstyle", inner);
node.setAttribute("mathcolor", group.value.color);
return node;
};
// Base with optional subscript and superscript. The subscript is emitted
// first; a script that is not already an ordgroup is wrapped in braces.
groupTypes.supsub = function(group, options) {
    var emitScript = function(marker, script) {
        norm_str += marker;
        if (script.type != 'ordgroup') {
            norm_str += " { ";
            buildGroup(script, options);
            norm_str += "} ";
            return;
        }
        buildGroup(script, options);
    };

    buildGroup(group.value.base, options);
    if (group.value.sub) {
        emitScript("_ ", group.value.sub);
    }
    if (group.value.sup) {
        emitScript("^ ", group.value.sup);
    }
};
// Generalised fraction: "\frac" when it has a bar line, "\binom" otherwise,
// followed by numerator and denominator.
groupTypes.genfrac = function(group, options) {
    norm_str += group.value.hasBarLine ? "\\frac " : "\\binom ";
    buildGroup(group.value.numer, options);
    buildGroup(group.value.denom, options);
};
// Array environment: emits "\begin{array} { <column spec> } <rows> \end{array} ".
groupTypes.array = function(group, options) {
norm_str = norm_str + "\\begin{array} { ";
// Column spec: use the parsed alignments when present, otherwise one "l" per
// cell of the first row.
if (group.value.cols) {
group.value.cols.map(function(start) {
if (start && start.align) {
norm_str = norm_str + start.align + " ";}});
} else {
group.value.body[0].map(function(start) {
norm_str = norm_str + "l ";
} );
}
norm_str = norm_str + "} ";
group.value.body.map(function(row) {
// Rows in which every cell is empty are skipped.
if (row.some(cell => cell.value.length > 0)) { // original code: if (row[0].value.length > 0)
// `out` is assigned without a declaration and never read.
out = row.map(function(cell) {
buildGroup(cell, options);
// A cell that rendered as an empty group ("{ } ") is trimmed away.
if (norm_str.length > 4
&& norm_str.substring(norm_str.length-4, norm_str.length) == "{ } ") {
norm_str = norm_str.substring(0, norm_str.length-4) ;
}
norm_str = norm_str + "& ";
});
// Replace the trailing "& " of the row with the row terminator.
norm_str = norm_str.substring(0, norm_str.length-2) + "\\\\ ";
}
});
norm_str = norm_str + "\\end{array} ";
};
// Square root, with the optional index in brackets: "\sqrt [ index ] body".
groupTypes.sqrt = function(group, options) {
    norm_str += "\\sqrt ";
    if (group.value.index) {
        norm_str += "[ ";
        buildExpression(group.value.index.value, options);
        norm_str += "] ";
    }
    buildGroup(group.value.body, options);
};
// \left<delim> ... \right<delim> pair around the body.
groupTypes.leftright = function(group, options) {
    var delims = group.value;
    norm_str += "\\left" + delims.left + " ";
    buildExpression(delims.body, options);
    norm_str += "\\right" + delims.right + " ";
};
// Accent command followed by its base; a base that is not already an
// ordgroup is wrapped in braces.
groupTypes.accent = function(group, options) {
    var base = group.value.base;
    var needsBraces = base.type != 'ordgroup';
    norm_str += group.value.accent + (needsBraces ? " { " : " ");
    buildGroup(base, options);
    if (needsBraces) {
        norm_str += "} ";
    }
};
// Spacing: a plain space is emitted as "~", any other value verbatim.
groupTypes.spacing = function(group) {
    var emitted = (group.value == " ") ? "~" : group.value;
    norm_str += emitted + " ";
};
// Operator. Symbol operators are emitted as-is; named operators are emitted
// as "\operatorname { ... }" ("\operatorname*" unless `limits` is false) with
// the name spelled out one element at a time.
groupTypes.op = function(group) {
    // TODO(emily): handle big operators using the `largeop` attribute
    if (group.value.symbol) {
        // This is a symbol. Just add the symbol.
        norm_str = norm_str + group.value.body + " ";
        return;
    }
    // Same output as before; the literal used to be spelled "\\\operatorname",
    // which only worked because "\o" is an unrecognised escape.
    norm_str += (group.value.limits == false)
        ? "\\operatorname { "
        : "\\operatorname* { ";
    // Element 0 of the body is skipped. The counter is declared locally; it
    // used to leak into the global scope.
    for (var i = 1; i < group.value.body.length; ++i) {
        norm_str = norm_str + group.value.body[i] + " ";
    }
    norm_str = norm_str + "} ";
};
// Handler for the "katex" node type. Nothing is written to norm_str.
// NOTE(review): `mathMLTree` is not defined or required anywhere in the
// visible part of this file, so calling this handler would throw a
// ReferenceError -- this looks like a leftover from the KaTeX MathML builder;
// confirm.
groupTypes.katex = function(group) {
var node = new mathMLTree.MathNode(
"mtext", [new mathMLTree.TextNode("KaTeX")]);
return node;
};
// Font command followed by its body; "mbox" and "hbox" are emitted as
// "mathrm". The body is built with the font recorded in the options.
groupTypes.font = function(group, options) {
    var requested = group.value.font;
    var isBox = (requested == "mbox" || requested == "hbox");
    var fontName = isBox ? "mathrm" : requested;
    norm_str += "\\" + fontName + " ";
    buildGroup(group.value.body, options.withFont(fontName));
};

// Sized delimiter: the sizing command followed by the delimiter.
groupTypes.delimsizing = function(group) {
    var sized = group.value;
    norm_str += sized.funcName + " " + sized.value + " ";
};
// Style switch: the original command, then the styled content.
groupTypes.styling = function(group, options) {
    norm_str += " " + group.value.original + " ";
    buildExpression(group.value.value, options);
};

// Size switch. "\rm" is special-cased: it becomes a braced "\mathrm" group
// built under the "mathrm" font; any other command is emitted verbatim before
// its content.
groupTypes.sizing = function(group, options) {
    if (group.value.original != "\\rm") {
        norm_str += " " + group.value.original + " ";
        buildExpression(group.value.value, options);
        return;
    }
    norm_str += "\\mathrm { ";
    buildExpression(group.value.value, options.withFont("mathrm"));
    norm_str += "} ";
};
// "\overline { body } ".
groupTypes.overline = function(group, options) {
    norm_str += "\\overline { ";
    buildGroup(group.value.body, options);
    norm_str += "} ";
};

// "\underline { body } ".
groupTypes.underline = function(group, options) {
    norm_str += "\\underline { ";
    buildGroup(group.value.body, options);
    norm_str += "} ";
};
// "\rule { <width> <unit> } { <height> <unit> } ".
groupTypes.rule = function(group) {
    var width = group.value.width;
    var height = group.value.height;
    norm_str += "\\rule { " + width.number + " " + width.unit + " } { "
        + height.number + " " + height.unit + " } ";
};
// "\llap " followed by the body.
groupTypes.llap = function(group, options) {
    norm_str += "\\llap ";
    buildGroup(group.value.body, options);
};

// "\rlap " followed by the body.
groupTypes.rlap = function(group, options) {
    norm_str += "\\rlap ";
    buildGroup(group.value.body, options);
};

// "\phantom { content } ".
groupTypes.phantom = function(group, options, prev) {
    norm_str += "\\phantom { ";
    buildExpression(group.value.value, options);
    norm_str += "} ";
};
/**
 * Builds every node of `expression` in order. Each node appends its LaTeX to
 * norm_str through buildGroup; nothing is returned.
 */
var buildExpression = function(expression, options) {
    for (var position = 0; position < expression.length; position += 1) {
        buildGroup(expression[position], options);
    }
};
/**
 * Takes a group from the parser and calls the matching groupTypes handler,
 * which appends the group's LaTeX to norm_str.
 *
 * Throws when no handler is registered for the group's type.
 */
var buildGroup = function(group, options) {
    if (groupTypes[group.type]) {
        groupTypes[group.type](group, options);
        return;
    }
    // ParseError is neither defined nor required in the visible part of this
    // file, so `new ParseError(...)` would itself fail with a ReferenceError
    // and hide the real message. Use it when it exists, plain Error otherwise.
    var ErrorType = (typeof ParseError !== "undefined") ? ParseError : Error;
    throw new ErrorType(
        "Got group of unknown type: '" + group.type + "'");
};
const path = require('path');
var katex = require(path.join(__dirname,"third_party/katex/katex.js"))
options = require(path.join(__dirname,"third_party/katex/src/Options.js"))
var readline = require('readline');
// Read LaTeX formulas from stdin, one per line, and print one result line per
// input line to stdout.
var rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
terminal: false
});
// Per-line handler: clean the line, parse it with KaTeX, and print either the
// tokenized string (when argv[2] is "tokenize") or the normalized string.
rl.on('line', function(line){
a = line
// Drop a leading "%". Text after a later "%" is kept (the truncation below is
// disabled).
if (line[0] == "%") {
line = line.substr(1, line.length - 1);
}
// line = line.split('%')[0];
line = line.split('\\~').join(' ');
// Each replace() call handles only the first occurrence, so the loop removes
// at most 300 occurrences of "\>" and "\label{...}". "$" is kept (its removal
// is disabled).
for (var i = 0; i < 300; i++) {
line = line.replace(/\\>/, " ");
// line = line.replace('$', ' ');
line = line.replace(/\\label{.*?}/, "");
}
// Outside of matrix/cases/array/begin contexts, turn line breaks ("\\") into
// thin spaces ("\,").
if (line.indexOf("matrix") == -1 && line.indexOf("cases")==-1 &&
line.indexOf("array")==-1 && line.indexOf("begin")==-1) {
for (var i = 0; i < 300; i++) {
line = line.replace(/\\\\/, "\\,");
}
}
line = line + " "
// global_str is the tokenized version (built in parser.js).
// norm_str is the normalized version, built by the renderer below.
try {
if (process.argv[2] == "tokenize") {
var tree = katex.__parse(line, {});
console.log(global_str.replace(/\\label { .*? }/, ""));
} else {
// Rewrite the "{\rm", "{ \rm" and "\rm{" spellings to "\mathrm{" before parsing.
for (var i = 0; i < 300; ++i) {
line = line.replace(/{\\rm/, "\\mathrm{");
line = line.replace(/{ \\rm/, "\\mathrm{");
line = line.replace(/\\rm{/, "\\mathrm{");
}
var tree = katex.__parse(line, {});
buildExpression(tree, new options({}));
// Turn the "SSSSSS" placeholder (also in its spaced-out form) back into "$".
for (var i = 0; i < 300; ++i) {
norm_str = norm_str.replace('SSSSSS', '$');
norm_str = norm_str.replace(' S S S S S S', '$');
}
console.log(norm_str.replace(/\\label { .*? }/, ""));
}
} catch (e) {
// On failure, report to stderr and still print an empty line to stdout so
// that output lines stay aligned with input lines.
console.error(line);
console.error(norm_str);
console.error(e);
console.log("");
}
// Reset the accumulators for the next line.
global_str = ""
norm_str = ""
})
// This is a LaTeX AST to LaTeX Renderer (modified version of KaTeX AST-> MathML).
// norm_str accumulates the normalized LaTeX of the formula being rendered:
// every groupTypes handler appends to it and the line handler resets it.
// NOTE(review): it is assigned without a declaration, i.e. an implicit global.
norm_str = ""
// Maps a parse-node type name to the function that renders nodes of that type.
var groupTypes = {};
/**
 * Renders an ordinary math symbol. Under the "mathrm" font the value is
 * emitted one character at a time, each followed by a space; otherwise the
 * value is emitted as a single token followed by a space.
 */
groupTypes.mathord = function(group, options) {
    if (options.font == "mathrm"){
        // The counter is declared locally so it does not leak into (or get
        // clobbered through) the global scope.
        for (var i = 0; i < group.value.length; ++i ) {
            if (group.value[i] == " ") {
                // NOTE(review): in a JS string "\; " is just "; " (the
                // backslash is dropped). If LaTeX "\;" was intended this
                // should be "\\; "; it is left as-is because changing it
                // alters the normalized output. Confirm.
                norm_str = norm_str + group.value[i] + "\; ";
            } else {
                norm_str = norm_str + group.value[i] + " ";
            }
        }
    } else {
        norm_str = norm_str + group.value + " ";
    }
};
// Text-mode ordinary symbol: emitted verbatim, followed by a space.
groupTypes.textord = function(group, options) {
    norm_str += group.value + " ";
};
// Binary operator: emitted verbatim, followed by a space.
groupTypes.bin = function(group) {
    norm_str += group.value + " ";
};
// Relation symbol: emitted verbatim, followed by a space.
groupTypes.rel = function(group) {
    norm_str += group.value + " ";
};
// Opening delimiter: emitted verbatim, followed by a space.
groupTypes.open = function(group) {
    norm_str += group.value + " ";
};
// Closing delimiter: emitted verbatim, followed by a space.
groupTypes.close = function(group) {
    norm_str += group.value + " ";
};
// "inner" node: emitted verbatim, followed by a space.
groupTypes.inner = function(group) {
    norm_str += group.value + " ";
};
// Punctuation: emitted verbatim, followed by a space.
groupTypes.punct = function(group) {
    norm_str += group.value + " ";
};
// Braced group: renders the children between "{ " and "} ".
groupTypes.ordgroup = function(group, options) {
    norm_str += "{ ";
    buildExpression(group.value, options);
    norm_str += "} ";
};
// Text group: normalized to "\mathrm { ... } " around the rendered body.
groupTypes.text = function(group, options) {
    norm_str += "\\mathrm { ";
    buildExpression(group.value.body, options);
    norm_str += "} ";
};
/**
 * Renders a colored group as "\color { <color> } { <body> } ".
 *
 * Like the other handlers it only appends LaTeX text to `norm_str`. It must
 * not build MathML nodes, because no `mathMLTree` exists in this script and
 * referencing it makes every formula that uses color fail.
 *
 * NOTE(review): `group.value.color` is written out unchanged. Confirm this is
 * the spelling wanted in the normalized output (KaTeX's own color names
 * appear to carry a "katex-" prefix).
 */
groupTypes.color = function(group, options) {
    norm_str = norm_str + "\\color { " + group.value.color + " } { ";
    buildExpression(group.value.value, options);
    norm_str = norm_str + "} ";
};
// Renders a base followed by its optional subscript and superscript.
// A script that is not already a braced group gets braces added around it.
groupTypes.supsub = function(group, options) {
    var parts = group.value;

    // Emits `marker` and then the script, adding braces when needed.
    var emitScript = function(marker, script) {
        if (!script) {
            return;
        }
        norm_str += marker;
        if (script.type == 'ordgroup') {
            buildGroup(script, options);
            return;
        }
        norm_str += " { ";
        buildGroup(script, options);
        norm_str += "} ";
    };

    buildGroup(parts.base, options);
    emitScript("_ ", parts.sub);
    emitScript("^ ", parts.sup);
};
// Generalized fraction: "\frac " when there is a bar line, "\binom "
// otherwise, followed by the numerator and then the denominator.
groupTypes.genfrac = function(group, options) {
    var frac = group.value;
    norm_str += frac.hasBarLine ? "\\frac " : "\\binom ";
    buildGroup(frac.numer, options);
    buildGroup(frac.denom, options);
};
/**
 * Renders an array-like environment as
 * "\begin{style} [{ column spec }] row \\ row \\ ... \end{style} ".
 *
 * For "array", "tabular" and "tabularx" a column spec is emitted: the parsed
 * column descriptors when present, otherwise one "c" per cell of the first
 * row. Cells are separated by "& " and every emitted row ends with "\\ ".
 * A row consisting of one empty cell is skipped.
 */
groupTypes.array = function(group, options) {
    var style = group.value.style;
    norm_str = norm_str + "\\begin{" + style + "} ";
    if (style == "array" || style == "tabular" || style == "tabularx") {
        norm_str = norm_str + "{ ";
        if (group.value.cols) {
            group.value.cols.forEach(function(start) {
                if (start) {
                    if (start.type == "align") {
                        norm_str = norm_str + start.align + " ";
                    } else if (start.type == "separator") {
                        norm_str = norm_str + start.separator + " ";
                    }
                }
            });
        } else {
            group.value.body[0].forEach(function() {
                norm_str = norm_str + "c ";
            });
        }
        norm_str = norm_str + "} ";
    }
    group.value.body.forEach(function(row) {
        if (row.length > 1 || row[0].value.length > 0) {
            // A leading \hline belongs to the row, not to its first cell:
            // emit it first and remove it from the cell.
            if (row[0].value[0] && row[0].value[0].value == "\\hline") {
                norm_str = norm_str + "\\hline ";
                row[0].value = row[0].value.slice(1);
            }
            row.forEach(function(cell) {
                buildGroup(cell, options);
                norm_str = norm_str + "& ";
            });
            // Replace the trailing "& " with the row terminator.
            norm_str = norm_str.substring(0, norm_str.length-2) + "\\\\ ";
        }
    });
    norm_str = norm_str + "\\end{" + style + "} ";
};
/**
 * Renders a root as "\sqrt <body>" or "\sqrt [ <index> ] <body>".
 *
 * When the parser supplies the index as a parse node (an object), it is
 * rendered through the normal handlers. Concatenating such a node into the
 * string would produce "[object Object]". A primitive index is written out
 * unchanged.
 */
groupTypes.sqrt = function(group, options) {
    var index = group.value.index;
    if (index) {
        norm_str = norm_str + "\\sqrt [ ";
        if (typeof index === "object") {
            if (index.type == "ordgroup") {
                // Render the children directly so the index is not wrapped
                // in an extra pair of braces inside the brackets.
                buildExpression(index.value, options);
            } else {
                buildGroup(index, options);
            }
            norm_str = norm_str + "] ";
        } else {
            norm_str = norm_str + index + " ] ";
        }
    } else {
        norm_str = norm_str + "\\sqrt ";
    }
    buildGroup(group.value.body, options);
};
// Renders "\left<delim> ... \right<delim> " around the body.
groupTypes.leftright = function(group, options) {
    var pair = group.value;
    norm_str += "\\left" + pair.left + " ";
    buildExpression(pair.body, options);
    norm_str += "\\right" + pair.right + " ";
};
// Renders an accent command followed by its base. A base that is not
// already a braced group is wrapped in "{ ... } ".
groupTypes.accent = function(group, options) {
    var base = group.value.base;
    var needsBraces = base.type != 'ordgroup';
    norm_str += group.value.accent + (needsBraces ? " { " : " ");
    buildGroup(base, options);
    if (needsBraces) {
        norm_str += "} ";
    }
};
// Spacing node: a plain space becomes "~ "; any other value is emitted
// verbatim followed by a space. Returns nothing.
groupTypes.spacing = function(group) {
    norm_str += (group.value == " " ? "~" : group.value) + " ";
};
/**
 * Renders an operator.
 *
 * Symbol operators are emitted as their body. Named operators are spelled
 * out inside "\operatorname { ... } " (or "\operatorname* { ... } " unless
 * `limits` is false), one character per token. The first character of the
 * body is skipped.
 */
groupTypes.op = function(group) {
    // TODO(emily): handle big operators using the `largeop` attribute
    if (group.value.symbol) {
        // This is a symbol. Just add the symbol.
        norm_str = norm_str + group.value.body + " ";
    } else {
        if (group.value.limits == false) {
            norm_str = norm_str + "\\operatorname { ";
        } else {
            norm_str = norm_str + "\\operatorname* { ";
        }
        // Start at 1 to skip the first character of the body.
        // NOTE(review): presumably the leading backslash of e.g. "\sin".
        // The counter is declared locally so it does not leak into the
        // global scope.
        for (var i = 1; i < group.value.body.length; ++i ) {
            norm_str = norm_str + group.value.body[i] + " ";
        }
        norm_str = norm_str + "} ";
    }
};
/**
 * Renders the KaTeX logo node back to its command, "\KaTeX ".
 *
 * Like the other handlers it only appends LaTeX text to `norm_str`. It must
 * not build MathML nodes, because no `mathMLTree` exists in this script and
 * referencing it makes the whole formula fail.
 * NOTE(review): assumes the node comes from the "\KaTeX" command; confirm.
 */
groupTypes.katex = function(group) {
    norm_str = norm_str + "\\KaTeX ";
};
// Font command: "mbox" and "hbox" are normalized to "mathrm"; the body is
// rendered with that font recorded in the options.
groupTypes.font = function(group, options) {
    var requested = group.value.font;
    var font = (requested == "mbox" || requested == "hbox") ?
        "mathrm" : requested;
    norm_str += "\\" + font + " ";
    buildGroup(group.value.body, options.withFont(font));
};
// Sized delimiter: the sizing command name followed by the delimiter.
groupTypes.delimsizing = function(group) {
    var delim = group.value;
    norm_str += delim.funcName + " " + delim.value + " ";
};
// Style switch: the original command (space padded) and then its contents.
groupTypes.styling = function(group, options) {
    norm_str += " " + group.value.original + " ";
    buildExpression(group.value.value, options);
};
// Sizing command. "\rm" is rewritten to "\mathrm { ... } " with the
// contents rendered under the mathrm font; any other command is emitted
// as written (space padded) followed by its contents.
groupTypes.sizing = function(group, options) {
    var command = group.value.original;
    if (command != "\\rm") {
        norm_str += " " + command + " ";
        buildExpression(group.value.value, options);
        return;
    }
    norm_str += "\\mathrm { ";
    buildExpression(group.value.value, options.withFont("mathrm"));
    norm_str += "} ";
};
// Wraps the rendered body in "\overline { ... } ".
groupTypes.overline = function(group, options) {
    norm_str += "\\overline { ";
    buildGroup(group.value.body, options);
    norm_str += "} ";
};
// Wraps the rendered body in "\underline { ... } ".
groupTypes.underline = function(group, options) {
    norm_str += "\\underline { ";
    buildGroup(group.value.body, options);
    norm_str += "} ";
};
// Emits a \rule with its width and height written as "<number> <unit>".
groupTypes.rule = function(group) {
    var width = group.value.width;
    var height = group.value.height;
    norm_str += "\\rule { " + width.number + " " + width.unit + " } { " +
        height.number + " " + height.unit + " } ";
};
// Emits "\llap " and then renders the overlapped body.
groupTypes.llap = function(group, options) {
    norm_str += "\\llap ";
    buildGroup(group.value.body, options);
};
// Emits "\rlap " and then renders the overlapped body.
groupTypes.rlap = function(group, options) {
    norm_str += "\\rlap ";
    buildGroup(group.value.body, options);
};
// Wraps the rendered phantom contents in "\phantom { ... } ".
// The third parameter is accepted but not used.
groupTypes.phantom = function(group, options, prev) {
    norm_str += "\\phantom { ";
    buildExpression(group.value.value, options);
    norm_str += "} ";
};
/**
 * Renders every node of a parsed expression, in order, by handing each one to
 * buildGroup. The rendered LaTeX is appended to the shared `norm_str` buffer
 * by the individual handlers; nothing is returned.
 *
 * @param {Array} expression list of parse nodes
 * @param {Object} options rendering options, forwarded to every handler
 */
var buildExpression = function(expression, options) {
    for (var i = 0; i < expression.length; i++) {
        buildGroup(expression[i], options);
    }
};
/**
 * Takes a group from the parser and calls the groupTypes handler registered
 * for its `type`. The handler appends the rendered LaTeX to `norm_str`.
 *
 * @param {Object} group parse node; `group.type` selects the handler
 * @param {Object} options rendering options, forwarded to the handler
 * @throws {Error} when no handler is registered for `group.type`
 */
var buildGroup = function(group, options) {
    if (groupTypes[group.type]) {
        groupTypes[group.type](group, options);
    } else {
        // This script never brings ParseError into scope. Referencing it
        // unguarded raises a ReferenceError that hides the message below,
        // so fall back to a plain Error when it is missing.
        var ErrorType = (typeof ParseError === "function") ? ParseError : Error;
        throw new ErrorType(
            "Got group of unknown type: '" + group.type + "'");
    }
};
Directly taken from https://github.com/harvardnlp/im2markup
# [<img src="https://khan.github.io/KaTeX/katex-logo.svg" width="130" alt="KaTeX">](https://khan.github.io/KaTeX/) [![Build Status](https://travis-ci.org/Khan/KaTeX.svg?branch=master)](https://travis-ci.org/Khan/KaTeX)
[![Join the chat at https://gitter.im/Khan/KaTeX](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Khan/KaTeX?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
KaTeX is a fast, easy-to-use JavaScript library for TeX math rendering on the web.
* **Fast:** KaTeX renders its math synchronously and doesn't need to reflow the page. See how it compares to a competitor in [this speed test](http://jsperf.com/katex-vs-mathjax/).
* **Print quality:** KaTeX’s layout is based on Donald Knuth’s TeX, the gold standard for math typesetting.
* **Self contained:** KaTeX has no dependencies and can easily be bundled with your website resources.
* **Server side rendering:** KaTeX produces the same output regardless of browser or environment, so you can pre-render expressions using Node.js and send them as plain HTML.
KaTeX supports all major browsers, including Chrome, Safari, Firefox, Opera, and IE 8 - IE 11. A list of supported commands can be found on the [wiki](https://github.com/Khan/KaTeX/wiki/Function-Support-in-KaTeX).
## Usage
You can [download KaTeX](https://github.com/khan/katex/releases) and host it on your server or include the `katex.min.js` and `katex.min.css` files on your page directly from a CDN:
```html
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.5.1/katex.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.5.1/katex.min.js"></script>
```
#### In-browser rendering
Call `katex.render` with a TeX expression and a DOM element to render into:
```js
katex.render("c = \\pm\\sqrt{a^2 + b^2}", element);
```
If KaTeX can't parse the expression, it throws a `katex.ParseError` error.
#### Server side rendering or rendering to a string
To generate HTML on the server or to generate an HTML string of the rendered math, you can use `katex.renderToString`:
```js
var html = katex.renderToString("c = \\pm\\sqrt{a^2 + b^2}");
// '<span class="katex">...</span>'
```
Make sure to include the CSS and font files, but there is no need to include the JavaScript. Like `render`, `renderToString` throws if it can't parse the expression.
#### Rendering options
You can provide an object of options as the last argument to `katex.render` and `katex.renderToString`. Available options are:
- `displayMode`: `boolean`. If `true` the math will be rendered in display mode, which will put the math in display style (so `\int` and `\sum` are large, for example), and will center the math on the page on its own line. If `false` the math will be rendered in inline mode. (default: `false`)
- `throwOnError`: `boolean`. If `true`, KaTeX will throw a `ParseError` when it encounters an unsupported command. If `false`, KaTeX will render the unsupported command as text in the color given by `errorColor`. (default: `true`)
- `errorColor`: `string`. A color string given in the format `"#XXX"` or `"#XXXXXX"`. This option determines the color which unsupported commands are rendered in. (default: `#cc0000`)
For example:
```js
katex.render("c = \\pm\\sqrt{a^2 + b^2}", element, { displayMode: true });
```
#### Automatic rendering of math on a page
Math on the page can be automatically rendered using the auto-render extension. See [the Auto-render README](contrib/auto-render/README.md) for more information.
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md)
## License
KaTeX is licensed under the [MIT License](http://opensource.org/licenses/MIT).
#!/usr/bin/env node
// Simple CLI for KaTeX.
// Reads TeX from stdin, outputs HTML to stdout.
/* eslint no-console:0 */
var katex = require("./");
// Accumulates everything read from stdin until the stream ends.
var input = "";

// Skip the first two args, which are just "node" and "cli.js"
var args = process.argv.slice(2);

if (args.indexOf("--help") !== -1) {
    console.log(process.argv[0] + " " + process.argv[1] +
                " [ --help ]" +
                " [ --display-mode ]");

    console.log("\n" +
                "Options:");
    console.log("  --help            Display this help message");
    console.log("  --display-mode    Render in display mode (not inline mode)");
    process.exit();
}

// Let the stream decode UTF-8 itself. Converting each raw chunk separately
// corrupts a multi-byte character that happens to be split across two chunks.
process.stdin.setEncoding("utf8");

process.stdin.on("data", function(chunk) {
    input += chunk;
});

// Once all input is read, render it and print the resulting markup.
// renderToString throws if the input cannot be parsed.
process.stdin.on("end", function() {
    var options = { displayMode: args.indexOf("--display-mode") !== -1 };
    var output = katex.renderToString(input, options);
    console.log(output);
});
/* eslint no-console:0 */
/**
* This is the main entry point for KaTeX. Here, we expose functions for
* rendering expressions either to DOM nodes or to markup strings.
*
* We also expose the ParseError class to check if errors thrown from KaTeX are
* errors in the expression, or errors in javascript handling.
*/
var ParseError = require("./src/ParseError");
var Settings = require("./src/Settings");
var buildTree = require("./src/buildTree");
var parseTree = require("./src/parseTree");
var utils = require("./src/utils");
/**
 * Parses `expression`, builds its tree, and puts the resulting node into
 * `baseNode`, whose previous contents are cleared first.
 */
var render = function(expression, baseNode, options) {
    utils.clearNode(baseNode);

    var settings = new Settings(options);
    var built = buildTree(parseTree(expression, settings), expression, settings);
    var dom = built.toNode();

    baseNode.appendChild(dom);
};
// KaTeX's styles don't work properly in quirks mode. When a document exists
// and is not in standards mode, warn (if a console exists) and replace
// `render` with a function that always throws.
if (typeof document !== "undefined" && document.compatMode !== "CSS1Compat") {
    if (typeof console !== "undefined") {
        console.warn(
            "Warning: KaTeX doesn't work in quirks mode. Make sure your " +
                "website has a suitable doctype.");
    }

    render = function() {
        throw new ParseError("KaTeX doesn't work in quirks mode.");
    };
}
/**
 * Parses `expression`, builds its tree, and returns the tree's markup.
 */
var renderToString = function(expression, options) {
    var settings = new Settings(options);
    var built = buildTree(parseTree(expression, settings), expression, settings);
    return built.toMarkup();
};
/**
 * Parses `expression` with settings derived from `options` and returns the
 * parse tree without building any output from it.
 */
var generateParseTree = function(expression, options) {
    return parseTree(expression, new Settings(options));
};
// Public API of this module: the two rendering entry points, the raw parser
// hook, and the ParseError class.
module.exports = {
render: render,
renderToString: renderToString,
/**
* NOTE: This method is not currently recommended for public use.
* The internal tree representation is unstable and is very likely
* to change. Use at your own risk.
*/
__parse: generateParseTree,
ParseError: ParseError,
};
{
"_args": [
[
"katex",
"/home/srush/Projects/im2latex"
]
],
"_from": "katex@latest",
"_id": "katex@0.6.0",
"_inCache": true,
"_installable": true,
"_location": "/katex",
"_nodeVersion": "4.2.1",
"_npmOperationalInternal": {
"host": "packages-12-west.internal.npmjs.com",
"tmp": "tmp/katex-0.6.0.tgz_1460769444991_0.38667152682319283"
},
"_npmUser": {
"email": "kevinb7@gmail.com",
"name": "kevinbarabash"
},
"_npmVersion": "2.15.2",
"_phantomChildren": {},
"_requested": {
"name": "katex",
"raw": "katex",
"rawSpec": "",
"scope": null,
"spec": "latest",
"type": "tag"
},
"_requiredBy": [
"#USER"
],
"_resolved": "https://registry.npmjs.org/katex/-/katex-0.6.0.tgz",
"_shasum": "12418e09121c05c92041b6b3b9fb6bab213cb6f3",
"_shrinkwrap": null,
"_spec": "katex",
"_where": "/home/srush/Projects/im2latex",
"bin": {
"katex": "cli.js"
},
"bugs": {
"url": "https://github.com/Khan/KaTeX/issues"
},
"dependencies": {
"match-at": "^0.1.0"
},
"description": "Fast math typesetting for the web.",
"devDependencies": {
"browserify": "^10.2.4",
"clean-css": "~2.2.15",
"eslint": "^1.10.2",
"express": "~3.3.3",
"glob": "^5.0.15",
"jasmine": "^2.3.2",
"jasmine-core": "^2.3.4",
"js-yaml": "^3.3.1",
"jspngopt": "^0.1.0",
"less": "~1.7.5",
"nomnom": "^1.8.1",
"pako": "0.2.7",
"selenium-webdriver": "^2.46.1",
"uglify-js": "~2.4.15"
},
"directories": {},
"dist": {
"shasum": "12418e09121c05c92041b6b3b9fb6bab213cb6f3",
"tarball": "https://registry.npmjs.org/katex/-/katex-0.6.0.tgz"
},
"files": [
"cli.js",
"dist/",
"katex.js",
"src/"
],
"gitHead": "b94fc6534d5c23f944906a52a592bee4e0090665",
"homepage": "https://github.com/Khan/KaTeX#readme",
"license": "MIT",
"main": "katex.js",
"maintainers": [
{
"name": "kevinbarabash",
"email": "kevinb7@gmail.com"
},
{
"name": "spicyj",
"email": "ben@benalpert.com"
},
{
"name": "xymostech",
"email": "xymostech@gmail.com"
}
],
"name": "katex",
"optionalDependencies": {},
"readme": "ERROR: No README data found!",
"repository": {
"type": "git",
"url": "git://github.com/Khan/KaTeX.git"
},
"scripts": {
"prepublish": "make dist",
"start": "node server.js",
"test": "make lint test"
},
"version": "0.6.0"
}
/**
* The Lexer class handles tokenizing the input in various ways. Since our
* parser expects us to be able to backtrack, the lexer allows lexing from any
* given starting point.
*
* Its main exposed function is the `lex` function, which takes a position to
* lex from and a type of token to lex. It defers to the appropriate `_innerLex`
* function.
*
* The various `_innerLex` functions perform the actual lexing of different
* kinds.
*/
var matchAt = require("../../match-at");
var ParseError = require("./ParseError");
// The lexer: holds the input string and produces tokens from any position.
function Lexer(input) {
    this._input = input;
}

// A lexed token: its text, optional extra data, and the input position
// immediately after it.
function Token(text, data, position) {
    this.text = text;
    this.data = data;
    this.position = position;
}

/* The following tokenRegex
 * - matches typical whitespace (but not NBSP etc.) using its first group
 * - matches symbol combinations which result in a single output character
 * - does not match any control character \x00-\x1f except whitespace
 * - does not match a bare backslash
 * - matches any ASCII character except those just mentioned
 * - does not match the BMP private use area \uE000-\uF8FF
 * - does not match bare surrogate code units
 * - matches any BMP character except for those just described
 * - matches any valid Unicode surrogate pair
 * - matches a backslash followed by one or more letters
 * - matches a backslash followed by any BMP character, including newline
 * Just because the Lexer matches something doesn't mean it's valid input:
 * If there is no matching function or symbol definition, the Parser will
 * still reject the input.
 */
var tokenRegex = new RegExp(
    "([ \r\n\t]+)|(" +                                // whitespace
    "---?" +                                          // special combinations
    "|[!-\\[\\]-\u2027\u202A-\uD7FF\uF900-\uFFFF]" +  // single codepoint
    "|[\uD800-\uDBFF][\uDC00-\uDFFF]" +               // surrogate pair
    "|\\\\(?:[a-zA-Z]+|[^\uD800-\uDFFF])" +           // function name
    ")"
);

var whitespaceRegex = /\s*/;

/**
 * Lexes one ordinary token at `pos`. A run of whitespace is either skipped
 * (`ignoreWhitespace` true) or collapsed into a single " " token. At the end
 * of the input an "EOF" token is returned.
 */
Lexer.prototype._innerLex = function(pos, ignoreWhitespace) {
    var source = this._input;
    if (pos === source.length) {
        return new Token("EOF", null, pos);
    }

    var found = matchAt(tokenRegex, source, pos);
    if (found === null) {
        throw new ParseError(
            "Unexpected character: '" + source[pos] + "'",
            this, pos);
    }

    // Group 2 is the non-whitespace alternative.
    var symbol = found[2];
    if (symbol) {
        return new Token(symbol, null, pos + symbol.length);
    }

    // Otherwise group 1 (whitespace) matched.
    var afterSpace = pos + found[1].length;
    return ignoreWhitespace ?
        this._innerLex(afterSpace, true) :
        new Token(" ", null, afterSpace);
};

// A regex to match a CSS color (like #ffffff or BlueViolet)
var cssColor = /#[a-z0-9]+|[a-z]+/i;

/**
 * Lexes a CSS color at `pos`, after skipping leading whitespace.
 */
Lexer.prototype._innerLexColor = function(pos) {
    var source = this._input;

    // Ignore whitespace
    pos += matchAt(whitespaceRegex, source, pos)[0].length;

    var found = matchAt(cssColor, source, pos);
    if (!found) {
        throw new ParseError("Invalid color", this, pos);
    }
    // If we look like a color, return a color
    return new Token(found[0], null, pos + found[0].length);
};

// A regex to match a dimension. Dimensions look like
// "1.2em" or ".4pt" or "1 ex"
var sizeRegex = /(-?)\s*(\d+(?:\.\d*)?|\.\d+)\s*([a-z]{2})/;

/**
 * Lexes a dimension at `pos`, after skipping leading whitespace. The token's
 * data holds the signed number and the unit. Any two-letter unit is accepted;
 * the unit is passed through without being checked.
 */
Lexer.prototype._innerLexSize = function(pos) {
    var source = this._input;

    // Ignore whitespace
    pos += matchAt(whitespaceRegex, source, pos)[0].length;

    var found = matchAt(sizeRegex, source, pos);
    if (!found) {
        throw new ParseError("Invalid size", this, pos);
    }
    var dimension = {
        number: +(found[1] + found[2]),
        unit: found[3],
    };
    return new Token(found[0], dimension, pos + found[0].length);
};

/**
 * Lexes a (possibly empty) run of whitespace at `pos`. The token text is the
 * first character of the run.
 */
Lexer.prototype._innerLexWhitespace = function(pos) {
    var run = matchAt(whitespaceRegex, this._input, pos)[0];
    return new Token(run[0], null, pos + run.length);
};

/**
 * Lexes a single token starting at `pos` in the given mode by deferring to
 * the matching `_innerLex*` function. An unrecognized mode yields undefined.
 */
Lexer.prototype.lex = function(pos, mode) {
    switch (mode) {
        case "math":
            return this._innerLex(pos, true);
        case "text":
            return this._innerLex(pos, false);
        case "color":
            return this._innerLexColor(pos);
        case "size":
            return this._innerLexSize(pos);
        case "whitespace":
            return this._innerLexWhitespace(pos);
    }
};
module.exports = Lexer;
/**
* This file contains information about the options that the Parser carries
* around with it while parsing. Data is held in an `Options` object, and when
* recursing, a new `Options` object can be created with the `.with*` and
* `.reset` functions.
*/
/**
 * The main options class. It holds the style, size, color, phantom flag and
 * font of the current level, plus the style and size of the parent level.
 *
 * When `data` carries no parentStyle / parentSize, the level's own style /
 * size is used for them. The `.with*` and `.reset` functions all go through
 * `extend`, which passes the current style and size on as the parent values.
 */
function Options(data) {
    this.style = data.style;
    this.color = data.color;
    this.size = data.size;
    this.phantom = data.phantom;
    this.font = data.font;

    this.parentStyle =
        data.parentStyle === undefined ? data.style : data.parentStyle;
    this.parentSize =
        data.parentSize === undefined ? data.size : data.parentSize;
}

/**
 * Returns a new options object that copies "this" (with the current style and
 * size recorded as the parent values) and then applies the own properties of
 * "extension" on top.
 */
Options.prototype.extend = function(extension) {
    var merged = {
        style: this.style,
        size: this.size,
        color: this.color,
        parentStyle: this.style,
        parentSize: this.size,
        phantom: this.phantom,
        font: this.font,
    };

    for (var name in extension) {
        if (!extension.hasOwnProperty(name)) {
            continue;
        }
        merged[name] = extension[name];
    }

    return new Options(merged);
};

/**
 * Create a new options object with the given style.
 */
Options.prototype.withStyle = function(style) {
    return this.extend({style: style});
};

/**
 * Create a new options object with the given size.
 */
Options.prototype.withSize = function(size) {
    return this.extend({size: size});
};

/**
 * Create a new options object with the given color.
 */
Options.prototype.withColor = function(color) {
    return this.extend({color: color});
};

/**
 * Create a new options object with "phantom" set to true.
 */
Options.prototype.withPhantom = function() {
    return this.extend({phantom: true});
};

/**
 * Create a new options object with the given font.
 */
Options.prototype.withFont = function(font) {
    return this.extend({font: font});
};

/**
 * Create a new options object with the same style, size, and color. This is
 * used so that parent style and size changes are handled correctly.
 */
Options.prototype.reset = function() {
    return this.extend({});
};
/**
* A map of color names to CSS colors.
* TODO(emily): Remove this when we have real macros
*/
var colorMap = {
"katex-blue": "#6495ed",
"katex-orange": "#ffa500",
"katex-pink": "#ff00af",
"katex-red": "#df0030",
"katex-green": "#28ae7b",
"katex-gray": "gray",
"katex-purple": "#9d38bd",
"katex-blueA": "#c7e9f1",
"katex-blueB": "#9cdceb",
"katex-blueC": "#58c4dd",
"katex-blueD": "#29abca",
"katex-blueE": "#1c758a",
"katex-tealA": "#acead7",
"katex-tealB": "#76ddc0",
"katex-tealC": "#5cd0b3",
"katex-tealD": "#55c1a7",
"katex-tealE": "#49a88f",
"katex-greenA": "#c9e2ae",
"katex-greenB": "#a6cf8c",
"katex-greenC": "#83c167",
"katex-greenD": "#77b05d",
"katex-greenE": "#699c52",
"katex-goldA": "#f7c797",
"katex-goldB": "#f9b775",
"katex-goldC": "#f0ac5f",
"katex-goldD": "#e1a158",
"katex-goldE": "#c78d46",
"katex-redA": "#f7a1a3",
"katex-redB": "#ff8080",
"katex-redC": "#fc6255",
"katex-redD": "#e65a4c",
"katex-redE": "#cf5044",
"katex-maroonA": "#ecabc1",
"katex-maroonB": "#ec92ab",
"katex-maroonC": "#c55f73",
"katex-maroonD": "#a24d61",
"katex-maroonE": "#94424f",
"katex-purpleA": "#caa3e8",
"katex-purpleB": "#b189c6",
"katex-purpleC": "#9a72ac",
"katex-purpleD": "#715582",
"katex-purpleE": "#644172",
"katex-mintA": "#f5f9e8",
"katex-mintB": "#edf2df",
"katex-mintC": "#e0e5cc",
"katex-grayA": "#fdfdfd",
"katex-grayB": "#f7f7f7",
"katex-grayC": "#eeeeee",
"katex-grayD": "#dddddd",
"katex-grayE": "#cccccc",
"katex-grayF": "#aaaaaa",
"katex-grayG": "#999999",
"katex-grayH": "#555555",
"katex-grayI": "#333333",
"katex-kaBlue": "#314453",
"katex-kaGreen": "#639b24",
};
/**
 * Returns the CSS color for these options: "transparent" while phantom is
 * set, otherwise the `colorMap` entry for the color name, falling back to
 * the color value itself when the map has no entry for it.
 */
Options.prototype.getColor = function() {
    return this.phantom ?
        "transparent" :
        (colorMap[this.color] || this.color);
};
module.exports = Options;
/**
* This is the ParseError class, which is the main error thrown by KaTeX
* functions when something has gone wrong. This is used to distinguish internal
* errors from errors in the expression that the user provided.
*/
/**
 * Builds the error KaTeX throws for problems in the user's expression.
 *
 * The message is prefixed with "KaTeX parse error: ". When both a lexer and a
 * position are given, the position and up to 15 characters of input on either
 * side are appended, with a combining underscore marking the position.
 * The returned object is an Error whose prototype is ParseError.prototype,
 * with `name` set to "ParseError" and `position` set to the given position.
 */
function ParseError(message, lexer, position) {
    var text = "KaTeX parse error: " + message;

    var hasLocation = lexer !== undefined && position !== undefined;
    if (hasLocation) {
        // Mark the offending spot with a combining underscore, then cut
        // a window of context around it.
        var source = lexer._input;
        var marked = source.slice(0, position) + "\u0332" +
            source.slice(position);
        var from = Math.max(0, position - 15);
        var to = position + 15;

        text += " at position " + position + ": " + marked.slice(from, to);
    }

    // Some hackery to make ParseError a prototype of Error
    // See http://stackoverflow.com/a/8460753
    var err = new Error(text);
    err.name = "ParseError";
    err.__proto__ = ParseError.prototype;
    err.position = position;
    return err;
}

// More hackery
ParseError.prototype.__proto__ = Error.prototype;
module.exports = ParseError;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment