Commit 76227f0d authored by lintangsutawika

add codexglue

parent ac50adb5
#!/usr/bin/python
"""
This script was adapted from the original version by hieuhoang1972 which is part of MOSES.
"""
# $Id: bleu.py 1307 2007-03-14 22:22:36Z hieuhoang1972 $

"""Provides:
cook_refs(refs, n=4): Transform a list of reference sentences as strings into a form usable by cook_test().
cook_test(test, refs, n=4): Transform a test sentence as a string (together with the cooked reference sentences) into a form usable by score_cooked().
@@ -15,7 +15,7 @@ score_cooked(alltest, n=4): Score a list of cooked test sentences.
score_set(s, testid, refids, n=4): Interface with dataset.py; calculate BLEU score of testid against refids.
The reason for breaking the BLEU computation into three phases cook_refs(), cook_test(), and score_cooked() is to allow the caller to calculate BLEU scores for multiple test sets as efficiently as possible.
"""
import sys, math, re, xml.sax.saxutils
import subprocess
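# Illustrative usage sketch (not part of the original script): the three-phase
# API described in the module docstring is typically driven as
#
#     cooked_refs = cook_refs(["the cat sat on the mat"])
#     cooked_test = cook_test("a cat sat on a mat", cooked_refs)
#     scores = score_cooked([cooked_test])  # scores[0] is the overall smoothed BLEU
#
# cook_refs() is computed once per segment and can be reused against many
# candidate sentences, which is why the computation is split into three phases.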
@@ -28,32 +28,42 @@ preserve_case = False
eff_ref_len = "shortest"
normalize1 = [
    ("<skipped>", ""),  # strip "skipped" tags
    (r"-\n", ""),  # strip end-of-line hyphenation and join lines
    (r"\n", " "),  # join lines
    # (r'(\d)\s+(?=\d)', r'\1'), # join digits
]
normalize1 = [(re.compile(pattern), replace) for (pattern, replace) in normalize1]
normalize2 = [
    (
        r"([\{-\~\[-\` -\&\(-\+\:-\@\/])",
        r" \1 ",
    ),  # tokenize punctuation. apostrophe is missing
    (
        r"([^0-9])([\.,])",
        r"\1 \2 ",
    ),  # tokenize period and comma unless preceded by a digit
    (
        r"([\.,])([^0-9])",
        r" \1 \2",
    ),  # tokenize period and comma unless followed by a digit
    (r"([0-9])(-)", r"\1 \2 "),  # tokenize dash when preceded by a digit
]
normalize2 = [(re.compile(pattern), replace) for (pattern, replace) in normalize2]
def normalize(s):
    """Normalize and tokenize text. This is lifted from NIST mteval-v11a.pl."""
    # Added to bypass NIST-style pre-processing of hyp and ref files -- wade
    if nonorm:
        return s.split()
    if type(s) is not str:
        s = " ".join(s)
    # language-independent part:
    for (pattern, replace) in normalize1:
        s = re.sub(pattern, replace, s)
    s = xml.sax.saxutils.unescape(s, {"&quot;": '"'})
    # language-dependent part (assuming Western languages):
    s = " %s " % s
    if not preserve_case:
@@ -62,31 +72,34 @@ def normalize(s):
        s = re.sub(pattern, replace, s)
    return s.split()
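# Illustrative example (assuming the collapsed hunk above lower-cases `s` when
# preserve_case is False): normalize("Hello, world.") returns
# ["hello", ",", "world", "."] -- punctuation is split off into separate tokens.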
def count_ngrams(words, n=4):
    counts = {}
    for k in range(1, n + 1):
        for i in range(len(words) - k + 1):
            ngram = tuple(words[i : i + k])
            counts[ngram] = counts.get(ngram, 0) + 1
    return counts
def cook_refs(refs, n=4):
    """Takes a list of reference sentences for a single segment
    and returns an object that encapsulates everything that BLEU
    needs to know about them."""
    refs = [normalize(ref) for ref in refs]
    maxcounts = {}
    for ref in refs:
        counts = count_ngrams(ref, n)
        for (ngram, count) in counts.items():
            maxcounts[ngram] = max(maxcounts.get(ngram, 0), count)
    return ([len(ref) for ref in refs], maxcounts)
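# Note (not in the original source): cook_refs() returns
#     ([len(ref_1), ..., len(ref_m)], {ngram: max count over the references}),
# which is exactly the `item` argument that cook_test() unpacks below.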
def cook_test(test, item, n=4):
    """Takes a test sentence and returns an object that
    encapsulates everything that BLEU needs to know about it."""
    (reflens, refmaxcounts) = item
    test = normalize(test)
    result = {}
    result["testlen"] = len(test)
@@ -96,89 +109,97 @@ def cook_test(test, item, n=4):
    if eff_ref_len == "shortest":
        result["reflen"] = min(reflens)
    elif eff_ref_len == "average":
        result["reflen"] = float(sum(reflens)) / len(reflens)
    elif eff_ref_len == "closest":
        min_diff = None
        for reflen in reflens:
            if min_diff is None or abs(reflen - len(test)) < min_diff:
                min_diff = abs(reflen - len(test))
                result["reflen"] = reflen

    result["guess"] = [max(len(test) - k + 1, 0) for k in range(1, n + 1)]
    result["correct"] = [0] * n
    counts = count_ngrams(test, n)
    for (ngram, count) in counts.items():
        result["correct"][len(ngram) - 1] += min(refmaxcounts.get(ngram, 0), count)

    return result
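# Note (not in the original source): the returned dict holds
#     "testlen"/"reflen" -- candidate length and effective reference length,
#     "guess"            -- number of candidate n-grams for n = 1..4,
#     "correct"          -- clipped n-gram matches against the references,
# which is all that score_cooked() needs to accumulate corpus-level statistics.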
def score_cooked(allcomps, n=4, ground=0, smooth=1):
    totalcomps = {"testlen": 0, "reflen": 0, "guess": [0] * n, "correct": [0] * n}
    for comps in allcomps:
        for key in ["testlen", "reflen"]:
            totalcomps[key] += comps[key]
        for key in ["guess", "correct"]:
            for k in range(n):
                totalcomps[key][k] += comps[key][k]
    logbleu = 0.0
    all_bleus = []
    for k in range(n):
        correct = totalcomps["correct"][k]
        guess = totalcomps["guess"][k]
        addsmooth = 0
        if smooth == 1 and k > 0:
            addsmooth = 1
        logbleu += math.log(correct + addsmooth + sys.float_info.min) - math.log(
            guess + addsmooth + sys.float_info.min
        )
        if guess == 0:
            all_bleus.append(-10000000)
        else:
            all_bleus.append(math.log(correct + sys.float_info.min) - math.log(guess))
    logbleu /= float(n)
    all_bleus.insert(0, logbleu)
    brevPenalty = min(
        0, 1 - float(totalcomps["reflen"] + 1) / (totalcomps["testlen"] + 1)
    )
    for i in range(len(all_bleus)):
        if i == 0:
            all_bleus[i] += brevPenalty
        all_bleus[i] = math.exp(all_bleus[i])
    return all_bleus
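# Note (not in the original source): the first entry of the returned list is
#     exp(logbleu + brevPenalty)
#       = geometric mean of the (add-one smoothed) n-gram precisions
#         * exp(min(0, 1 - (reflen + 1) / (testlen + 1))),
# i.e. smoothed BLEU-4 with a brevity penalty; the remaining entries are the
# individual unsmoothed n-gram precisions without the brevity penalty.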
def bleu(refs, candidate, ground=0, smooth=1):
    refs = cook_refs(refs)
    test = cook_test(candidate, refs)
    return score_cooked([test], ground=ground, smooth=smooth)


def splitPuncts(line):
    return " ".join(re.findall(r"[\w]+|[^\s\w]", line))
def computeMaps(predictions, goldfile):
    predictionMap = {}
    goldMap = {}
    gf = open(goldfile, "r")

    for row in predictions:
        cols = row.strip().split("\t")
        if len(cols) == 1:
            (rid, pred) = (cols[0], "")
        else:
            (rid, pred) = (cols[0], cols[1])
        predictionMap[rid] = [splitPuncts(pred.strip().lower())]

    for row in gf:
        (rid, pred) = row.split("\t")
        if rid in predictionMap:  # Only insert if the id exists for the method
            if rid not in goldMap:
                goldMap[rid] = []
            goldMap[rid].append(splitPuncts(pred.strip().lower()))

    sys.stderr.write("Total: " + str(len(goldMap)) + "\n")
    return (goldMap, predictionMap)
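# Illustrative input format (inferred from the parsing above): both the
# prediction rows and the gold file are tab-separated "<id>\t<text>" lines, e.g.
#
#     0\tReturns the sum of two numbers .
#
# so computeMaps() pairs predictions with references that share the same id.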
# m1 is the reference map
# m2 is the prediction map
def bleuFromMaps(m1, m2):
    score = [0] * 5
    num = 0.0
@@ -186,7 +207,7 @@ def bleuFromMaps(m1, m2):
    for key in m1:
        if key in m2:
            bl = bleu(m1[key], m2[key][0])
            score = [score[i] + bl[i] for i in range(0, len(bl))]
            num += 1

    return [s * 100.0 / num for s in score]
@@ -204,10 +225,11 @@ def smoothed_bleu_4(references, predictions, **kwargs):
    return bleuFromMaps(goldMap, predictionMap)[0]
if __name__ == "__main__":
    reference_file = sys.argv[1]
    predictions = []
    for row in sys.stdin:
        predictions.append(row)
    (goldMap, predictionMap) = computeMaps(predictions, reference_file)
    print(bleuFromMaps(goldMap, predictionMap)[0])
def doc_to_text(doc):
    inputs = " ".join(doc["code_tokens"]).replace("\n", " ")
    inputs = " ".join(inputs.strip().split())

    return inputs


def doc_to_target(doc):
    targets = " ".join(doc["docstring_tokens"]).replace("\n", "")
    targets = " ".join(targets.strip().split())

    return targets
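# Illustrative example (field names follow the CodeSearchNet-style schema these
# helpers assume): for a doc such as
#
#     {"code_tokens": ["def", "add", "(", "a", ",", "b", ")", ":"],
#      "docstring_tokens": ["Add", "two", "numbers", "."]}
#
# doc_to_text(doc) returns "def add ( a , b ) :" and doc_to_target(doc) returns
# "Add two numbers .".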