Commit 76227f0d authored by lintangsutawika's avatar lintangsutawika
Browse files

add codexglue

parent ac50adb5
#!/usr/bin/python #!/usr/bin/python
''' """
This script was adapted from the original version by hieuhoang1972 which is part of MOSES. This script was adapted from the original version by hieuhoang1972 which is part of MOSES.
''' """
# $Id: bleu.py 1307 2007-03-14 22:22:36Z hieuhoang1972 $ # $Id: bleu.py 1307 2007-03-14 22:22:36Z hieuhoang1972 $
'''Provides: """Provides:
cook_refs(refs, n=4): Transform a list of reference sentences as strings into a form usable by cook_test(). cook_refs(refs, n=4): Transform a list of reference sentences as strings into a form usable by cook_test().
cook_test(test, refs, n=4): Transform a test sentence as a string (together with the cooked reference sentences) into a form usable by score_cooked(). cook_test(test, refs, n=4): Transform a test sentence as a string (together with the cooked reference sentences) into a form usable by score_cooked().
...@@ -15,7 +15,7 @@ score_cooked(alltest, n=4): Score a list of cooked test sentences. ...@@ -15,7 +15,7 @@ score_cooked(alltest, n=4): Score a list of cooked test sentences.
score_set(s, testid, refids, n=4): Interface with dataset.py; calculate BLEU score of testid against refids. score_set(s, testid, refids, n=4): Interface with dataset.py; calculate BLEU score of testid against refids.
The reason for breaking the BLEU computation into three phases cook_refs(), cook_test(), and score_cooked() is to allow the caller to calculate BLEU scores for multiple test sets as efficiently as possible. The reason for breaking the BLEU computation into three phases cook_refs(), cook_test(), and score_cooked() is to allow the caller to calculate BLEU scores for multiple test sets as efficiently as possible.
''' """
import sys, math, re, xml.sax.saxutils import sys, math, re, xml.sax.saxutils
import subprocess import subprocess
...@@ -28,167 +28,188 @@ preserve_case = False ...@@ -28,167 +28,188 @@ preserve_case = False
# Selects how cook_test() derives the effective reference length:
# "shortest", "average" or "closest".
eff_ref_len = "shortest"

# Language-independent clean-up rules as (compiled pattern, replacement)
# pairs, compiled once at import time.
normalize1 = [
    (re.compile(regex), substitute)
    for regex, substitute in (
        ("<skipped>", ""),  # strip "skipped" tags
        (r"-\n", ""),  # strip end-of-line hyphenation and join lines
        (r"\n", " "),  # join lines
        #    (r'(\d)\s+(?=\d)', r'\1'), # join digits
    )
]
# Tokenization rules for the language-dependent pass (Western languages),
# stored as (compiled pattern, replacement) pairs and compiled once at import.
normalize2 = [
    (re.compile(regex), substitute)
    for regex, substitute in (
        # tokenize punctuation. apostrophe is missing
        (r"([\{-\~\[-\` -\&\(-\+\:-\@\/])", r" \1 "),
        # tokenize period and comma unless preceded by a digit
        (r"([^0-9])([\.,])", r"\1 \2 "),
        # tokenize period and comma unless followed by a digit
        (r"([\.,])([^0-9])", r" \1 \2"),
        # tokenize dash when preceded by a digit
        (r"([0-9])(-)", r"\1 \2 "),
    )
]
def normalize(s):
    """Normalize and tokenize text. This is lifted from NIST mteval-v11a.pl.

    ``s`` is either a string or an iterable of strings, which is joined with
    single spaces before processing. Returns the list of resulting tokens.

    When the module-level ``nonorm`` flag is truthy, all normalization is
    skipped and ``s`` is only split on whitespace.
    """
    # Added to bypass NIST-style pre-processing of hyp and ref files -- wade
    if nonorm:
        return s.split()
    # isinstance (not an exact type comparison) so that str subclasses are
    # treated as text instead of being joined character by character.
    if not isinstance(s, str):
        s = " ".join(s)
    # language-independent part:
    for (pattern, replace) in normalize1:
        s = re.sub(pattern, replace, s)
    s = xml.sax.saxutils.unescape(s, {"&quot;": '"'})
    # language-dependent part (assuming Western languages):
    # pad with spaces so the rules that look at a neighbouring character
    # also apply at the very start and end of the text
    s = " %s " % s
    if not preserve_case:
        s = s.lower()  # this might not be identical to the original
    for (pattern, replace) in normalize2:
        s = re.sub(pattern, replace, s)
    return s.split()
def count_ngrams(words, n=4):
    """Count every n-gram of order 1 through ``n`` found in ``words``.

    Returns a dict that maps each n-gram (a tuple of consecutive tokens) to
    the number of times it occurs.
    """
    tally = {}
    total = len(words)
    for order in range(1, n + 1):
        # number of windows of this order; zero or negative yields no windows
        for start in range(total - order + 1):
            gram = tuple(words[start : start + order])
            if gram in tally:
                tally[gram] += 1
            else:
                tally[gram] = 1
    return tally
def cook_refs(refs, n=4):
    """Pre-process the reference sentences of a single segment.

    Each reference is tokenized with normalize(). Returns a pair
    ``(lengths, maxcounts)``: the token count of every reference, and a dict
    mapping each n-gram (order 1..n) to the highest count it reaches in any
    single reference.
    """
    tokenized = [normalize(ref) for ref in refs]
    clipped = {}
    for tokens in tokenized:
        for ngram, seen in count_ngrams(tokens, n).items():
            # keep the largest per-reference count for this n-gram
            if seen > clipped.get(ngram, 0):
                clipped[ngram] = seen
    return ([len(tokens) for tokens in tokenized], clipped)
def cook_test(test, item, n=4):
    """Pre-process one test sentence against cooked references.

    ``item`` is the ``(reflens, refmaxcounts)`` pair produced by cook_refs().
    Returns a dict with:
      "testlen": number of tokens in the test sentence,
      "reflen":  effective reference length, chosen per ``eff_ref_len``,
      "guess":   number of candidate n-grams for each order 1..n,
      "correct": clipped n-gram matches for each order 1..n.
    """
    reflens, refmaxcounts = item
    tokens = normalize(test)
    testlen = len(tokens)

    result = {"testlen": testlen}

    # Calculate effective reference sentence length.
    if eff_ref_len == "shortest":
        result["reflen"] = min(reflens)
    elif eff_ref_len == "average":
        result["reflen"] = float(sum(reflens)) / len(reflens)
    elif eff_ref_len == "closest":
        best_gap = None
        for candidate in reflens:
            gap = abs(candidate - testlen)
            # strict comparison: on ties the earliest reference wins
            if best_gap is None or gap < best_gap:
                best_gap = gap
                result["reflen"] = candidate

    result["guess"] = [max(testlen - order + 1, 0) for order in range(1, n + 1)]

    matched = [0] * n
    result["correct"] = matched
    for ngram, seen in count_ngrams(tokens, n).items():
        # clip each count by the most occurrences seen in any one reference
        matched[len(ngram) - 1] += min(refmaxcounts.get(ngram, 0), seen)

    return result
def score_cooked(allcomps, n=4, ground=0, smooth=1):
    """Score a list of cooked test sentences.

    ``allcomps`` holds the dicts produced by cook_test(); their statistics
    are summed before scoring. Returns a list of ``n + 1`` floats: first the
    overall score (mean log precision plus brevity penalty, exponentiated),
    then the unsmoothed precision of each n-gram order 1..n.

    With ``smooth == 1`` one is added to the match and guess counts of every
    order above 1 when computing the overall score. ``ground`` is accepted
    but not used here.
    """
    tiny = sys.float_info.min  # keeps log() defined when a count is zero

    length_totals = {"testlen": 0, "reflen": 0}
    guess_totals = [0] * n
    correct_totals = [0] * n
    for comps in allcomps:
        for key in ("testlen", "reflen"):
            length_totals[key] += comps[key]
        for k in range(n):
            guess_totals[k] += comps["guess"][k]
        for k in range(n):
            correct_totals[k] += comps["correct"][k]

    logbleu = 0.0
    log_precisions = []
    for k in range(n):
        correct = correct_totals[k]
        guess = guess_totals[k]
        bump = 1 if (smooth == 1 and k > 0) else 0
        logbleu += math.log(correct + bump + tiny) - math.log(guess + bump + tiny)
        if guess == 0:
            # no candidate n-grams of this order: exponentiates to 0.0
            log_precisions.append(-10000000)
        else:
            log_precisions.append(math.log(correct + tiny) - math.log(guess))

    logbleu /= float(n)

    brevPenalty = min(
        0, 1 - float(length_totals["reflen"] + 1) / (length_totals["testlen"] + 1)
    )

    # only the overall score receives the brevity penalty
    return [math.exp(logbleu + brevPenalty)] + [
        math.exp(value) for value in log_precisions
    ]
def bleu(refs, candidate, ground=0, smooth=1):
    """Score one candidate sentence against its reference sentences.

    Returns the list produced by score_cooked() for this single sentence:
    the overall score followed by the per-order precisions.
    """
    cooked_refs = cook_refs(refs)
    cooked_candidate = cook_test(candidate, cooked_refs)
    return score_cooked([cooked_candidate], ground=ground, smooth=smooth)
def splitPuncts(line):
    """Return ``line`` with words and punctuation separated by single spaces.

    Runs of word characters stay together; every other non-whitespace
    character becomes a token of its own.
    """
    pieces = re.findall(r"[\w]+|[^\s\w]", line)
    return " ".join(pieces)
def computeMaps(predictions, goldfile):
    """Build the reference and prediction maps used for BLEU scoring.

    Args:
        predictions: iterable of lines of the form "<id><TAB><prediction>".
            A line holding only an id is treated as an empty prediction.
        goldfile: path of a text file with one "<id><TAB><reference>" per
            line. Only ids that also occur in ``predictions`` are kept.

    Returns:
        ``(goldMap, predictionMap)``: both map an id to a list of lower-cased,
        punctuation-split strings. An id has exactly one prediction and one
        reference per matching gold line.

    Raises:
        ValueError: if a gold line does not split into exactly two
            tab-separated fields.

    The number of ids that received at least one reference is written to
    stderr.
    """
    predictionMap = {}
    goldMap = {}

    for row in predictions:
        cols = row.strip().split("\t")
        pred = cols[1] if len(cols) > 1 else ""
        predictionMap[cols[0]] = [splitPuncts(pred.strip().lower())]

    # The context manager closes the gold file even if a row fails to parse;
    # the previous bare open() never closed the handle.
    with open(goldfile, "r") as gf:
        for row in gf:
            (rid, pred) = row.split("\t")
            if rid in predictionMap:  # Only insert if the id exists for the method
                goldMap.setdefault(rid, []).append(splitPuncts(pred.strip().lower()))

    sys.stderr.write("Total: " + str(len(goldMap)) + "\n")
    return (goldMap, predictionMap)
def bleuFromMaps(m1, m2):
    """Average sentence-level BLEU over every id present in both maps.

    Args:
        m1: the reference map, id -> list of reference strings.
        m2: the prediction map, id -> list whose first item is the prediction.

    Returns:
        A list of five floats scaled by 100: the mean of each value returned
        by bleu() (overall score first, then the per-order precisions).
        All five are 0.0 when the maps have no id in common, instead of
        failing with a division by zero.
    """
    totals = [0] * 5
    num = 0
    for key in m1:
        if key in m2:
            bl = bleu(m1[key], m2[key][0])
            totals = [total + part for total, part in zip(totals, bl)]
            num += 1
    if num == 0:
        # nothing was scored, so there is no average to compute
        return [0.0] * 5
    return [total * 100.0 / num for total in totals]
def smoothed_bleu_4(references, predictions, **kwargs):
    """Return the averaged smoothed BLEU score of predictions vs. references.

    ``references`` and ``predictions`` are sequences of strings paired by
    position; each string is stripped, lower-cased and punctuation-split
    before scoring. Extra keyword arguments are accepted and ignored.
    """
    predictionMap = {
        rid: [splitPuncts(pred.strip().lower())]
        for rid, pred in enumerate(predictions)
    }
    goldMap = {
        rid: [splitPuncts(row.strip().lower())]
        for rid, row in enumerate(references)
    }
    # element 0 of the averaged scores is the overall BLEU value
    return bleuFromMaps(goldMap, predictionMap)[0]
if __name__ == "__main__":
    # Command-line use: the reference file path is the first argument and the
    # "<id><TAB><prediction>" lines are read from standard input.
    reference_file = sys.argv[1]
    predictions = list(sys.stdin)
    goldMap, predictionMap = computeMaps(predictions, reference_file)
    # print only the overall BLEU score
    print(bleuFromMaps(goldMap, predictionMap)[0])
def doc_to_text(doc):
    """Render the code tokens of ``doc`` as one whitespace-normalized line.

    Tokens in ``doc["code_tokens"]`` are joined with spaces, newlines become
    spaces, and runs of whitespace collapse to a single space.
    """
    joined = " ".join(doc["code_tokens"])
    flattened = joined.replace("\n", " ")
    return " ".join(flattened.split())
def doc_to_target(doc):
    """Render the docstring tokens of ``doc`` as one whitespace-normalized line.

    Tokens in ``doc["docstring_tokens"]`` are joined with spaces, newline
    characters are removed (not replaced by a space), and runs of whitespace
    collapse to a single space.
    """
    joined = " ".join(doc["docstring_tokens"])
    without_newlines = joined.replace("\n", "")
    return " ".join(without_newlines.split())
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment