Commit 4fddb5e0 authored by sgn-andot's avatar sgn-andot Committed by calberti
Browse files

Fixed conll2tree.py; handling correctly a sentence that has multiple elements...

Fixed conll2tree.py to correctly handle a sentence that has multiple elements with identical representations (#292)
parent 3a952ec6
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
"""A program to generate ASCII trees from conll files.""" """A program to generate ASCII trees from conll files."""
import collections import collections
import re
import asciitree import asciitree
import tensorflow as tf import tensorflow as tf
...@@ -39,18 +40,21 @@ flags.DEFINE_string('corpus_name', 'stdin-conll', ...@@ -39,18 +40,21 @@ flags.DEFINE_string('corpus_name', 'stdin-conll',
def to_dict(sentence): def to_dict(sentence):
"""Builds a dictionary representing the parse tree of a sentence. """Builds a dictionary representing the parse tree of a sentence.
Note that the suffix "@id" (where 'id' is the token's 1-based position in the
sentence) is appended to each element so that a sentence containing multiple
elements with identical representations is handled correctly.
These suffixes need to be removed after the asciitree is rendered.
Args: Args:
sentence: Sentence protocol buffer to represent. sentence: Sentence protocol buffer to represent.
Returns: Returns:
Dictionary mapping tokens to children. Dictionary mapping tokens to children.
""" """
token_str = ['%s %s %s' % (token.word, token.tag, token.label) token_str = list()
for token in sentence.token]
children = [[] for token in sentence.token] children = [[] for token in sentence.token]
root = -1 root = -1
for i in range(0, len(sentence.token)): for i in range(0, len(sentence.token)):
token = sentence.token[i] token = sentence.token[i]
token_str.append('%s %s %s @%d' % (token.word, token.tag, token.label, (i+1)))
if token.head == -1: if token.head == -1:
root = i root = i
else: else:
...@@ -83,7 +87,10 @@ def main(unused_argv): ...@@ -83,7 +87,10 @@ def main(unused_argv):
d = to_dict(sentence) d = to_dict(sentence)
print 'Input: %s' % sentence.text print 'Input: %s' % sentence.text
print 'Parse:' print 'Parse:'
print tr(d) tr_str = tr(d)
pat = re.compile('\s*@\d+$')
for tr_ln in tr_str.splitlines():
print pat.sub('', tr_ln)
if finished: if finished:
break break
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment