interactive_text_analyzer.ipynb 6.23 KB
Newer Older
Ivan Bogatyy's avatar
Ivan Bogatyy committed
1
2
3
4
5
6
7
8
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import ipywidgets as widgets\n",
    "import tensorflow as tf\n",
    "from IPython import display\n",
    "from dragnn.protos import spec_pb2\n",
    "from dragnn.python import graph_builder\n",
    "from dragnn.python import spec_builder\n",
    "from dragnn.python import load_dragnn_cc_impl  # This loads the actual op definitions\n",
    "from dragnn.python import render_parse_tree_graphviz\n",
    "from dragnn.python import visualization\n",
    "from google.protobuf import text_format\n",
    "from syntaxnet import load_parser_ops  # This loads the actual op definitions\n",
    "from syntaxnet import sentence_pb2\n",
    "from syntaxnet.ops import gen_parser_ops\n",
    "from tensorflow.python.platform import tf_logging as logging\n",
    "\n",
    "def load_model(base_dir, master_spec_name, checkpoint_name):\n",
    "    \"\"\"Builds an annotation graph from a master spec and restores a checkpoint.\n",
    "\n",
    "    Args:\n",
    "        base_dir: Directory holding the master spec and the checkpoint.\n",
    "        master_spec_name: File name, inside base_dir, of the text-format MasterSpec.\n",
    "        checkpoint_name: File name, inside base_dir, of the checkpoint to restore.\n",
    "\n",
    "    Returns:\n",
    "        A function that takes a single input sentence, feeds it as a batch of\n",
    "        one, and returns the fetched [annotations, traces] values.\n",
    "    \"\"\"\n",
    "    # Read the text-format master spec and let spec_builder complete it.\n",
    "    spec_path = os.path.join(base_dir, master_spec_name)\n",
    "    master_spec = spec_pb2.MasterSpec()\n",
    "    with open(spec_path, \"r\") as spec_file:\n",
    "        text_format.Merge(spec_file.read(), master_spec)\n",
    "    spec_builder.complete_master_spec(master_spec, None, base_dir)\n",
    "    logging.set_verbosity(logging.WARN)  # Turn off TensorFlow spam.\n",
    "\n",
    "    # Build everything inside a dedicated graph so that several models can coexist.\n",
    "    graph = tf.Graph()\n",
    "    with graph.as_default():\n",
    "        builder = graph_builder.MasterBuilder(master_spec, spec_pb2.GridPoint())\n",
    "        # The annotator is the component that will annotate test sentences.\n",
    "        annotator = builder.add_annotation(enable_tracing=True)\n",
    "        # \"Savers\" can save and load models; here, we're only going to load.\n",
    "        builder.add_saver()\n",
    "\n",
    "    # The session is intentionally left open: the returned closure keeps using it.\n",
    "    sess = tf.Session(graph=graph)\n",
    "    with graph.as_default():\n",
    "        sess.run(tf.global_variables_initializer())\n",
    "        checkpoint_path = os.path.join(base_dir, checkpoint_name)\n",
    "        sess.run('save/restore_all', {'save/Const:0': checkpoint_path})\n",
    "\n",
    "    def annotate_sentence(sentence):\n",
    "        fetches = [annotator['annotations'], annotator['traces']]\n",
    "        feeds = {annotator['input_batch']: [sentence]}\n",
    "        with graph.as_default():\n",
    "            return sess.run(fetches, feed_dict=feeds)\n",
    "    return annotate_sentence\n",
    "\n",
    "segmenter_model = load_model(\"data/es/segmenter\", \"spec.textproto\", \"checkpoint\")\n",
    "parser_model = load_model(\"data/es\", \"parser_spec.textproto\", \"checkpoint\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "def annotate_text(text):\n",
    "    \"\"\"Runs `text` through the segmenter model and then the parser model.\n",
    "\n",
    "    Args:\n",
    "        text: The raw text to analyze.\n",
    "\n",
    "    Returns:\n",
    "        A (sentence, trace) tuple: the single parser annotation decoded into a\n",
    "        sentence_pb2.Sentence, and the single trace that came with it.\n",
    "    \"\"\"\n",
    "    # Start from whitespace-split tokens; start/end offsets are set to -1.\n",
    "    whitespace_tokens = [\n",
    "        sentence_pb2.Token(word=word, start=-1, end=-1) for word in text.split()\n",
    "    ]\n",
    "    sentence = sentence_pb2.Sentence(text=text, token=whitespace_tokens)\n",
    "    serialized_sentence = sentence.SerializeToString()\n",
    "\n",
    "    # preprocess: the char token generator op runs in its own throwaway graph.\n",
    "    with tf.Session(graph=tf.Graph()) as tmp_session:\n",
    "        char_input = gen_parser_ops.char_token_generator([serialized_sentence])\n",
    "        preprocessed = tmp_session.run(char_input)[0]\n",
    "\n",
    "    # Only the segmenter's annotations are needed; its traces are discarded.\n",
    "    segmented, _ = segmenter_model(preprocessed)\n",
    "    annotations, traces = parser_model(segmented[0])\n",
    "\n",
    "    # One sentence went in, so exactly one annotation and one trace must come out.\n",
    "    assert len(annotations) == 1\n",
    "    assert len(traces) == 1\n",
    "    parsed_sentence = sentence_pb2.Sentence.FromString(annotations[0])\n",
    "    return parsed_sentence, traces[0]\n",
    "\n",
    "annotate_text(\"casa\"); None  # just make sure it works"
   ]
  },
  {
   "cell_type": "markdown",
Ivan Bogatyy's avatar
Ivan Bogatyy committed
93
94
95
96
   "metadata": {
    "deletable": true,
    "editable": true
   },
Ivan Bogatyy's avatar
Ivan Bogatyy committed
97
98
99
100
101
102
103
104
105
   "source": [
    "# Interactive trace explorer\n",
    "Run the cell below, and then enter text in the interactive widget."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "def _trace_explorer():\n",
    "    \"\"\"Shows a text box and renders the trace of every submitted text.\n",
    "\n",
    "    Everything lives inside this function so that the widgets and the\n",
    "    callback do not pollute the notebook's global scope.\n",
    "    \"\"\"\n",
    "    text_box = widgets.Text()\n",
    "    display.display(text_box)\n",
    "\n",
    "    visualizer = visualization.InteractiveVisualization()\n",
    "    display.display(display.HTML(visualizer.initial_html()))\n",
    "\n",
    "    def handle_submit(sender):\n",
    "        del sender  # unused\n",
    "        # Only the trace is shown here; the parse tree is not needed.\n",
    "        _, trace = annotate_text(text_box.value)\n",
    "        trace_html = visualizer.show_trace(trace)\n",
    "        display.display(display.HTML(trace_html))\n",
    "\n",
    "    text_box.on_submit(handle_submit)\n",
    "_trace_explorer()"
   ]
  },
  {
   "cell_type": "markdown",
Ivan Bogatyy's avatar
Ivan Bogatyy committed
131
132
133
134
   "metadata": {
    "deletable": true,
    "editable": true
   },
Ivan Bogatyy's avatar
Ivan Bogatyy committed
135
136
137
138
139
140
141
142
143
   "source": [
    "# Interactive parse tree explorer\n",
    "Run the cell below, and then enter text in the interactive widget."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "def _parse_tree_explorer():\n",
    "    \"\"\"Shows a text box and renders the parse tree of every submitted text.\n",
    "\n",
    "    Everything lives inside this function so that the widgets and the\n",
    "    callback do not pollute the notebook's global scope.\n",
    "    \"\"\"\n",
    "    text_box = widgets.Text()\n",
    "    display.display(text_box)\n",
    "    html_box = widgets.HTML()\n",
    "    display.display(html_box)\n",
    "\n",
    "    def handle_submit(sender):\n",
    "        del sender  # unused\n",
    "        # Only the parse tree is shown here; the trace is not needed.\n",
    "        parse_tree, _ = annotate_text(text_box.value)\n",
    "        rendered_tree = render_parse_tree_graphviz.parse_tree_graph(parse_tree)\n",
    "        html_box.value = u\"\"\"\n",
    "        <div style=\"max-width: 100%\">{}</div>\n",
    "        <style type=\"text/css\">svg {{ max-width: 100%; }}</style>\n",
    "        \"\"\".format(rendered_tree)\n",
    "\n",
    "    text_box.on_submit(handle_submit)\n",
    "_parse_tree_explorer()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}