interactive_text_analyzer.ipynb 6.23 KB
Newer Older
Ivan Bogatyy's avatar
Ivan Bogatyy committed
1
2
3
4
5
6
7
8
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "deletable": true,
9
10
    "editable": true,
    "scrolled": false
Ivan Bogatyy's avatar
Ivan Bogatyy committed
11
12
13
   },
   "outputs": [],
   "source": [
14
    "import os\n",
Ivan Bogatyy's avatar
Ivan Bogatyy committed
15
16
17
18
19
    "import ipywidgets as widgets\n",
    "import tensorflow as tf\n",
    "from IPython import display\n",
    "from dragnn.protos import spec_pb2\n",
    "from dragnn.python import graph_builder\n",
20
    "from dragnn.python import spec_builder\n",
Ivan Bogatyy's avatar
Ivan Bogatyy committed
21
22
23
24
25
26
    "from dragnn.python import load_dragnn_cc_impl  # This loads the actual op definitions\n",
    "from dragnn.python import render_parse_tree_graphviz\n",
    "from dragnn.python import visualization\n",
    "from google.protobuf import text_format\n",
    "from syntaxnet import load_parser_ops  # This loads the actual op definitions\n",
    "from syntaxnet import sentence_pb2\n",
27
    "from syntaxnet.ops import gen_parser_ops\n",
Ivan Bogatyy's avatar
Ivan Bogatyy committed
28
29
    "from tensorflow.python.platform import tf_logging as logging\n",
    "\n",
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
    "def load_model(base_dir, master_spec_name, checkpoint_name):\n",
    "    \"\"\"Builds the annotation graph for a master spec and restores its checkpoint.\n",
    "\n",
    "    Args:\n",
    "        base_dir: Directory containing the master spec and the checkpoint.\n",
    "        master_spec_name: File name, inside base_dir, of the text-format MasterSpec.\n",
    "        checkpoint_name: File name, inside base_dir, of the checkpoint to restore.\n",
    "\n",
    "    Returns:\n",
    "        A function that takes one input sentence, feeds it as a single-element\n",
    "        batch, and returns the fetched [annotations, traces] pair.\n",
    "    \"\"\"\n",
    "    spec_path = os.path.join(base_dir, master_spec_name)\n",
    "    checkpoint_path = os.path.join(base_dir, checkpoint_name)\n",
    "\n",
    "    # Read the text-format master spec from disk.\n",
    "    master_spec = spec_pb2.MasterSpec()\n",
    "    with open(spec_path, \"r\") as spec_file:\n",
    "        text_format.Merge(spec_file.read(), master_spec)\n",
    "    spec_builder.complete_master_spec(master_spec, None, base_dir)\n",
    "    logging.set_verbosity(logging.WARN)  # Silence TensorFlow's chattier log levels.\n",
    "\n",
    "    # Build the graph: an annotation component with tracing, plus a saver.\n",
    "    graph = tf.Graph()\n",
    "    with graph.as_default():\n",
    "        builder = graph_builder.MasterBuilder(master_spec, spec_pb2.GridPoint())\n",
    "        annotator = builder.add_annotation(enable_tracing=True)\n",
    "        # The saver is only used to restore the checkpoint below; nothing is saved here.\n",
    "        builder.add_saver()\n",
    "\n",
    "    session = tf.Session(graph=graph)\n",
    "    with graph.as_default():\n",
    "        session.run(tf.global_variables_initializer())\n",
    "        session.run('save/restore_all', {'save/Const:0': checkpoint_path})\n",
    "\n",
    "    def annotate_sentence(sentence):\n",
    "        fetches = [annotator['annotations'], annotator['traces']]\n",
    "        feeds = {annotator['input_batch']: [sentence]}\n",
    "        with graph.as_default():\n",
    "            return session.run(fetches, feed_dict=feeds)\n",
    "\n",
    "    return annotate_sentence\n",
    "\n",
    "segmenter_model = load_model(\"data/es/segmenter\", \"spec.textproto\", \"checkpoint\")\n",
    "parser_model = load_model(\"data/es\", \"parser_spec.textproto\", \"checkpoint\")"
Ivan Bogatyy's avatar
Ivan Bogatyy committed
60
61
62
63
64
65
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
66
67
68
    "collapsed": false,
    "deletable": true,
    "editable": true
Ivan Bogatyy's avatar
Ivan Bogatyy committed
69
70
71
72
73
   },
   "outputs": [],
   "source": [
    "def annotate_text(text):\n",
    "    \"\"\"Runs raw text through the segmenter model and then the parser model.\n",
    "\n",
    "    Args:\n",
    "        text: Input string; it is split on whitespace to seed the tokens.\n",
    "\n",
    "    Returns:\n",
    "        A (sentence, trace) tuple: the sentence_pb2.Sentence parsed from the\n",
    "        parser's single annotation, and the matching trace.\n",
    "    \"\"\"\n",
    "    words = text.split()\n",
    "    tokens = [sentence_pb2.Token(word=word, start=-1, end=-1) for word in words]\n",
    "    sentence = sentence_pb2.Sentence(text=text, token=tokens)\n",
    "    serialized = sentence.SerializeToString()\n",
    "\n",
    "    # Run char_token_generator over the serialized sentence in a throwaway graph.\n",
    "    with tf.Session(graph=tf.Graph()) as tmp_session:\n",
    "        char_input = gen_parser_ops.char_token_generator([serialized])\n",
    "        char_batch = tmp_session.run(char_input)\n",
    "    preprocessed = char_batch[0]\n",
    "\n",
    "    segmented, _ = segmenter_model(preprocessed)\n",
    "    annotations, traces = parser_model(segmented[0])\n",
    "\n",
    "    # One sentence goes in, so exactly one annotation and one trace must come out.\n",
    "    assert len(annotations) == 1\n",
    "    assert len(traces) == 1\n",
    "    parsed = sentence_pb2.Sentence.FromString(annotations[0])\n",
    "    return parsed, traces[0]\n",
    "annotate_text(\"casa\"); None  # just make sure it works"
   ]
  },
  {
   "cell_type": "markdown",
93
94
95
96
   "metadata": {
    "deletable": true,
    "editable": true
   },
Ivan Bogatyy's avatar
Ivan Bogatyy committed
97
98
99
100
101
102
103
104
105
   "source": [
    "# Interactive trace explorer\n",
    "Run the cell below, then type text into the widget and press Enter to render its trace."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
106
107
108
    "collapsed": false,
    "deletable": true,
    "editable": true
Ivan Bogatyy's avatar
Ivan Bogatyy committed
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
   },
   "outputs": [],
   "source": [
    "def _trace_explorer():\n",
    "    \"\"\"Displays a text box and an interactive trace view.\n",
    "\n",
    "    Submitting text annotates it and shows the resulting trace. Everything\n",
    "    lives inside this function so no widget names leak into the notebook's\n",
    "    global scope.\n",
    "    \"\"\"\n",
    "    text_box = widgets.Text()\n",
    "    display.display(text_box)\n",
    "\n",
    "    trace_view = visualization.InteractiveVisualization()\n",
    "    display.display(display.HTML(trace_view.initial_html()))\n",
    "\n",
    "    def handle_submit(sender):\n",
    "        del sender  # unused\n",
    "        _, trace = annotate_text(text_box.value)\n",
    "        trace_html = display.HTML(trace_view.show_trace(trace))\n",
    "        display.display(trace_html)\n",
    "\n",
    "    text_box.on_submit(handle_submit)\n",
    "_trace_explorer()"
   ]
  },
  {
   "cell_type": "markdown",
131
132
133
134
   "metadata": {
    "deletable": true,
    "editable": true
   },
Ivan Bogatyy's avatar
Ivan Bogatyy committed
135
136
137
138
139
140
141
142
143
   "source": [
    "# Interactive parse tree explorer\n",
    "Run the cell below, then type text into the widget and press Enter to render its parse tree."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
144
145
146
    "collapsed": false,
    "deletable": true,
    "editable": true
Ivan Bogatyy's avatar
Ivan Bogatyy committed
147
148
149
150
151
152
153
154
155
156
157
158
   },
   "outputs": [],
   "source": [
    "def _parse_tree_explorer():\n",
    "    \"\"\"Displays a text box plus an HTML widget that shows the parse tree.\n",
    "\n",
    "    Submitting text annotates it and writes the rendered parse tree into the\n",
    "    HTML widget. Everything lives inside this function so no widget names\n",
    "    leak into the notebook's global scope.\n",
    "    \"\"\"\n",
    "    text_box = widgets.Text()\n",
    "    display.display(text_box)\n",
    "    tree_view = widgets.HTML()\n",
    "    display.display(tree_view)\n",
    "\n",
    "    def handle_submit(sender):\n",
    "        del sender  # unused\n",
    "        parse_tree, _ = annotate_text(text_box.value)\n",
    "        tree_markup = render_parse_tree_graphviz.parse_tree_graph(parse_tree)\n",
    "        tree_view.value = u\"\"\"\n",
    "        <div style=\"max-width: 100%\">{}</div>\n",
    "        <style type=\"text/css\">svg {{ max-width: 100%; }}</style>\n",
    "        \"\"\".format(tree_markup)\n",
    "\n",
    "    text_box.on_submit(handle_submit)\n",
    "_parse_tree_explorer()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}