Commit 7a9c4a03 authored by rprenger's avatar rprenger
Browse files

Removing bug possibilities and adding timing info

parent 29dd0a35
...@@ -61,4 +61,4 @@ class MegatronServer(object): ...@@ -61,4 +61,4 @@ class MegatronServer(object):
api.add_resource(MegatronGenerate, '/generate', resource_class_args=[model]) api.add_resource(MegatronGenerate, '/generate', resource_class_args=[model])
def run(self, url): def run(self, url):
self.app.run(url, debug=False) self.app.run(url, threaded=False, debug=False)
...@@ -162,6 +162,9 @@ def synced_generate(model, context_length_tensor, context_tokens_tensor, max_len ...@@ -162,6 +162,9 @@ def synced_generate(model, context_length_tensor, context_tokens_tensor, max_len
def generate(model, sentences=None, max_len=0): def generate(model, sentences=None, max_len=0):
if torch.distributed.get_rank() == 0: if torch.distributed.get_rank() == 0:
context_tokens_tensor, context_length_tensor = tokenize_batch(sentences) context_tokens_tensor, context_length_tensor = tokenize_batch(sentences)
c = context_length_tensor[0]
b = context_tokens_tensor.size(0)
start = time.time()
send_generate_info(context_tokens_tensor, context_length_tensor, max_len) send_generate_info(context_tokens_tensor, context_length_tensor, max_len)
else: else:
context_length_tensor, context_tokens_tensor, max_len = receive_generate_info() context_length_tensor, context_tokens_tensor, max_len = receive_generate_info()
...@@ -176,6 +179,8 @@ def generate(model, sentences=None, max_len=0): ...@@ -176,6 +179,8 @@ def generate(model, sentences=None, max_len=0):
for i in range(decode_tokens.size(0)): for i in range(decode_tokens.size(0)):
decode_token = decode_tokens[i,:].cpu().numpy().tolist() decode_token = decode_tokens[i,:].cpu().numpy().tolist()
resp_sentences.append(tokenizer.detokenize(decode_token)) resp_sentences.append(tokenizer.detokenize(decode_token))
end = time.time()
print(str(b)+","+str(c)+","+str(decode_tokens.size(1))+","+str(end-start), flush=True)
return resp_sentences return resp_sentences
def switch(val1, val2, boolean): def switch(val1, val2, boolean):
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import json import json
import sys
import urllib2 import urllib2
class PutRequest(urllib2.Request): class PutRequest(urllib2.Request):
'''class to handling putting with urllib2''' '''class to handling putting with urllib2'''
...@@ -21,11 +22,12 @@ class PutRequest(urllib2.Request): ...@@ -21,11 +22,12 @@ class PutRequest(urllib2.Request):
return 'PUT' return 'PUT'
if __name__ == "__main__": if __name__ == "__main__":
url = sys.argv[1]
while True: while True:
sentence = raw_input("Enter prompt: ") sentence = raw_input("Enter prompt: ")
max_len = int(input("Enter number tokens output: ")) max_len = int(input("Enter number tokens output: "))
data = json.dumps({"sentences": [sentence], "max_len":max_len}) data = json.dumps({"sentences": [sentence], "max_len":max_len})
req = PutRequest("http://sc-sdgx2-484:5000/generate", data, {'Content-Type': 'application/json'}) req = PutRequest(url, data, {'Content-Type': 'application/json'})
response = urllib2.urlopen(req) response = urllib2.urlopen(req)
resp_sentences = json.load(response) resp_sentences = json.load(response)
print("Megatron Response: ") print("Megatron Response: ")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment