initial llama

0211193c · zhuwenwen · 0211193c · 0211193c · 0211193c · 0211193c
Commit 0211193c authored Aug 17, 2023 by zhuwenwen
20 changed files
--- a/.clang-format
+++ b/.clang-format
+Language: Cpp
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AllowShortEnumsOnASingleLine: false
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: true
+AlignEscapedNewlines: Right
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowAllArgumentsOnNextLine: true
+AllowShortBlocksOnASingleLine: Empty
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: false
+BinPackParameters: false
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeBraces: Stroustrup
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializers: AfterColon
+BreakInheritanceList: AfterColon
+BreakStringLiterals: false
+ColumnLimit: 120
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+FixNamespaceComments: true
+IndentCaseLabels: true
+IndentPPDirectives: None
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: true
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+PointerAlignment: Left
+ReflowComments: true
+SortIncludes: true
+SortUsingDeclarations: false
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCtorInitializerColon: false
+SpaceBeforeInheritanceColon: false
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: false
+SpacesInCStyleCastParentheses: false
+SpacesInContainerLiterals: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
--- a/.dockerignore
+++ b/.dockerignore
+docker
+.dockerignore
+.gitlab
+.gitlab-ci.yml
+*build*
+./models
+__pycache__
+.vscode
+translation
+.cache
+*.npy
+*.pth
+*.o
+**/.ipynb_checkpoints
\ No newline at end of file
--- a/.flake8
+++ b/.flake8
+[flake8]
+ignore = W292
+exclude =
+    *migrations*,
+    # python related
+    *.pyc,
+    .git,
+    __pycache__,
+max-line-length=120
+max-complexity=12
+format=pylint
+show_source = True
+statistics = True
+count = True
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
+name: "Bug Report"
+description: Submit a bug report
+labels: [ "bug" ]
+body:
+  - type: input
+    id: branch
+    attributes:
+      label: Branch/Tag/Commit
+      description:
+      placeholder: e.g., main
+    validations:
+      required: true
+  - type: input
+    id: docker_image_version
+    attributes:
+      label: Docker Image Version
+      description:
+      placeholder: e.g., nvcr.io/nvidia/pytorch:22.08-py3
+    validations:
+      required: true
+  - type: input
+    id: gpu_name
+    attributes:
+      label: GPU name
+      description:
+      placeholder: e.g., A100
+    validations:
+      required: true
+  - type: input
+    id: cuda_driver
+    attributes:
+      label: CUDA Driver
+      description:
+      placeholder: e.g., 515.65.01
+    validations:
+      required: true
+  - type: textarea
+    id: reproduced-steps
+    attributes:
+      label: Reproduced Steps
+      description: Please provide the steps to reproduce the bug.
+      render: shell
+      placeholder: |
+        Steps to reproduce your bug (please list the exact scripts and commands you ran instead of writing "follow xxx"; otherwise we will have to keep asking for them):
+        1. docker run -ti --gpus all nvcr.io/nvidia/pytorch:22.03-py3 bash
+        2. git clone https://github.com/NVIDIA/FasterTransformer.git
+        3. cd FasterTransformer && mkdir build && cd build
+        4. cmake -DSM=80 -DCMAKE_BUILD_TYPE=Release .. && make -j12
+        5. ./bin/bert_example 32 12 32 12 64 0 0
+        6. What error you see.
+    validations:
+      required: true
--- a/.gitignore
+++ b/.gitignore
+*~
+*.o
+*build*/
+./models/
+__pycache__/
+.vscode
+.idea
+./translation
+.cache
+*.npy
+*.pth
+!tests/data/**/*.npy
+/models
+/notebooks
+**/.ipynb_checkpoints/
+.DS_Store
+/3rdparty/NeMo/
+/3rdparty/apex/
--- a/.gitmodules
+++ b/.gitmodules
+[submodule "3rdparty/Megatron-LM"]
+	path = 3rdparty/Megatron-LM
+	url = https://github.com/NVIDIA/Megatron-LM.git
+	branch = v2.6
+[submodule "examples/tensorflow/bert/tensorflow_bert/bert"]
+	path = examples/tensorflow/bert/tensorflow_bert/bert
+	url = https://github.com/google-research/bert.git
+[submodule "examples/pytorch/swin/Swin-Transformer-Quantization/SwinTransformer"]
+	path = examples/pytorch/swin/Swin-Transformer-Quantization/SwinTransformer
+	url = https://github.com/microsoft/Swin-Transformer
+[submodule "examples/pytorch/vit/ViT-quantization/ViT-pytorch"]
+	path = examples/pytorch/vit/ViT-quantization/ViT-pytorch
+	url = https://github.com/jeonsworld/ViT-pytorch
+[submodule "3rdparty/cutlass"]
+	path = 3rdparty/cutlass
+	url = https://github.com/NVIDIA/cutlass.git
--- a/3rdparty/CMakeLists.txt
+++ b/3rdparty/CMakeLists.txt
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Sub-projects that are added unconditionally.
+add_subdirectory(common)
+add_subdirectory(trt_fused_multihead_attention)
+# fp8_qgmma_1x1 is only added when ENABLE_FP8 evaluates to true.
+if(ENABLE_FP8)
+add_subdirectory(fp8_qgmma_1x1)
+endif()
\ No newline at end of file
--- a/3rdparty/INIReader.h
+++ b/3rdparty/INIReader.h
+// Read an INI file into easy-to-access name/value pairs.
+// inih and INIReader are released under the New BSD license.
+// Go to the project home page for more info:
+//
+// https://github.com/benhoyt/inih (Initial repo)
+// https://github.com/jtilly/inih  (The reference of this header file)
+/* inih -- simple .INI file parser
+inih is released under the New BSD license (see LICENSE.txt). Go to the project
+home page for more info:
+https://github.com/benhoyt/inih
+https://github.com/jtilly/inih 
+*/
+#ifndef __INI_H__
+#define __INI_H__
+/* Make this header file easier to include in C++ code */
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include <stdio.h>
+/* Typedef for prototype of handler function. */
+typedef int (*ini_handler)(void* user, const char* section,
+                           const char* name, const char* value);
+/* Typedef for prototype of fgets-style reader function. */
+typedef char* (*ini_reader)(char* str, int num, void* stream);
+/* Parse given INI-style file. May have [section]s, name=value pairs
+   (whitespace stripped), and comments starting with ';' (semicolon). Section
+   is "" if name=value pair parsed before any section heading. name:value
+   pairs are also supported as a concession to Python's configparser.
+   For each name=value pair parsed, call handler function with given user
+   pointer as well as section, name, and value (data only valid for duration
+   of handler call). Handler should return nonzero on success, zero on error.
+   Returns 0 on success, line number of first error on parse error (doesn't
+   stop on first error), -1 on file open error, or -2 on memory allocation
+   error (only when INI_USE_STACK is zero).
+*/
+int ini_parse(const char* filename, ini_handler handler, void* user);
+/* Same as ini_parse(), but takes a FILE* instead of filename. This doesn't
+   close the file when it's finished -- the caller must do that. */
+int ini_parse_file(FILE* file, ini_handler handler, void* user);
+/* Same as ini_parse(), but takes an ini_reader function pointer instead of
+   filename. Used for implementing custom or string-based I/O. */
+int ini_parse_stream(ini_reader reader, void* stream, ini_handler handler,
+                     void* user);
+/* Nonzero to allow multi-line value parsing, in the style of Python's
+   configparser. If allowed, ini_parse() will call the handler with the same
+   name for each subsequent line parsed. */
+#ifndef INI_ALLOW_MULTILINE
+#define INI_ALLOW_MULTILINE 1
+#endif
+/* Nonzero to allow a UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of
+   the file. See http://code.google.com/p/inih/issues/detail?id=21 */
+#ifndef INI_ALLOW_BOM
+#define INI_ALLOW_BOM 1
+#endif
+/* Nonzero to allow inline comments (with valid inline comment characters
+   specified by INI_INLINE_COMMENT_PREFIXES). Set to 0 to turn off and match
+   Python 3.2+ configparser behaviour. */
+#ifndef INI_ALLOW_INLINE_COMMENTS
+#define INI_ALLOW_INLINE_COMMENTS 1
+#endif
+#ifndef INI_INLINE_COMMENT_PREFIXES
+#define INI_INLINE_COMMENT_PREFIXES ";"
+#endif
+/* Nonzero to use stack, zero to use heap (malloc/free). */
+#ifndef INI_USE_STACK
+#define INI_USE_STACK 1
+#endif
+/* Stop parsing on first error (default is to keep parsing). */
+#ifndef INI_STOP_ON_FIRST_ERROR
+#define INI_STOP_ON_FIRST_ERROR 0
+#endif
+/* Maximum line length for any line in INI file. */
+#ifndef INI_MAX_LINE
+#define INI_MAX_LINE 200
+#endif
+#ifdef __cplusplus
+}
+#endif
+/* inih -- simple .INI file parser
+inih is released under the New BSD license (see LICENSE.txt). Go to the project
+home page for more info:
+https://github.com/benhoyt/inih
+*/
+#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS)
+#define _CRT_SECURE_NO_WARNINGS
+#endif
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#if !INI_USE_STACK
+#include <stdlib.h>
+#endif
+#define MAX_SECTION 50
+#define MAX_NAME 50
+/* Trim trailing whitespace from s in place by overwriting each trailing
+   whitespace character with NUL. Returns s so that calls can be chained. */
+inline static char* rstrip(char* s)
+{
+    char* tail = s + strlen(s);
+    while (tail > s) {
+        --tail;
+        /* Cast to unsigned char: isspace() must not see a negative value. */
+        if (!isspace((unsigned char)(*tail)))
+            break;
+        *tail = '\0';
+    }
+    return s;
+}
+/* Skip leading whitespace: return a pointer to the first non-whitespace
+   character of s, or to its terminating NUL if s holds only whitespace.
+   The const qualifier is cast away on return; the string is not modified. */
+inline static char* lskip(const char* s)
+{
+    const char* cur = s;
+    for (; *cur != '\0'; ++cur) {
+        if (!isspace((unsigned char)(*cur)))
+            break;
+    }
+    return (char*)cur;
+}
+/* Scan s and return a pointer to the first character that is either
+   contained in chars (when chars is non-NULL) or, if inline comments are
+   enabled, an inline-comment prefix that directly follows a whitespace
+   character. If neither occurs, the returned pointer addresses the
+   terminating NUL of s. */
+inline static char* find_chars_or_comment(const char* s, const char* chars)
+{
+    const char* cur = s;
+#if INI_ALLOW_INLINE_COMMENTS
+    /* Whether the character before cur was whitespace; a comment prefix only
+       counts when this is set, so "a;b" is not split but "a ;b" is. */
+    int prev_was_space = 0;
+    for (; *cur; cur++) {
+        if (chars && strchr(chars, *cur))
+            break;
+        if (prev_was_space && strchr(INI_INLINE_COMMENT_PREFIXES, *cur))
+            break;
+        prev_was_space = isspace((unsigned char)(*cur));
+    }
+#else
+    for (; *cur; cur++) {
+        if (chars && strchr(chars, *cur))
+            break;
+    }
+#endif
+    return (char*)cur;
+}
+/* Version of strncpy that ensures dest (size bytes) is null-terminated.
+   At most size - 1 characters of src are copied and dest[size - 1] is always
+   set to NUL. When size is 0 nothing is written, because there is no room
+   even for the terminator (the unguarded form would write dest[(size_t)-1]).
+   Returns dest. */
+inline static char* strncpy0(char* dest, const char* src, size_t size)
+{
+    if (size == 0)
+        return dest;
+    /* Copy one byte less than the buffer holds; the last byte is reserved
+       for the terminator written below. */
+    strncpy(dest, src, size - 1);
+    dest[size - 1] = '\0';
+    return dest;
+}
+/* Parse INI text pulled line by line from `stream` through `reader`
+   (an fgets-style callback) and report every name/value pair to `handler`.
+   See the prototype's documentation in the header section for the format.
+
+   reader  - called as reader(buffer, INI_MAX_LINE, stream); parsing stops
+             when it returns NULL.
+   stream  - opaque pointer handed to reader unchanged.
+   handler - called as handler(user, section, name, value); a zero return
+             marks the current line as an error.
+   user    - opaque pointer handed to handler unchanged.
+
+   Returns 0 if no error was recorded, otherwise the line number of the first
+   line that failed, or -2 if the line buffer could not be allocated (only
+   possible when INI_USE_STACK is zero). */
+inline int ini_parse_stream(ini_reader reader, void* stream, ini_handler handler,
+                     void* user)
+{
+    /* Uses a fair bit of stack (use heap instead if you need to) */
+#if INI_USE_STACK
+    char line[INI_MAX_LINE];
+#else
+    char* line;
+#endif
+    /* Current section name; "" until the first [section] header is seen. */
+    char section[MAX_SECTION] = "";
+    /* Name of the most recent name/value pair, used for continuation lines.
+       It is stored truncated to MAX_NAME - 1 characters. */
+    char prev_name[MAX_NAME] = "";
+    char* start;
+    char* end;
+    char* name;
+    char* value;
+    int lineno = 0;
+    /* Line number of the first failing line; 0 while none has failed. */
+    int error = 0;
+#if !INI_USE_STACK
+    line = (char*)malloc(INI_MAX_LINE);
+    if (!line) {
+        return -2;
+    }
+#endif
+    /* Scan through stream line by line */
+    /* NOTE(review): with an fgets-style reader, an input line longer than
+       INI_MAX_LINE - 1 characters presumably arrives in several pieces, each
+       of which is then counted and parsed as its own line - confirm. */
+    while (reader(line, INI_MAX_LINE, stream) != NULL) {
+        lineno++;
+        start = line;
+#if INI_ALLOW_BOM
+        /* Skip a UTF-8 byte order mark, but only on the very first line. */
+        if (lineno == 1 && (unsigned char)start[0] == 0xEF &&
+                           (unsigned char)start[1] == 0xBB &&
+                           (unsigned char)start[2] == 0xBF) {
+            start += 3;
+        }
+#endif
+        /* Drop trailing whitespace (including the newline) and then leading
+           whitespace; start > line afterwards means the line was indented. */
+        start = lskip(rstrip(start));
+        if (*start == ';' || *start == '#') {
+            /* Per Python configparser, allow both ; and # comments at the
+               start of a line */
+        }
+#if INI_ALLOW_MULTILINE
+        else if (*prev_name && *start && start > line) {
+#if INI_ALLOW_INLINE_COMMENTS
+        /* Cut the continuation text at an inline comment, if there is one. */
+        end = find_chars_or_comment(start, NULL);
+        if (*end)
+            *end = '\0';
+        rstrip(start);
+#endif
+            /* Non-blank line with leading whitespace, treat as continuation
+               of previous name's value (as per Python configparser). */
+            if (!handler(user, section, prev_name, start) && !error)
+                error = lineno;
+        }
+#endif
+        else if (*start == '[') {
+            /* A "[section]" line */
+            end = find_chars_or_comment(start + 1, "]");
+            if (*end == ']') {
+                *end = '\0';
+                /* The section name is truncated to MAX_SECTION - 1 chars. */
+                strncpy0(section, start + 1, sizeof(section));
+                /* Forget the previous name so that an indented line after a
+                   section header is not treated as a continuation. */
+                *prev_name = '\0';
+            }
+            else if (!error) {
+                /* No ']' found on section line */
+                error = lineno;
+            }
+        }
+        else if (*start) {
+            /* Not a comment, must be a name[=:]value pair */
+            end = find_chars_or_comment(start, "=:");
+            if (*end == '=' || *end == ':') {
+                /* Split the line in place at the first '=' or ':'. */
+                *end = '\0';
+                name = rstrip(start);
+                value = lskip(end + 1);
+#if INI_ALLOW_INLINE_COMMENTS
+                end = find_chars_or_comment(value, NULL);
+                if (*end)
+                    *end = '\0';
+#endif
+                rstrip(value);
+                /* Valid name[=:]value pair found, call handler */
+                strncpy0(prev_name, name, sizeof(prev_name));
+                if (!handler(user, section, name, value) && !error)
+                    error = lineno;
+            }
+            else if (!error) {
+                /* No '=' or ':' found on name[=:]value line */
+                error = lineno;
+            }
+        }
+#if INI_STOP_ON_FIRST_ERROR
+        if (error)
+            break;
+#endif
+    }
+#if !INI_USE_STACK
+    free(line);
+#endif
+    return error;
+}
+/* Parse an already-open FILE* by feeding it to ini_parse_stream() with fgets
+   as the line reader. The file is not closed here; that stays the caller's
+   responsibility. Returns whatever ini_parse_stream() returns. */
+inline int ini_parse_file(FILE* file, ini_handler handler, void* user)
+{
+    /* fgets takes a FILE*, ini_reader takes a void* stream: hence the cast. */
+    ini_reader read_line = (ini_reader)fgets;
+    return ini_parse_stream(read_line, file, handler, user);
+}
+/* Open `filename` for reading, parse it with ini_parse_file() and close it
+   again. Returns -1 if the file cannot be opened, otherwise the result of
+   ini_parse_file(). */
+inline int ini_parse(const char* filename, ini_handler handler, void* user)
+{
+    int result;
+    FILE* fp = fopen(filename, "r");
+    if (fp == NULL)
+        return -1;
+    result = ini_parse_file(fp, handler, user);
+    fclose(fp);
+    return result;
+}
+#endif /* __INI_H__ */
+#ifndef __INIREADER_H__
+#define __INIREADER_H__
+#include <map>
+#include <set>
+#include <string>
+// Read an INI file into easy-to-access name/value pairs. (Note that I've gone
+// for simplicity here rather than speed, but it should be pretty decent.)
+class INIReader
+{
+public:
+    // Empty constructor: the reader holds no sections or values. _error is
+    // set to 0 so that ParseError() never reads an indeterminate value.
+    INIReader(): _error(0) {}
+    // Construct INIReader and parse given filename. See ini.h for more info
+    // about the parsing.
+    INIReader(std::string filename);
+    // Construct INIReader and parse given file. See ini.h for more info
+    // about the parsing. The file is not closed by the reader.
+    INIReader(FILE *file);
+    ~INIReader();
+    // Return the result of ini_parse(), i.e., 0 on success, line number of
+    // first error on parse error, -1 on file open error, or -2 on memory
+    // allocation error (only when INI_USE_STACK is zero).
+    int ParseError() const;
+    // Return the list of sections found in ini file
+    const std::set<std::string>& Sections() const;
+    // Get a string value from INI file, returning default_value if not found.
+    std::string Get(std::string section, std::string name,
+                    std::string default_value) const;
+    // Same, but without a default: prints an error and calls exit(-1) if the
+    // section/name pair is not found.
+    std::string Get(std::string section, std::string name) const;
+    // Get an integer (long) value from INI file, returning default_value if
+    // not found or not a valid integer (decimal "1234", "-1234", or hex "0x4d2").
+    long GetInteger(std::string section, std::string name, long default_value) const;
+    // Same, but without a default: prints an error and calls exit(-1) if no
+    // integer can be parsed.
+    long GetInteger(std::string section, std::string name) const;
+    // Get a real (floating point double) value from INI file, returning
+    // default_value if not found or not a valid floating point value
+    // according to strtod().
+    double GetReal(std::string section, std::string name, double default_value) const;
+    // Get a single precision floating point number value from INI file, returning
+    // default_value if not found or not a valid floating point value
+    // according to strtof().
+    float GetFloat(std::string section, std::string name, float default_value) const;
+    // Same, but without a default: prints an error and calls exit(-1) if no
+    // floating point value can be parsed.
+    float GetFloat(std::string section, std::string name) const;
+    // Get a boolean value from INI file, returning default_value if not found or if
+    // not a valid true/false value. Valid true values are "true", "yes", "on", "1",
+    // and valid false values are "false", "no", "off", "0" (not case sensitive).
+    bool GetBoolean(std::string section, std::string name, bool default_value) const;
+protected:
+    // Result of the parse that filled this reader (see ParseError()).
+    int _error;
+    // Parsed values, keyed by the string MakeKey(section, name) produces.
+    std::map<std::string, std::string> _values;
+    // Section names as passed to ValueHandler.
+    std::set<std::string> _sections;
+    // Build the lookup key used in _values for a section/name pair.
+    static std::string MakeKey(std::string section, std::string name);
+    // ini_handler callback; `user` is the INIReader being filled.
+    static int ValueHandler(void* user, const char* section, const char* name,
+                            const char* value);
+};
+#endif  // __INIREADER_H__
+#ifndef __INIREADER__
+#define __INIREADER__
+#include <algorithm>
+#include <cctype>
+#include <cstdlib>
+// Parse the INI file named by `filename` and remember the parser's result
+// for ParseError().
+inline INIReader::INIReader(std::string filename)
+{
+    // Parsing happens in the constructor body (not the initializer list)
+    // because ValueHandler writes to _values and _sections, which must
+    // already be constructed.
+    const int status = ini_parse(filename.c_str(), ValueHandler, this);
+    _error = status;
+}
+// Parse INI data from an already-open `file` and remember the parser's
+// result for ParseError(). The file is not closed here.
+inline INIReader::INIReader(FILE *file)
+{
+    // Parsing happens in the constructor body (not the initializer list)
+    // because ValueHandler writes to _values and _sections, which must
+    // already be constructed.
+    const int status = ini_parse_file(file, ValueHandler, this);
+    _error = status;
+}
+// Return the status stored by the parsing constructor: 0 on success, the
+// line number of the first parse error, or a negative ini_parse() error code.
+inline int INIReader::ParseError() const
+{
+    return this->_error;
+}
+// Nothing to release explicitly; the map and set members clean up themselves.
+inline INIReader::~INIReader()
+{
+}
+// Return a reference to the set of section names collected while parsing.
+// The reference stays valid only as long as this reader exists.
+inline const std::set<std::string>& INIReader::Sections() const
+{
+    return this->_sections;
+}
+// Look up section/name (case-insensitively, via MakeKey) and return the
+// stored string, or default_value when the pair is not present.
+inline std::string INIReader::Get(std::string section, std::string name, std::string default_value) const
+{
+    const std::map<std::string, std::string>::const_iterator hit = _values.find(MakeKey(section, name));
+    if (hit == _values.end())
+        return default_value;
+    return hit->second;
+}
+// Look up section/name (case-insensitively, via MakeKey) and return the
+// stored string. There is no default: if the pair is missing, an error is
+// printed to stdout and the process terminates with exit(-1).
+inline std::string INIReader::Get(std::string section, std::string name) const
+{
+    const std::map<std::string, std::string>::const_iterator hit = _values.find(MakeKey(section, name));
+    if (hit != _values.end())
+        return hit->second;
+    printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str());
+    exit(-1);
+}
+// Return the value of section/name parsed as a long, or default_value when
+// the pair is missing or its text does not start with a parsable integer.
+inline long INIReader::GetInteger(std::string section, std::string name, long default_value) const
+{
+    const std::string text = Get(section, name, "");
+    const char* const begin = text.c_str();
+    char* stop = NULL;
+    // Base 0 lets strtol pick the radix from the prefix, so both "1234"
+    // (decimal) and "0x4D2" (hex) are accepted.
+    const long parsed = strtol(begin, &stop, 0);
+    // strtol leaves stop == begin when it could not consume any characters.
+    if (stop > begin)
+        return parsed;
+    return default_value;
+}
+// Return the value of section/name parsed as a long. There is no default:
+// if no integer can be parsed (which includes a missing pair, because the
+// lookup then yields ""), an error is printed to stdout and the process
+// terminates with exit(-1).
+inline long INIReader::GetInteger(std::string section, std::string name) const
+{
+    const std::string text = Get(section, name, "");
+    const char* const begin = text.c_str();
+    char* stop = NULL;
+    // Base 0 lets strtol pick the radix from the prefix, so both "1234"
+    // (decimal) and "0x4D2" (hex) are accepted.
+    const long parsed = strtol(begin, &stop, 0);
+    const bool consumed_any = stop > begin;
+    if (!consumed_any)
+    {
+        printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str());
+        exit(-1);
+    }
+    return parsed;
+}
+// Return the value of section/name parsed as a double with strtod(), or
+// default_value when the pair is missing or no number can be parsed.
+inline double INIReader::GetReal(std::string section, std::string name, double default_value) const
+{
+    const std::string text = Get(section, name, "");
+    const char* const begin = text.c_str();
+    char* stop = NULL;
+    const double parsed = strtod(begin, &stop);
+    // strtod leaves stop == begin when it could not consume any characters.
+    if (stop > begin)
+        return parsed;
+    return default_value;
+}
+// Return the value of section/name parsed as a float with strtof(), or
+// default_value when the pair is missing or no number can be parsed.
+inline float INIReader::GetFloat(std::string section, std::string name, float default_value) const
+{
+    const std::string text = Get(section, name, "");
+    const char* const begin = text.c_str();
+    char* stop = NULL;
+    const float parsed = strtof(begin, &stop);
+    // strtof leaves stop == begin when it could not consume any characters.
+    if (stop > begin)
+        return parsed;
+    return default_value;
+}
+// Return the value of section/name parsed as a float with strtof(). There is
+// no default: if no number can be parsed (which includes a missing pair,
+// because the lookup then yields ""), an error is printed to stdout and the
+// process terminates with exit(-1).
+inline float INIReader::GetFloat(std::string section, std::string name) const
+{
+    const std::string text = Get(section, name, "");
+    const char* const begin = text.c_str();
+    char* stop = NULL;
+    const float parsed = strtof(begin, &stop);
+    const bool consumed_any = stop > begin;
+    if (!consumed_any)
+    {
+        printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str());
+        exit(-1);
+    }
+    return parsed;
+}
+// Return the value of section/name interpreted as a boolean. "true", "yes",
+// "on" and "1" give true; "false", "no", "off" and "0" give false (compared
+// case-insensitively). Anything else, including a missing pair, gives
+// default_value.
+inline bool INIReader::GetBoolean(std::string section, std::string name, bool default_value) const
+{
+    std::string valstr = Get(section, name, "");
+    // Convert to lower case to make string comparisons case-insensitive.
+    // Each char is passed through unsigned char first: calling tolower() with
+    // a negative value (a non-ASCII byte where char is signed) is undefined.
+    std::transform(valstr.begin(), valstr.end(), valstr.begin(), [](unsigned char c) {
+        return static_cast<char>(std::tolower(c));
+    });
+    if (valstr == "true" || valstr == "yes" || valstr == "on" || valstr == "1")
+        return true;
+    else if (valstr == "false" || valstr == "no" || valstr == "off" || valstr == "0")
+        return false;
+    else
+        return default_value;
+}
+// Build the map key for a section/name pair: "<section>=<name>", lower-cased
+// so that lookups are case-insensitive.
+inline std::string INIReader::MakeKey(std::string section, std::string name)
+{
+    std::string key = section + "=" + name;
+    // Convert to lower case to make section/name lookups case-insensitive.
+    // Each char is passed through unsigned char first: calling tolower() with
+    // a negative value (a non-ASCII byte where char is signed) is undefined.
+    std::transform(key.begin(), key.end(), key.begin(), [](unsigned char c) {
+        return static_cast<char>(std::tolower(c));
+    });
+    return key;
+}
+// ini_handler callback used by the parsing constructors. `user` is the
+// INIReader being filled. The value is stored under MakeKey(section, name);
+// if that key already holds text (repeated name or continuation line), the
+// new value is appended after a "\n". The section name is recorded as well.
+// Always returns 1, which the parser treats as success.
+inline int INIReader::ValueHandler(void* user, const char* section, const char* name,
+                            const char* value)
+{
+    INIReader* self = static_cast<INIReader*>(user);
+    // operator[] creates an empty entry on first use of the key.
+    std::string& slot = self->_values[MakeKey(section, name)];
+    if (!slot.empty())
+        slot += "\n";
+    slot += value;
+    self->_sections.insert(section);
+    return 1;
+}
+#endif  // __INIREADER__
\ No newline at end of file
--- a/3rdparty/Megatron-LM/.gitignore
+++ b/3rdparty/Megatron-LM/.gitignore
+__pycache__
--- a/3rdparty/Megatron-LM/.gitlab-ci.yml
+++ b/3rdparty/Megatron-LM/.gitlab-ci.yml
+image: gitlab-master.nvidia.com/dl/dgx/pytorch:20.12-py3-devel
+test:
+  script:
+    - pytest --junitxml=report.xml tests
+  artifacts:
+    when: always
+    reports:
+      junit: report.xml
\ No newline at end of file
--- a/3rdparty/Megatron-LM/LICENSE
+++ b/3rdparty/Megatron-LM/LICENSE
+The following applies to all files unless otherwise noted:
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+--
+This repository also contains code from Hugging Face Inc., Google Research,
+Facebook (from their Fairseq project), and Philip Popien. Files from these
+organizations have notices at the top of each file. Below are licenses
+used in those files, as indicated.
+------------- LICENSE FOR huggingface and Google Research code  --------------
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+------------- LICENSE FOR Facebook Fairseq code --------------
+MIT License
+Copyright (c) Facebook, Inc. and its affiliates.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/3rdparty/Megatron-LM/README.md
+++ b/3rdparty/Megatron-LM/README.md
--- a/3rdparty/Megatron-LM/examples/evaluate_retriever_nq.sh
+++ b/3rdparty/Megatron-LM/examples/evaluate_retriever_nq.sh
+#!/bin/bash
+# Evaluate Natural Questions test data given Wikipedia embeddings and a
+# pretrained ICT model or a model finetuned for the Natural Questions task.
+# Datasets can be downloaded from the following link:
+# https://github.com/facebookresearch/DPR/blob/master/data/download_data.py
+#
+# The <...> values below are placeholders: replace each of them with a real
+# path before running, otherwise the shell treats the angle brackets as
+# redirections and the script fails.
+EVIDENCE_DATA_DIR=<Specify path of Wikipedia dataset>
+EMBEDDING_PATH=<Specify path of the embeddings>
+CHECKPOINT_PATH=<Specify path of pretrained ICT model or finetuned model>
+QA_FILE=<Path of the natural question dev or test dataset>
+# Path expansions are quoted so that a path containing spaces or glob
+# characters reaches tasks/main.py as a single argument.
+python tasks/main.py \
+    --task RETRIEVER-EVAL \
+    --tokenizer-type BertWordPieceLowerCase \
+    --num-layers 12 \
+    --hidden-size 768 \
+    --num-attention-heads 12 \
+    --tensor-model-parallel-size 1 \
+    --micro-batch-size 128 \
+    --activations-checkpoint-method uniform \
+    --seq-length 512 \
+    --max-position-embeddings 512 \
+    --load "${CHECKPOINT_PATH}" \
+    --evidence-data-path "${EVIDENCE_DATA_DIR}" \
+    --embedding-path "${EMBEDDING_PATH}" \
+    --retriever-seq-length 256 \
+    --vocab-file bert-vocab.txt \
+    --qa-data-test "${QA_FILE}" \
+    --faiss-use-gpu \
+    --retriever-report-topk-accuracies 1 5 20 100 \
+    --fp16 \
+    --indexer-log-interval 1000 \
+    --indexer-batch-size 128
--- a/3rdparty/Megatron-LM/examples/evaluate_zeroshot_gpt.sh
+++ b/3rdparty/Megatron-LM/examples/evaluate_zeroshot_gpt.sh
+#!/bin/bash
+# Zero-shot evaluation of a GPT checkpoint on the LAMBADA task, started through
+# torch.distributed.launch with 8 processes on a single node.
+WORLD_SIZE=8
+DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
+                  --nnodes 1 \
+                  --node_rank 0 \
+                  --master_addr localhost \
+                  --master_port 6000"
+TASK="LAMBADA"
+# Placeholder: replace <lambada path> with the real dataset path before running.
+VALID_DATA=<lambada path>
+VOCAB_FILE=gpt2-vocab.json
+MERGE_FILE=gpt2-merges.txt
+CHECKPOINT=checkpoints/gpt2_345m
+# $DISTRIBUTED_ARGS is intentionally unquoted: it holds several launcher flags
+# that must be word-split into separate arguments. $VALID_DATA is also left
+# unquoted in case it is set to a whitespace-separated list of paths.
+# Single-value expansions are quoted so paths with spaces stay one argument.
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+               --task "$TASK" \
+               --valid-data $VALID_DATA \
+               --tokenizer-type GPT2BPETokenizer \
+               --strict-lambada \
+               --vocab-file "$VOCAB_FILE" \
+               --merge-file "$MERGE_FILE" \
+               --load "$CHECKPOINT" \
+               --tensor-model-parallel-size 1 \
+               --num-layers 24 \
+               --hidden-size 1024 \
+               --num-attention-heads 16 \
+               --batch-size 8 \
+               --activations-checkpoint-method uniform \
+               --seq-length 1024 \
+               --max-position-embeddings 1024 \
+               --log-interval 10 \
+               --fp16 \
+               --no-load-optim \
+               --no-load-rng
--- a/3rdparty/Megatron-LM/examples/finetune_mnli_distributed.sh
+++ b/3rdparty/Megatron-LM/examples/finetune_mnli_distributed.sh
+#!/bin/bash
+WORLD_SIZE=8
+DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
+                  --nnodes 1 \
+                  --node_rank 0 \
+                  --master_addr localhost \
+                  --master_port 6000"
+TRAIN_DATA="data/glue_data/MNLI/train.tsv"
+VALID_DATA="data/glue_data/MNLI/dev_matched.tsv \
+            data/glue_data/MNLI/dev_mismatched.tsv"
+PRETRAINED_CHECKPOINT=checkpoints/bert_345m
+VOCAB_FILE=bert-vocab.txt
+CHECKPOINT_PATH=checkpoints/bert_345m_mnli
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+               --task MNLI \
+               --seed 1234 \
+               --train-data $TRAIN_DATA \
+               --valid-data $VALID_DATA \
+               --tokenizer-type BertWordPieceLowerCase \
+               --vocab-file $VOCAB_FILE \
+               --epochs 5 \
+               --pretrained-checkpoint $PRETRAINED_CHECKPOINT \
+               --tensor-model-parallel-size 1 \
+               --num-layers 24 \
+               --hidden-size 1024 \
+               --num-attention-heads 16 \
+               --micro-batch-size 8 \
+               --activations-checkpoint-method uniform \
+               --lr 5.0e-5 \
+               --lr-decay-style linear \
+               --lr-warmup-fraction 0.065 \
+               --seq-length 512 \
+               --max-position-embeddings 512 \
+               --save-interval 500000 \
+               --save $CHECKPOINT_PATH \
+               --log-interval 10 \
+               --eval-interval 100 \
+               --eval-iters 50 \
+               --weight-decay 1.0e-1 \
+               --fp16
--- a/3rdparty/Megatron-LM/examples/finetune_race_distributed.sh
+++ b/3rdparty/Megatron-LM/examples/finetune_race_distributed.sh
+#!/bin/bash
+WORLD_SIZE=8
+DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
+                  --nnodes 1 \
+                  --node_rank 0 \
+                  --master_addr localhost \
+                  --master_port 6000"
+TRAIN_DATA="data/RACE/train/middle"
+VALID_DATA="data/RACE/dev/middle \
+            data/RACE/dev/high"
+VOCAB_FILE=bert-vocab.txt
+PRETRAINED_CHECKPOINT=checkpoints/bert_345m
+CHECKPOINT_PATH=checkpoints/bert_345m_race
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+               --task RACE \
+               --seed 1234 \
+               --train-data $TRAIN_DATA \
+               --valid-data $VALID_DATA \
+               --tokenizer-type BertWordPieceLowerCase \
+               --vocab-file $VOCAB_FILE \
+               --epochs 3 \
+               --pretrained-checkpoint $PRETRAINED_CHECKPOINT \
+               --tensor-model-parallel-size 1 \
+               --num-layers 24 \
+               --hidden-size 1024 \
+               --num-attention-heads 16 \
+               --micro-batch-size 4 \
+               --activations-checkpoint-method uniform \
+               --lr 1.0e-5 \
+               --lr-decay-style linear \
+               --lr-warmup-fraction 0.06 \
+               --seq-length 512 \
+               --max-position-embeddings 512 \
+               --save-interval 100000 \
+               --save $CHECKPOINT_PATH \
+               --log-interval 10 \
+               --eval-interval 100 \
+               --eval-iters 50 \
+               --weight-decay 1.0e-1 \
+               --clip-grad 1.0 \
+               --hidden-dropout 0.1 \
+               --attention-dropout 0.1 \
+               --fp16
--- a/3rdparty/Megatron-LM/examples/finetune_retriever_distributed.sh
+++ b/3rdparty/Megatron-LM/examples/finetune_retriever_distributed.sh
+#!/bin/bash
+# Finetune a BERT or pretrained ICT model using Google natural question data
+# Datasets can be downloaded from the following link:
+# https://github.com/facebookresearch/DPR/blob/master/data/download_data.py
+#
+# The <...> values below are placeholders: replace each of them with a real
+# path before running, otherwise the shell treats the angle brackets as
+# redirections and the script fails.
+WORLD_SIZE=8
+DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE \
+                  --nnodes 1 \
+                  --node_rank 0 \
+                  --master_addr localhost \
+                  --master_port 6000"
+CHECKPOINT_PATH=<Specify path for the finetuned retriever model>
+# Load either of the below
+# NOTE(review): the command below passes both --pretrained-checkpoint and
+# --bert-load; presumably the flag for the unused model should be removed --
+# confirm against tasks/main.py.
+BERT_LOAD_PATH=<Path of BERT pretrained model>
+PRETRAINED_CHECKPOINT=<Path of Pretrained ICT model>
+# $DISTRIBUTED_ARGS is intentionally unquoted so its flags are word-split.
+# Path expansions are quoted so a path containing spaces stays one argument.
+python -m torch.distributed.launch $DISTRIBUTED_ARGS ./tasks/main.py \
+        --task RET-FINETUNE-NQ \
+        --train-with-neg \
+        --train-hard-neg 1 \
+        --pretrained-checkpoint "${PRETRAINED_CHECKPOINT}" \
+        --num-layers 12 \
+        --hidden-size 768 \
+        --num-attention-heads 12 \
+        --tensor-model-parallel-size 1 \
+        --tokenizer-type BertWordPieceLowerCase \
+        --train-data nq-train.json \
+        --valid-data nq-dev.json \
+        --save "${CHECKPOINT_PATH}" \
+        --load "${CHECKPOINT_PATH}" \
+        --vocab-file bert-vocab.txt \
+        --bert-load "${BERT_LOAD_PATH}" \
+        --save-interval 5000 \
+        --log-interval 10 \
+        --eval-interval 20000 \
+        --eval-iters 100 \
+        --indexer-log-interval 1000 \
+        --faiss-use-gpu \
+        --DDP-impl torch \
+        --fp16 \
+        --retriever-report-topk-accuracies 1 5 10 20 100 \
+        --seq-length 512 \
+        --retriever-seq-length 256 \
+        --max-position-embeddings 512 \
+        --retriever-score-scaling \
+        --epochs 80 \
+        --micro-batch-size 8 \
+        --eval-micro-batch-size 16 \
+        --indexer-batch-size 128 \
+        --lr 2e-5 \
+        --lr-warmup-fraction 0.01 \
+        --weight-decay 1e-1
--- a/3rdparty/Megatron-LM/examples/merge_mp_bert.sh
+++ b/3rdparty/Megatron-LM/examples/merge_mp_bert.sh
+#!/bin/bash
+# Run tools/merge_mp_partitions.py on a BERT checkpoint that was saved with
+# tensor-model-parallel size 2.
+TENSOR_MODEL_PARALLEL_SIZE=2
+VOCAB_FILE=bert-vocab.txt
+CHECKPOINT_PATH=checkpoints/bert_345m
+# WORLD_SIZE is set only in the environment of this one python invocation.
+# Expansions are quoted so a path containing spaces stays one argument.
+WORLD_SIZE=$TENSOR_MODEL_PARALLEL_SIZE python tools/merge_mp_partitions.py \
+                                --model-type BERT \
+                                --tensor-model-parallel-size "$TENSOR_MODEL_PARALLEL_SIZE" \
+                                --tokenizer-type BertWordPieceLowerCase \
+                                --vocab-file "$VOCAB_FILE" \
+                                --num-layers 24 \
+                                --hidden-size 1024 \
+                                --num-attention-heads 16 \
+                                --seq-length 512 \
+                                --max-position-embeddings 512 \
+                                --load "$CHECKPOINT_PATH"
--- a/3rdparty/Megatron-LM/examples/msdp/README.md
+++ b/3rdparty/Megatron-LM/examples/msdp/README.md
+# Multi-Stage Prompting for Knowledgeable Dialogue Generation
+This directory contains the scripts for multi-stage prompting for knowledgeable dialogue generation, covering data preparation, knowledge generation, and response generation. More details are available in the [`knowledgeable task directory`](../../tasks/msdp).
--- a/3rdparty/Megatron-LM/examples/msdp/data_processing.sh
+++ b/3rdparty/Megatron-LM/examples/msdp/data_processing.sh
+#!/bin/bash
+# Data preparation for our framework: preprocessing the WoW and WoI datasets
+# The datasets can be downloaded through the following links:
+# WoW: https://parl.ai/projects/wizard_of_wikipedia/
+# WoI: https://parl.ai/projects/sea/
+#
+# The preprocessing script is resolved relative to the current working
+# directory, so run this from the directory that contains tasks/.
+# All path expansions are quoted so that directories containing spaces or
+# glob characters are passed to python as single arguments.
+DIR=$(pwd)
+# Before running the preprocessing, please download
+# the Wizard of Wikipedia and Wizard of Internet datasets.
+# The <...> values are placeholders: replace them with real paths first,
+# otherwise the shell treats the angle brackets as redirections.
+WOW_DATA_FOLDER=<PATH_OF_WIZARD_OF_WIKIPEDIA_DATA_FOLDER>
+WOI_DATA_FOLDER=<PATH_OF_WIZARD_OF_INTERNET_DATA_FOLDER>
+# We provide examples for processing the raw data from Wizard of Wikipedia
+# Processing the train dataset (train.json)
+python "${DIR}/tasks/msdp/preprocessing.py" \
+        --func process_wow_dataset \
+        --raw_file "${WOW_DATA_FOLDER}/train.json" \
+        --processed_file "${WOW_DATA_FOLDER}/train_processed.txt"
+# Processing test seen dataset (test_random_split.json)
+python "${DIR}/tasks/msdp/preprocessing.py" \
+        --func process_wow_dataset \
+        --raw_file "${WOW_DATA_FOLDER}/test_random_split.json" \
+        --processed_file "${WOW_DATA_FOLDER}/testseen_processed.txt" \
+        --knwl_ref_file "${WOW_DATA_FOLDER}/output_testseen_knowledge_reference.txt" \
+        --resp_ref_file "${WOW_DATA_FOLDER}/output_testseen_response_reference.txt"
+# Processing test unseen dataset (test_topic_split.json)
+python "${DIR}/tasks/msdp/preprocessing.py" \
+        --func process_wow_dataset \
+        --raw_file "${WOW_DATA_FOLDER}/test_topic_split.json" \
+        --processed_file "${WOW_DATA_FOLDER}/testunseen_processed.txt" \
+        --knwl_ref_file "${WOW_DATA_FOLDER}/output_testunseen_knowledge_reference.txt" \
+        --resp_ref_file "${WOW_DATA_FOLDER}/output_testunseen_response_reference.txt"
+# We provide the following script to process the raw data from Wizard of Internet
+# Processing the test dataset (test.jsonl)
+python "${DIR}/tasks/msdp/preprocessing.py" \
+        --func process_woi_dataset \
+        --raw_file "${WOI_DATA_FOLDER}/test.jsonl" \
+        --processed_file "${WOI_DATA_FOLDER}/test_processed.txt" \
+        --knwl_ref_file "${WOI_DATA_FOLDER}/output_test_knowledge_reference.txt" \
+        --resp_ref_file "${WOI_DATA_FOLDER}/output_test_response_reference.txt"
+# Get the knowledge generation prompts for each test dataset in WoW and WoI
+MODEL_FILE=<PATH_OF_THE_FINETUNED_DPR_MODEL>
+# WoW test seen
+python "${DIR}/tasks/msdp/preprocessing.py" \
+        --func get_knwl_gen_prompts \
+        --test_file "${WOW_DATA_FOLDER}/testseen_processed.txt" \
+        --train_file "${WOW_DATA_FOLDER}/train_processed.txt" \
+        --model_file "${MODEL_FILE}" \
+        --processed_file "${WOW_DATA_FOLDER}/output_testseen_knowledge_prompts.json" \
+        --data_type wow_seen
+# WoW test unseen
+python "${DIR}/tasks/msdp/preprocessing.py" \
+        --func get_knwl_gen_prompts \
+        --test_file "${WOW_DATA_FOLDER}/testunseen_processed.txt" \
+        --train_file "${WOW_DATA_FOLDER}/train_processed.txt" \
+        --model_file "${MODEL_FILE}" \
+        --processed_file "${WOW_DATA_FOLDER}/output_testunseen_knowledge_prompts.json" \
+        --data_type wow_unseen
+# WoI (the prompts are built from the WoW training file)
+python "${DIR}/tasks/msdp/preprocessing.py" \
+        --func get_knwl_gen_prompts \
+        --test_file "${WOI_DATA_FOLDER}/test_processed.txt" \
+        --train_file "${WOW_DATA_FOLDER}/train_processed.txt" \
+        --model_file "${MODEL_FILE}" \
+        --processed_file "${WOI_DATA_FOLDER}/output_test_knowledge_prompts.json" \
+        --data_type woi
+# Get the response generation prompts (can be applied for all the test datasets)
+python "${DIR}/tasks/msdp/preprocessing.py" \
+        --func get_resp_gen_prompts \
+        --train_file "${WOW_DATA_FOLDER}/train_processed.txt" \
+        --processed_file "${WOW_DATA_FOLDER}/output_response_prompts.txt"