#!/usr/bin/perl -w

use 5.006; use v5.10.1;
use warnings;
use File::Basename;
use FindBin;
use strict;

# Meaning of the "action" column in the option table:
# "W"   Warning: warn and drop the option. The option may affect correctness.
# "I"   Ignore:  silently drop the option and its parameters. The option does not affect correctness.
# "E"   Error:   the option is not allowed.
# ""    Default: the option can be transformed as-is (fake option or hipcc/clang option).


# TODO: para_num currently only indicates whether the option takes a parameter (true or false).
#       By default, each compilation option can consume at most one parameter.
#       However, two situations need to be considered in the future:
# - List options whose values are separated by ",". (-Idir1,dir2 -I=dir3 -I dir4,dir5)
# - Compile options that really take n parameters. (--forward-unknown-to-host-compiler)
package nvcc_helper;
my @nvcc_paras_arr = (
# long_option                            short_option                    para_num   action   hipcc_option   content
["--allow-unsupported-compiler",         "-allow-unsupported-compiler",      "0",    "I",    "",             "Ignore"],
["--archive-options",                    "-Xarchive",                        "1",    "",    "-Xarchive",     "Ignore"],
["--archiver-binary",                    "-arbin",                           "1",    "W",    "",             ""],
["--augment-host-linker-script ",        "-aug-hls",                         "0",    "W",    "",             ""],
["--clean-targets",                      "-clean",                           "0",    "W",    "",             ""],
["--compile",                            "-c",                               "0",    "",     "-c",             ""],
["--compile-as-tools-patch",             "-astoolspatch",                    "0",    "W",    "",             ""],
["--compiler-bindir",                    "-ccbin",                           "1",    "W",    "",             ""],
["--compiler-options",                   "-Xcompiler",                       "1",    "",     "-Xcompiler",             ""],
["--cubin",                              "-cubin",                           "0",    "",     "--genco",             ""],  # hip not support cubin, but can use --genco
["--cuda",                               "-cuda",                            "0",    "",     "-E",             ""],
["--cudadevrt",                          "-cudadevrt",                       "1",    "I",    "",             "Ignore"],
["--cudart",                             "-cudart",                          "1",    "I",    "",             "Ignore"],
["--debug",                              "-g",                               "0",    "",     "-g",           ""],
["--default-stream",                     "-default-stream",                  "1",    "",     "-fgpu-default-stream",             ""],
["--define-macro",                       "-D",                               "1",    "",     "-D",           ""],
["--dependency-drive-prefix",            "-ddp",                             "1",    "I",    "",             "Ignore"],
["--dependency-output",                  "-MF",                              "1",    "",     "-MF",             ""],
["--dependency-target-name",             "-MT",                              "1",    "",     "-MT",             ""],
["--device-c",                           "-dc",                              "0",    "",     "-c -fgpu-rdc",             ""],
["--device-debug",                       "-G",                               "0",    "",     "-g",             ""],
["--device-link",                        "-dlink",                           "0",    "",     "-dlink",             ""],   # Need a fake dlink process
["--device-w",                           "-dw",                              "0",    "",     "-c",             ""],
["--diag-error",                         "-diag-error",                      "1",    "I",    "",             "Ignore"],
["--diag-suppress",                      "-diag-suppress",                   "1",    "I",    "",             "Ignore"],
["--diag-warn",                          "-diag-warn",                       "1",    "I",    "",             "Ignore"],
["--disable-warnings",                   "-w",                               "0",    "",     "-w",             ""],
["--display-error-number",               "-err-no",                          "0",    "I",    "",             "Ignore"],
["--dlink-time-opt",                     "-dlto",                            "0",    "I",    "",             "Ignore"],
["--dont-use-profile",                   "-noprof",                          "0",    "I",    "",             "Ignore"],
["--dopt",                               "-dopt",                            "1",    "W",    "",             ""],
["--drive-prefix",                       "-dp",                              "1",    "I",    "",             "Ignore"],
["--dryrun",                             "-dryrun",                          "0",    "",     "-###",             ""],
["--entries",                            "-e",                               "1",    "I",    "",             "Ignore"],
["--expt-extended-lambda",               "-expt-extended-lambda",            "0",    "I",     "",   ""], # support by default
["--expt-relaxed-constexpr",             "-expt-relaxed-constexpr",          "0",    "W",    "",             ""],
["--extended-lambda",                    "-extended-lambda",                 "0",    "I",     "",   ""], # support by default
["--extensible-whole-program",           "-ewp",                             "0",    "I",    "",             "Ignore"],
["--extra-device-vectorization",         "-extra-device-vectorization",      "0",    "I",    "",             "Ignore"],
["--fatbin",                             "-fatbin",                          "0",    "",     "--genco",             ""],
["--fmad",                               "-fmad",                            "1",    "I",    "",             "Ignore"],
["--forward-unknown-opts",               "-forward-unknown-opts",            "1",    "W",    "",             "Current not support -forward-unknown-opts"],   # TODO warning forward unknown opts
["--forward-unknown-to-host-compiler",   "-forward-unknown-to-host-compiler","1",    "W",    "",             "Current not support -forward-unknown-to-host-compiler"],
["--forward-unknown-to-host-linker",     "-forward-unknown-to-host-linker",  "1",    "W",    "",             "Current not support -forward-unknown-to-host-linker"],
["--ftemplate-backtrace-limit",          "-ftemplate-backtrace-limit",       "1",    "I",    "",             "Ignore"],
["--ftemplate-depth",                    "-ftemplate-depth",                 "1",    "I",    "",             "Ignore"],
["--ftz",                                "-ftz",                             "1",    "W",    "",             ""],   # TODO  -fcuda-flush-denormals-to-zero
["--generate-code",                      "-gencode",                         "1",    "",     "--offload-arch",             ""],
["--generate-dependencies",              "-M",                               "0",    "",     "-M",             ""],
["--generate-dependencies-with-compile", "-MD",                              "0",    "",     "-MD",             ""],
["--generate-dependency-targets",        "-MP",                              "0",    "",     "-MP",             ""],
["--generate-line-info",                 "-lineinfo",                        "0",    "W",    "",             "Current not support output line-number information!"],
["--generate-nonsystem-dependencies",    "-MM",                              "0",    "",     "-MM",             ""],
["--generate-nonsystem-dependencies-with-compile",    "-MMD",                "0",    "",     "-MMD",             ""],
["--gpu-architecture",                   "-arch",                            "1",    "",     "--offload-arch",             ""],
["--gpu-code",                           "-code",                            "1",    "",     "--offload-arch",             ""],
["--help",                               "-h",                               "0",    "",     "-help",             ""],
["--host-linker-script",                 "-hls",                             "1",    "W",    "",             ""],
["--host-relocatable-link",              "-r",                               "0",    "W",    "",             ""],
["--include-path",                       "-I",                               "1",    "",     "-I",             ""],
["--input-drive-prefix",                 "-idp",                             "1",    "I",    "",             "Ignore"],
["--keep",                               "-keep",                            "0",    "",     "-save-temps",             ""],
["--keep-device-functions",              "-keep-device-functions",           "0",    "W",    "",             ""],
["--keep-dir",                           "-keep-dir",                        "1",    "W",    "",             ""],   # -TODO save-temps=
["--lib",                                "-lib",                             "0",    "",     "-lib",             ""],
["--libdevice-directory",                "-ldir",                            "1",    "W",    "",             ""],
["--library",                            "-l",                               "1",    "",     "-l",             ""],
["--library-path",                       "-L",                               "1",    "",     "-L",             ""],
["--link",                               "-link",                            "0",    "I",    "",             "Ignore"],
["--linker-options",                     "-Xlinker",                         "1",    "",     "-Xlinker",             ""],
["--list-gpu-arch",                      "-arch-ls",                         "0",    "W",    "",             ""],       # TODO list gfx906
["--list-gpu-code",                      "-code-ls",                         "0",    "W",    "",             ""],       # TODO
["--lto",                                "-lto",                             "0",    "I",    "",             "Ignore"],
["--m32",                                "-m32",                             "0",    "I",    "",             "Ignore"],
["--m64",                                "-m64",                             "0",    "I",    "",             "Ignore"],
["--machine",                            "",                                 "1",    "I",    "",             "Ignore"],   # remove short version -m for -msse4.1 in openmm
["--maxrregcount",                       "-maxrregcount",                    "1",    "I",    "",             "Ignore"],
["--no-align-double",                    "--no-align-double",                "0",    "I",    "",             "Ignore"],
["--no-compress",                        "-no-compress",                     "0",    "I",    "",             "Ignore"],
["--no-device-link",                     "-nodlink",                         "0",    "",     "-nogpulib",             ""],
["--no-display-error-number",            "-no-err-no",                       "0",    "I",    "",             "Ignore"],
["--no-exceptions",                      "-noeh",                            "0",    "W",    "",             ""],   # TODO -fexceptions -fno-exceptions
["--no-host-device-initializer-list",    "-nohdinitlist",                    "0",    "I",    "",             "Ignore"],
["--no-host-device-move-forward",        "-nohdmoveforward",                 "0",    "I",    "",             "Ignore"],
["--nvlink-options",                     "-Xnvlink",                         "1",    "",    "-Xnvlink",      "Ignore"],
["--objdir-as-tempdir",                  "-objtemp",                         "0",    "W",    "",             ""],
["--optimization-info",                  "-opt-info",                        "1",    "W",    "",             ""],
["--optimize",                           "-O",                               "1",    "",     "-O",             ""],
["--options-file",                       "-optf",                            "1",    "W",    "",             "Current not support read options from file!"], # adapt to higher version cmake
["--optix-ir",                           "-optix-ir",                        "0",    "",     "-c --cuda-device-only",             ""],
["--output-directory",                   "-odir",                            "1",    "W",    "",             ""],
["--output-file",                        "-o",                               "1",    "",     "-o",             ""],
["--pre-include",                        "-include",                         "1",    "",     "--include",             ""],
["--prec-div",                           "-prec-div",                        "1",    "W",    "",             ""],   # TODO -freciprocal-math
["--prec-sqrt",                          "-prec-sqrt",                       "1",    "W",    "",             ""],
["--preprocess",                         "-E",                               "0",    "",     "-E",             ""],
["--profile",                            "-pg",                              "0",    "W",    "",             ""],
["--ptx",                                "-ptx",                             "0",    "",     "-c --cuda-device-only",             ""],
["--ptxas-options",                      "-Xptxas",                          "1",    "",     "-Xptxas",      "Ignore"],
["--qpp-config",                         "-qpp-config",                      "1",    "I",    "",             "Ignore"],
["--relocatable-device-code",            "-rdc",                             "1",    "",     "-rdc",             ""],
["--resource-usage",                     "-res-usage",                       "0",    "I",    "",             "Ignore"],
["--restrict",                           "-restrict",                        "0",    "W",    "",             ""],
["--run",                                "-run",                             "0",    "W",    "",             ""],
["--run-args",                           "-run-args",                        "1",    "W",    "",             ""],
["--save-temps",                         "-save-temps",                      "0",    "",     "-save-temps",             ""],
["--shared",                             "-shared",                          "0",    "",     "-shared",             ""],
["--source-in-ptx",                      "-src-in-ptx",                      "0",    "W",    "",             ""],
["--std",                                "-std",                             "1",    "",     "--std",             ""],
["--system-include",                     "-isystem",                         "1",    "",     "-isystem",             ""],
["--target-directory",                   "-target-dir",                      "1",    "W",    "",             ""],
["--threads",                            "-t",                               "1",    "W",    "",             ""],
["--time",                               "-time",                            "1",    "W",    "",             ""],
["--undefine-macro",                     "-U",                               "1",    "",     "-U",             ""],
["--use-local-env",                      "-use-local-env",                   "0",    "I",    "",             "Ignore"],
["--use_fast_math",                      "-use_fast_math",                   "0",    "",     "-D__FAST_MATH__",             ""],
["--verbose",                            "-v",                               "0",    "",     "-v",             ""],
["--version",                            "-V",                               "0",    "",     "--version",             ""],
["--version-ident",                      "-dQ",                              "1",    "I",    "",             "Ignore"],
["--Wdefault-stream-launch",             "-Wdefault-stream-launch",          "0",    "W",    "",             ""],       # TODO
["--Werror",                             "-Werror",                          "1",    "I",    "",             "Ignore"],     # For improve compatibility
["--Wext-lambda-captures-this",          "-Wext-lambda-captures-this",       "0",    "I",    "",             "Ignore"],
["--Wmissing-launch-bounds",             "-Wmissing-launch-bounds",          "0",    "I",    "",             "Ignore"],
["--Wno-deprecated-declarations",        "-Wno-deprecated-declarations",     "0",    "I",    "",             "Ignore"],
["--Wno-deprecated-gpu-targets",         "-Wno-deprecated-gpu-targets",      "0",    "I",    "",             "Ignore"],
["--Wreorder",                           "-Wreorder",                        "0",    "W",     "",             ""],
["--x",                                  "-x",                               "1",    "",     "-x",           ""],

# addition flags, not nvcc flags
["--cuda-gpu-arch",                      "",                                 "1",    "",     "--offload-arch",             ""],
["",                                     "-Xcudafe",                         "1",    "",     "-Xcudafe",             "Ignore"],
["",                                     "-Xfabinary",                       "1",    "",     "-Xfabinary",           "Ignore"],
);



# TODO: In the future, options will be distinguished according to the option categories on the CUDA official website.
# (https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html)
# * Options for specifying the compilation phase
# * File and path specifications.
# * Options for specifying behavior of compiler/linker.
# * Options for passing specific phase options
# * Miscellaneous options for guiding the compiler driver.
# * Options for steering GPU code generation.
# * Options for steering cuda compilation.
# * Generic tool options.


## Generic FLAG
use constant {
    FLAG_NONE => 1000,   # For the current parameter, the previous one is not a option.
    FLAG_NORMAL => 1001, # For the current parameter, the previous one is a normal option.
    FLAG_SKIP => 1002,   # For the current parameter, the previous one is an ignored option.
    FLAG_MARCOS => 1003,
};


## Parameters for single-valued options are hip-compatible
use constant {
    FLAG_XCOMPILER => 1100,
    FLAG_OFFLOAD_ARCH => 1101,
    FLAG_X => 1102,
    FLAG_L => 1103,
    FLAG_DEFAULT_STREAM => 1104,
    FLAG_RDC => 1105,
};


## Unsupported toolchain compile options FLAG
use constant {
    FLAG_TOOLCHAIN_INGORE => 1200,
};

my %toolchain_option_map = (
"-Xarchive" => FLAG_TOOLCHAIN_INGORE,
"-Xnvlink" => FLAG_TOOLCHAIN_INGORE,
"-Xptxas" => FLAG_TOOLCHAIN_INGORE,
"-Xcudafe" => FLAG_TOOLCHAIN_INGORE,
"-Xfatbinary" => FLAG_TOOLCHAIN_INGORE,
);


## fake option FLAG
use constant {
    FLAG_LIB => 1300,
    FLAG_DLINK => 1301,
};


## TODO: support for the collapsed option (-lcudart,cuda -> -lcudart -lcuda)
# use constant {
#     FLAG_L => 1400,
#     FLAG_I => 1401,
#     FLAG_l => 1402,
# };


# Additional hip options
use constant OFFLOAD_ARCH_OPTION => "--offload-arch";



my %xcompiler_paras_map = (
"-funroll-all-loops" => "-funroll-loops",
"-fno-unroll-all-loops" => "-fno-unroll-loops"
);

my %knownTargets=("906"=>"1", "926"=>"1", "928"=>"1");
my %matchedTargets=();

my %cmd_options_map=(); # global value, the map of command options
my %parsed_options=();
my %warned_options=();  # options ready to warning
my @args_arr = ();
my @noflag_paras=();    # without option before it


my %macros_options=("-D" => "", "-U" => "");
my %macros_map=();
# my %math_lib_map=("cublas" => "hipblas", "cufft" => "hipfft", "curand" => "hiprand", "cusparse" => "hipsparse");
my %math_lib_map=();

my $warning_offload_arch = "nvcc current only support gfx906,gfx926,gfx928 arch. All architecture parameters will be replaced by gfx906,gfx926,gfx928 arch.";

my $need_add_rdc=0;

sub print_map {
    print "print maps\n";
    my %local_map = @_;
    foreach my $key (keys %local_map) {
        my $value = $local_map{$key};
        print "key $key: ";
        foreach (@{$value}) {
            print "$_\t";
        }
        print "\n";
    }
}


# sub replace_marcos{
#     my ($opt_ref, $macro_ref) = @_;
#     if (exists $macros_options{$opt_ref}) {
#         if (exists $macros_map{$$macro_ref}) {
#             # print "replace marcros: $$macro_ref ";
#             $$macro_ref = $macros_map{$$macro_ref};
#             # print "by $$macro_ref\n";
#         }
#     }
# }

sub paras_arr_to_map {
    my (@arr) = @_;
    foreach my $row (@arr) {
        my $long_opt = $row->[0];
        my $short_opt = $row->[1];
        my @tmparray = ($row->[4], $row->[2], $row->[3], $row->[5]);   # hipcc_opt, para_num, action, $content
                if (length $long_opt) {
            @{$cmd_options_map{$long_opt}} = @tmparray;
        }
        if (length $short_opt) {
            @{$cmd_options_map{$short_opt}} = @tmparray;
        }
    }
}


# paras: mapref key value
# @_: %parsed_options, $curr_opt, $arg
sub add_value_to_map{
    my ($mapref, $key, $val)=@_;
    # print "insert $key $val \n";
    if (not exists $$mapref{$key}) {
        $mapref->{$key}=[];
    }
    push(@{$$mapref{$key}}, $val);
}

sub replace_paras {
    __replace_paras(@_);
    # push(@args_arr, OFFLOAD_ARCH_OPTION."=gfx906");
    # push(@args_arr, OFFLOAD_ARCH_OPTION."=gfx926");
    # push(@args_arr, OFFLOAD_ARCH_OPTION."=gfx928");
    foreach my $target (keys %matchedTargets) {
        if ($matchedTargets{$target}) {
            push(@args_arr, OFFLOAD_ARCH_OPTION . "=gfx$target");
        }
    }
}

sub __replace_paras {
    my $para_num=1;
    my $curr_opt="";
    my $flag = FLAG_NONE;

    foreach my $arg (@_) {   
        # Force ignore unsupported toolchain options, and option parameters.
        # PS: next option or next parameter
        if ($flag == FLAG_TOOLCHAIN_INGORE) {
            $flag = FLAG_NONE;
            next;
        }

        # regex match result: $1 (--options) $2 (--|-) $3 ([=/]?.*)
        if ($arg =~ m/^((--|-)[^=\/]+)([=\/]?.*)/ && $flag != FLAG_XCOMPILER) {


            # print "\n$arg: ";
            $curr_opt=$1;
            $para_num = 0;
            my $parameter=$3;
            $flag = FLAG_NORMAL;    # default, has only one para     

            # if not exist long option, try the short option (the collapsed option: -Idir1,dir2)
            if (not exists $cmd_options_map{$curr_opt}) {
                my $opt_len = length($2) + 1;
                my $tmp_opt = substr($1, 0, $opt_len);
               
                # Bug: If the collapsed option is in the nvcc-to-hipcc map and is actually a full clang option, then this will be a failed operation.(such as -ggdb/-msse4.1)
                # TODO: In the future, it may be necessary to add a separate recognizable flag for clang options.(-Xclanghip -fgpu-rdc)
                if (($curr_opt ne $tmp_opt) && exists $cmd_options_map{$tmp_opt} && ($cmd_options_map{$tmp_opt}->[1] ne '0')) {
                    $curr_opt=$tmp_opt;
                    $parameter=substr($1,$opt_len) .$3;     # if curr_opt not equal tmp_opt, the opt_len can not great than len($1)
                }
            }
   
            # If it is not an nvcc option, just push the $arg, pass it directly to clang.
            if (not exists $cmd_options_map{$curr_opt}) {
                push(@args_arr, $arg);
                next;
            }

            my $hip_opt=$cmd_options_map{$curr_opt}[0];
            my $para_num=$cmd_options_map{$curr_opt}[1];
            my $action=$cmd_options_map{$curr_opt}[2];
            my $content=$cmd_options_map{$curr_opt}[3];

            if ($action eq "E") {
                print "ERROR: Not allow exist $curr_opt\n";
                return -1;
            }
            elsif ($action eq "W") {
                $para_num=$cmd_options_map{$curr_opt}[1];
                if (length $parameter) {
                    $para_num -= 1;
                }
                if ($para_num > 0) {
                    $flag = FLAG_SKIP;
                }
                else {
                    $flag = FLAG_NONE;
                }
                if (length $content) {
                    $warned_options{$curr_opt}=$content;
                }
                else {
                    $warned_options{$curr_opt}="Not Support";
                }
                # print "warn for $curr_opt, parr_num $para_num\n";
                next;
            }
            elsif ($action eq "I") {    # ignore
                $para_num=$cmd_options_map{$curr_opt}[1];
                if (length $parameter) {
                    $para_num -= 1;
                }
                if ($para_num > 0) {
                    $flag = FLAG_SKIP;
                }
                else {
                    $flag = FLAG_NONE;
                }
                next;
            }

            # if no corresponding option, then must has an action
            if ((! length $hip_opt) && (! length $action) ) {
                print "Something wrong with the option $hip_opt in the options table\n";
                exit(-1);
            }

            $curr_opt = $hip_opt;
            # print "trans_curr: $curr_opt\n";

            if ($curr_opt eq "-Xcompiler") {
                # print "$curr_opt\n";
                $flag = FLAG_XCOMPILER;
                if (length $parameter) {
                    parse_Xcompiler($parameter);
                    $flag = FLAG_NONE;
                }
                next;
            }

            # Force ignore unsupported toolchain options.
            if (exists $toolchain_option_map{$curr_opt}) {
                # print "$curr_opt\n";
                $flag = $toolchain_option_map{$curr_opt};
                if (length $parameter) {
                    # parse_Xptxas($parameter);
                    $flag = FLAG_NONE;
                }
                next;
            }

            if ($curr_opt eq OFFLOAD_ARCH_OPTION) {
                $flag = FLAG_OFFLOAD_ARCH;
                if (length $parameter) {
                    my $warning_offload_arch_flag = parse_offload_arch($arg);
                    if ($warning_offload_arch_flag eq 0) {
                        $warned_options{$curr_opt}=$warning_offload_arch;
                    }
                    else {
                        add_value_to_map(\%parsed_options, $curr_opt, $arg);
                    }
                    $flag = FLAG_NONE;
                }
                next;
            }

            if ($curr_opt eq "-l") {
                $flag = FLAG_L;
                if (length $parameter) {
                    parse_l($parameter);
                    $flag = FLAG_NONE;
                }
                next;
            }

            if ($curr_opt eq "-fgpu-default-stream") {
                $flag = FLAG_DEFAULT_STREAM;
                if (length $parameter) {
                    parse_default_stream($parameter);
                    $flag = FLAG_NONE;
                }
                next;
            }

            if ($curr_opt eq "-rdc") {
                $flag = FLAG_RDC;
                if (length $parameter) {
                    parse_rdc($parameter);
                    $flag = FLAG_NONE;
                }
                next;
            }

            if ($curr_opt eq "-x") {
                $flag = FLAG_X;
                if (length $parameter) {
                    parse_xcu($parameter);
                    $flag = FLAG_NONE;
                }
                next;
            }

            # check if need to replace marco
            # if (exists $macros_options{$curr_opt}) {
            #     if (length $parameter) {
            #         replace_marcos($curr_opt, \$parameter);
            #     }
            #     else {
            #         $flag = FLAG_MARCOS;
            #     }
            # }


            # TODO: experimental 
            # Parameters of list options are decomposed by ",". 
            # if ($parameter =~ /,/) {
            #     my @split_params = split(',', $parameter);
            #     my $has_equal = ($split_params[0] =~ /=/);
            #     foreach my $split_param (@split_params) {
            #         $split_param =~ s/^\s+|\s+$//g;
            #         my $new_param;
            #         if ($has_equal) {
            #             $new_param = $curr_opt.'='.$split_param;
            #         } else {
            #             $new_param = $curr_opt.' '.$split_param;  
            #         }
            #         push(@args_arr, $new_param); 
            #     }
            # } else {
            #       push(@args_arr, $curr_opt.$parameter);
            # }

                  push(@args_arr, $curr_opt.$parameter);  # add the curr_opt and parameter to the args_arr

            if (length $parameter) {
                if ($para_num <= 0) {
                    print "something wrong with the option $curr_opt in the options table\n";
                    exit(-1);
                }
                add_value_to_map(\%parsed_options, $curr_opt, $parameter);
                $para_num -= 1;
            }

            if ($para_num <= 0) {
                add_value_to_map(\%parsed_options, $curr_opt, "");
                $flag = FLAG_NONE;
            }
            else {
                $flag = FLAG_NORMAL;
            }
        }
        else # not options
        {  
            # if (FLAG_MARCOS == $flag) {
            #     replace_marcos($curr_opt, \$arg);
            #     $flag = FLAG_NORMAL;
            #     # next;
            # }
            if (FLAG_SKIP == $flag) {
                $para_num -= 1;
                if ($para_num <= 0) {
                    $flag = FLAG_NONE;
                }
                next;
            }
            elsif (FLAG_XCOMPILER == $flag) {
                parse_Xcompiler($arg);
                $flag = FLAG_NONE;
                next;
            }
            elsif (FLAG_OFFLOAD_ARCH == $flag) {
                my $warning_offload_arch_flag = parse_offload_arch($arg);
                if ($warning_offload_arch_flag eq 0) {
                    $warned_options{$curr_opt}=$warning_offload_arch;
                }
                else {
                    add_value_to_map(\%parsed_options, $curr_opt, $arg);
                }
                $flag = FLAG_NONE;
                next;
            }
            elsif (FLAG_L == $flag) {
                parse_l($arg);
                $flag = FLAG_NONE;
                next;
            }
            elsif (FLAG_DEFAULT_STREAM == $flag) {
                parse_default_stream($arg);
                $flag = FLAG_NONE;
                next;
            }
            elsif(FLAG_RDC == $flag) {
                parse_rdc($arg);
                $flag = FLAG_NONE;
                next;
            }
            elsif(FLAG_X == $flag) {
                parse_xcu($arg);
                $flag = FLAG_NONE;
                next;
            }

            if (FLAG_NONE == $flag && !$need_add_rdc && ($arg =~ m/\.a$/ || $arg =~ m/\.o$/)) {
                my $fileType = `file $arg`;
                # TODO: Need to verify the difference with hipcc
                if ($fileType =~ m/ELF/ || $fileType =~ m/archive/) {
                    my $sections = `readelf -e -W $arg`;
                    if ($sections =~ m/__CLANG_OFFLOAD_BUNDLE__/) {
                        $need_add_rdc = 1;
                    }
                }
            }


            # TODO: experimental 
            # Parameters of list options are decomposed by ",". 
            # if ($arg =~ /,/) {
            #     my @split_params = split(',', $arg); 
            #     my $has_equal = ($split_params[0] =~ /=/);
            #     foreach my $split_param (@split_params) {
            #         $split_param =~ s/^\s+|\s+$//g;
            #         my $new_param;
            #         if ($has_equal) {
            #             $new_param = $curr_opt.'='.$split_param;
            #         } else {
            #             $new_param = $curr_opt.' '.$split_param;  
            #         }
            #         push(@args_arr, $new_param);
            #     }
            # } else {
            #     push(@args_arr, $arg);
            # }

                push(@args_arr, $arg);

            # without para before it
            if (FLAG_NONE == $flag) {
                push(@noflag_paras, $arg);
            }
            else
            {
                add_value_to_map(\%parsed_options, $curr_opt, $arg);
                $para_num -= 1;
                if ($para_num <= 0) {
                    $flag = FLAG_NONE;
                }
            }
        }
    }

    return 0;
}

# Translate one host-compiler option and append it to @args_arr.
#
#   $arg  a single option taken from the -Xcompiler value; an empty or
#         undefined value is ignored.
#
# When $arg starts with "-" or "--", the option name (everything up to the
# first "=" or "/") is looked up in %xcompiler_paras_map and replaced by the
# mapped spelling if one exists; whatever follows the name is kept verbatim.
# Anything that does not look like an option is forwarded unchanged.
sub replace_xcompiler_paras {
    my ($arg) = @_;
    return unless length $arg;

    # Capture 1 is the option name, capture 3 the "=value" / "/value" tail.
    if (my ($opt_name, undef, $opt_tail) = $arg =~ m/^((--|-)[^=\/]+)([=\/]?.*)/) {
        my $translated = exists $xcompiler_paras_map{$opt_name}
                       ? $xcompiler_paras_map{$opt_name}
                       : $opt_name;
        $arg = $translated . $opt_tail;
    }

    # push(@args_arr, "-Xarch_host $arg");
    push @args_arr, "$arg";
}

# Split the value given to -Xcompiler / --compiler-options into individual
# host-compiler options and hand each one to replace_xcompiler_paras().
#
#   $args  the raw option value. A leading "=" (the "-Xcompiler=..." form)
#          is dropped. An undefined or empty value is a no-op.
#
# The value is consumed from the left in two kinds of pieces:
#   * a double-quoted piece ("..." or \"...\"), optionally preceded by a
#     comma, is forwarded as ONE option without its quotes, so commas inside
#     the quotes are preserved;
#   * a run of characters containing no quote or backslash is split on ","
#     and every element is forwarded separately.
#
# If the remaining text cannot be consumed by either rule (a stray backslash
# or an unterminated quote), it is split on "," and forwarded as it is, and
# parsing stops. The earlier implementation never made progress on such input
# and looped forever.
sub parse_Xcompiler {
    my ($args) = @_;
    return unless defined $args;

    $args =~ s/^=//;     # remove =

    # Test the length rather than the truth value so that a remainder of "0"
    # is still processed.
    while (length $args) {
        if ($args =~ /^,?(\\?"([^\\"]*)\\?")(.*)/s) {    # in quote
            # $1 is with quote, $2 is not with quote, $3 is others
            my ($unquoted, $rest) = ($2, $3);
            $args = $rest;
            replace_xcompiler_paras($unquoted);
        }
        elsif ($args =~ /^([^\\"]+)(.*)/s) {     # not in quote
            # "+" (not "*") guarantees at least one character is consumed.
            my ($plain, $rest) = ($1, $2);
            $args = $rest;
            foreach my $piece (split(/,/, $plain)) {
                replace_xcompiler_paras($piece);
            }
        }
        else {
            # Neither rule can consume the head of $args: forward what is
            # left instead of spinning on it.
            foreach my $piece (split(/,/, $args)) {
                replace_xcompiler_paras($piece);
            }
            last;
        }
    }
}

# sub parse_x {
#     my ($arg) = @_;
#     if ($arg eq "c") {
#         $arg = "c";
#     }
#     else {
#         $arg = "hip";
#     }
#     push(@args_arr, "-x $arg");
#     # push(@args_arr, "-x hip");
# }

# Translate the value of a "-l <lib>" option and append the result to
# @args_arr.
#
#   $arg  library name as it followed the -l option.
#
# "gomp" becomes "-fopenmp" and nothing else is emitted. For every other
# name "-l<name>" is emitted; if the name has an entry in %math_lib_map the
# mapped library is emitted first as an additional "-l<mapped>".
# NOTE(review): a mapped library is added alongside the original name, not
# instead of it - confirm that linking both is intended.
sub parse_l {
    my ($lib) = @_;

    if ($lib eq "gomp") {
        push @args_arr, "-fopenmp";
        return;
    }

    my @link_flags;
    if (exists $math_lib_map{$lib}) {
        push @link_flags, "-l" . $math_lib_map{$lib};
    }
    push @link_flags, "-l" . $lib;

    push @args_arr, @link_flags;
}

# Translate the value of the default-stream option into the matching
# -fgpu-default-stream flag and append it to @args_arr.
#
#   $arg  option value, with or without a leading "=".
#
# "per-thread" maps to per-thread; "legacy" and "null" both map to legacy.
# Any other value emits nothing.
sub parse_default_stream {
    my ($arg) = @_;

    my $hip_flag
        = $arg =~ /^(?:=)?per-thread$/      ? "-fgpu-default-stream=per-thread"
        : $arg =~ /^(?:=)?(?:legacy|null)$/ ? "-fgpu-default-stream=legacy"
        :                                     undef;

    push(@args_arr, $hip_flag) if defined $hip_flag;
}

# Append "-fgpu-rdc" to @args_arr when the relocatable-device-code value is
# "true".
#
#   $arg  option value; only its end is inspected, so "true", "=true" and
#         any longer text ending in "true" are all accepted.
#
# Every other value emits nothing.
sub parse_rdc {
    my ($arg) = @_;
    return unless $arg =~ /=?true$/;
    push @args_arr, "-fgpu-rdc";
}

# File type pre-judgment
# clang cannot handle the form "-x = cu" or "xxx.cpp -x = cu", so the file
# type has to be determined in advance:
# 1) nvcc_helper::parse_xcu records the requested language in the package
#    globals below;
# 2) nvcc then consults those globals when it decides the file type.
our $PrefileTypeFlag  = 0;     # set to 1 once a recognised -x value was seen
our $PrefileType = "";         # the recognised value: "c", "c++" or "cu"
my $parse_xcu_count = 0;       # number of times parse_xcu has been called

# Record the language requested with "-x".
#
#   $arg  option value, with or without a leading "=".
#
# No option is emitted here; only $PrefileTypeFlag / $PrefileType are
# updated, and only for the values "c", "c++" and "cu". From the second call
# onwards a redefinition warning is printed first.
sub parse_xcu {
    my ($lang) = @_;
    # Eliminate "=", clang does not support the "-x=cu" form.
    $lang =~ s/^=//;

    # Note "-x" options should only appear once.
    print "nvcc warning : incompatible redefinition for option 'x', the last value of this option was used \n"
        if $parse_xcu_count >= 1;
    $parse_xcu_count++;

    # Not do any equivalent conversion here, but give the global flag.
    # push(@args_arr, "-x $arg");
    foreach my $known (qw(c c++ cu)) {
        next unless $lang eq $known;
        $PrefileTypeFlag = 1;
        $PrefileType = $known;
        last;
    }
}

# Parse the architecture-related options, obtain the available parameters,
# and store them in %matchedTargets.
# After calling __replace_paras, use "--offload-arch=" to add the parameters
# of matchedTargets.
#
#   $arg  comma-separated list of targets. A target is recognised when it
#         is, or ends after an "=" with, "gfx" followed by exactly three
#         digits.
#
# For every recognised target the three digits become a key of
# %matchedTargets, with value 1 if the digits are a key of %knownTargets and
# 0 otherwise.
#
# Returns 1 when every target was recognised and known, 0 as soon as one
# target was malformed or unknown.
sub parse_offload_arch {
    my ($arg) = @_;
    my $all_targets_ok = 1;

    # Split parameters into lists based on commas
    foreach my $target (split /,/, $arg) {
        my ($gfx_num) = $target =~ /(?:^|=)gfx(\d{3})$/;

        if (!defined $gfx_num) {
            $all_targets_ok = 0;
            next;
        }

        my $is_known = exists $knownTargets{$gfx_num} ? 1 : 0;
        $matchedTargets{$gfx_num} = $is_known;
        $all_targets_ok = 0 unless $is_known;
    }

    return $all_targets_ok;
}

# sub check_targets {
#     my ($arg) = @_;
#     if ($arg =~ /=?gfx(\d{3})$/) {
#         if (exists $knownTargets{$1}) {
#             return $1;
#         }
#     }
#     return 0;
# }


# Print the informational output collected while parsing the command line.
#
# With "--version" among the parsed options only the banner line is printed.
# Otherwise one "Warning:" line is printed for every entry of
# %warned_options. An entry with an empty text falls back to element [3] of
# the option's %cmd_options_map record. The offload-arch entry is printed
# without its option name; it is (re)set to $warning_offload_arch whenever it
# was already flagged or the option was not given at all.
sub print_info {
    # if has --version option, so don't need to print warning
    if (exists $parsed_options{"--version"}) {
        print "HIPHSA: Author SUGON\n";
        return;
    }

    my $arch_opt = OFFLOAD_ARCH_OPTION();
    my $arch_needs_warning = exists $warned_options{$arch_opt}
                          || !exists $parsed_options{$arch_opt};
    $warned_options{$arch_opt} = $warning_offload_arch if $arch_needs_warning;

    # print warning
    foreach my $opt (keys %warned_options) {
        my $text = $warned_options{$opt};
        $text = $cmd_options_map{$opt}[3] unless length $text;

        if ($opt eq $arch_opt) {
            print "Warning: $text\n";
        }
        else {
            print "Warning: $opt $text\n";
        }
    }
}

# Build a static library with "ar" when the command line asks for one.
#
# Applies only when "-lib" was parsed and the first "-o" value ends in ".a".
# In that case "ar rcs <output> <inputs>" is run, where the inputs are the
# entries of @noflag_paras, and the process exits afterwards. In every other
# case the sub returns without doing anything.
#
# ar is started with the list form of system() so that file names reach it
# exactly as given: no shell is involved, hence spaces or shell
# metacharacters in a name can neither split the argument nor run commands.
#
# Exit status: 255 if ar could not be started, the raw wait status if ar was
# killed by a signal, otherwise ar's own exit code, so a failed archive step
# is no longer reported as success.
sub check_use_ar {
    return unless exists $parsed_options{"-lib"};
    return unless exists $parsed_options{"-o"};

    my $outputs = $parsed_options{"-o"};
    return unless scalar @{$outputs} >= 1;

    my $archive = $outputs->[0];
    return unless defined $archive and $archive =~ m/\.a$/;

    # Blank entries vanished when the command was a shell string; keep that
    # behaviour so ar is never handed an empty file name.
    my @members = grep { defined $_ and /\S/ } @noflag_paras;

    system('ar', 'rcs', $archive, @members);
    if ($? == -1) {
        print "failed to execute: $!\n";
        exit($?);
    }
    elsif ($? & 127) {
        printf "child died with signal %d, %s coredump\n",
            ($? & 127),  ($? & 128) ? 'with' : 'without';
        exit($?);
    }

    # High byte of $? is the exit code of ar itself.
    exit($? >> 8);
}

# Although both native CUDA and DTK-CUDA support separate compilation, there
# are differences in the process.
# Native CUDA supports "-dc" and "-dlink" to compile and link device code
# separately; DTK-CUDA only supports "-dc" and "-hip-link", and "-hip-link"
# binds the overall link process of host and device code, so there is no
# independent link step for device code.
# check_use_dlink therefore fakes the dlink step to stay compatible with the
# native CUDA process.
#
# Applies only when "-dlink" was parsed; otherwise the sub returns without
# doing anything. When it applies, a one-line C++ source containing an empty
# function is piped into $ROCM_PATH/llvm/bin/clang++ and compiled to the
# first "-o" value (or "a_dlink.o" when no usable "-o" value exists), and
# the process exits afterwards. With an explicit output the function name
# gets the output name as suffix, with every character outside
# [a-zA-Z0-9_] replaced by "_".
#
# The values are passed to the shell as positional parameters and are never
# pasted into the command text, so an output path containing spaces or
# shell metacharacters is handled literally.
#
# Exit status: 255 if the shell could not be started, the raw wait status if
# it was killed by a signal, otherwise the exit code of the pipeline
# (clang++), so a failed compile is no longer reported as success.
sub check_use_dlink {
    return unless exists $parsed_options{"-dlink"};

    my $DLINK_PATH = $ENV{'ROCM_PATH'};
    my $clang      = "$DLINK_PATH/llvm/bin/clang++";
    my $output     = "a_dlink.o";
    my $cuda_code  = "void __sugon_cudamocker() {}";

    if (exists $parsed_options{"-o"} and defined $parsed_options{'-o'}->[0]) {
        # If the specified output file exists, modify the fake output and function.
        $output = $parsed_options{'-o'}->[0];
        (my $symbol_suffix = $output) =~ s/[^a-zA-Z0-9_]/_/g;
        $cuda_code = "void __sugon_cudamocker_$symbol_suffix() {}";
    }

    # $1 = source text, $2 = compiler, $3 = output file; the argument after
    # the script becomes $0 of the shell.
    my $pipeline = 'printf "%s\n" "$1" | "$2" -c -x c++ -o "$3" -';
    system('/bin/sh', '-c', $pipeline, 'sh', $cuda_code, $clang, $output);
    if ($? == -1) {
        print "failed to execute: $!\n";
        exit($?);
    }
    elsif ($? & 127) {
        printf "child died with signal %d, %s coredump\n",
            ($? & 127),  ($? & 128) ? 'with' : 'without';
        exit($?);
    }

    # High byte of $? is the exit code of the pipeline, i.e. of clang++.
    exit($? >> 8);
}

# Entry point: rewrite an nvcc argument list in place into hipcc arguments.
#
#   $args_ref  reference to the argument array; its contents are replaced
#              by the translated arguments.
#
# Steps, in order: build the option table from @nvcc_paras_arr, parse the
# arguments, run the fake dlink step and the ar step (either one may exit
# the process), add "-fgpu-rdc" when $need_add_rdc is set, print the
# collected warnings, and finally write the contents of @args_arr back
# through $args_ref.
#
# Always returns 0.
sub replace_para_cuda_to_hipcc {
    my ($args_ref) = @_;

    paras_arr_to_map(@nvcc_paras_arr);
    replace_paras(@$args_ref);

    # print_map(%parsed_options);
    # print "noflag_paras: @noflag_paras\n";

    check_use_dlink();
    check_use_ar();

    push(@args_arr, "-fgpu-rdc") if $need_add_rdc;

    print_info();
    # if (exists $parsed_options{"--version"}) {
    #     return 0;   # does nothing
    # }

    # Entries of @args_arr may themselves hold several space-separated
    # words, so flatten everything into one line and split it again.
    my $flattened = join " ", @args_arr;
    my @hipcc_args = grep { /\S/ } split(" ", $flattened);
    @$args_ref = @hipcc_args;

    return 0;
}

1;
