ModelZoo / YOLOv7-tiny_triton · Commits

Commit c68e1835, authored Sep 18, 2023 by lijian6

Initial commit

Pipeline #561 failed in 0 seconds.
Changes: 184 · Pipelines: 1
Showing 20 changed files with 3119 additions and 0 deletions (+3119, −0).
src/c++/perf_analyzer/mock_data_loader.h (+98, −0)
src/c++/perf_analyzer/mock_infer_context.h (+69, −0)
src/c++/perf_analyzer/mock_infer_data_manager.h (+140, −0)
src/c++/perf_analyzer/mock_inference_profiler.h (+123, −0)
src/c++/perf_analyzer/mock_load_manager.h (+37, −0)
src/c++/perf_analyzer/mock_model_parser.h (+78, −0)
src/c++/perf_analyzer/mock_profile_data_collector.h (+54, −0)
src/c++/perf_analyzer/mock_profile_data_exporter.h (+74, −0)
src/c++/perf_analyzer/mock_request_rate_worker.h (+79, −0)
src/c++/perf_analyzer/mock_sequence_manager.h (+91, −0)
src/c++/perf_analyzer/model_parser.cc (+437, −0)
src/c++/perf_analyzer/model_parser.h (+229, −0)
src/c++/perf_analyzer/mpi_utils.cc (+251, −0)
src/c++/perf_analyzer/mpi_utils.h (+85, −0)
src/c++/perf_analyzer/perf_analyzer.cc (+448, −0)
src/c++/perf_analyzer/perf_analyzer.h (+202, −0)
src/c++/perf_analyzer/perf_analyzer_exception.h (+54, −0)
src/c++/perf_analyzer/perf_analyzer_unit_tests.cc (+39, −0)
src/c++/perf_analyzer/perf_utils.cc (+391, −0)
src/c++/perf_analyzer/perf_utils.h (+140, −0)
src/c++/perf_analyzer/mock_data_loader.h (new file, mode 100644)
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "data_loader.h"
#include "gmock/gmock.h"

namespace triton { namespace perfanalyzer {

/// Mock DataLoader class used for testing to allow JSON data to be read
/// from string, rather than file.
///
class NaggyMockDataLoader : public DataLoader {
 public:
  NaggyMockDataLoader() { SetupMocks(); }
  NaggyMockDataLoader(size_t batch_size) : DataLoader(batch_size)
  {
    SetupMocks();
  }

  void SetupMocks()
  {
    ON_CALL(*this, GetTotalSteps(testing::_))
        .WillByDefault([this](size_t stream_id) -> size_t {
          return this->DataLoader::GetTotalSteps(stream_id);
        });
    ON_CALL(*this, ReadFile(testing::_, testing::_))
        .WillByDefault(
            [this](const std::string& path, std::vector<char>* contents)
                -> cb::Error {
              return this->DataLoader::ReadFile(path, contents);
            });
    ON_CALL(*this, ReadTextFile(testing::_, testing::_))
        .WillByDefault(
            [this](
                const std::string& path,
                std::vector<std::string>* contents) -> cb::Error {
              return this->DataLoader::ReadTextFile(path, contents);
            });
  }

  MOCK_METHOD(size_t, GetTotalSteps, (size_t), (override));
  MOCK_METHOD(cb::Error, ReadFile, (const std::string&, std::vector<char>*));
  MOCK_METHOD(
      cb::Error, ReadTextFile,
      (const std::string&, std::vector<std::string>*));

  cb::Error ReadDataFromJSON(
      const std::shared_ptr<ModelTensorMap>& inputs,
      const std::shared_ptr<ModelTensorMap>& outputs,
      const std::string& json_file) override
  {
    return ReadDataFromStr(json_file, inputs, outputs);
  }

  cb::Error ReadDataFromStr(
      const std::string& str, const std::shared_ptr<ModelTensorMap>& inputs,
      const std::shared_ptr<ModelTensorMap>& outputs)
  {
    rapidjson::Document d{};
    const unsigned int parseFlags = rapidjson::kParseNanAndInfFlag;
    d.Parse<parseFlags>(str.c_str());

    return ParseData(d, inputs, outputs);
  };

  std::vector<size_t>& step_num_{DataLoader::step_num_};
  size_t& data_stream_cnt_{DataLoader::data_stream_cnt_};
};

// Non-naggy version of Mock Data Loader (won't warn when using default gmock
// mocked function)
using MockDataLoader = testing::NiceMock<NaggyMockDataLoader>;

}}  // namespace triton::perfanalyzer
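
A minimal usage sketch of the mock above, assuming a gmock-based test file in the same directory: the mock lets a test feed inline JSON instead of reading a file. The empty tensor maps and the JSON layout are illustrative placeholders, not the exact schema the real tests use.

#include <memory>
#include <string>

#include "mock_data_loader.h"

// Sketch only: push inline JSON through the mock instead of reading a file.
static void ExampleReadInlineJson()
{
  using namespace triton::perfanalyzer;

  // Placeholder tensor maps; real tests register the model's actual tensors.
  auto inputs = std::make_shared<ModelTensorMap>();
  auto outputs = std::make_shared<ModelTensorMap>();

  MockDataLoader loader;
  std::string json{R"({"data": [{"INPUT0": [1, 2, 3, 4]}]})"};  // illustrative layout

  // ReadDataFromStr parses the string with rapidjson and forwards to ParseData.
  cb::Error status = loader.ReadDataFromStr(json, inputs, outputs);

  // A test would assert status.IsOk() and inspect the exposed step_num_ /
  // data_stream_cnt_ members.
  (void)status;
}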
src/c++/perf_analyzer/mock_infer_context.h (new file, mode 100644)
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "infer_context.h"

namespace triton { namespace perfanalyzer {

class NaggyMockInferContext : public InferContext {
 public:
  NaggyMockInferContext()
  {
    ON_CALL(*this, SendRequest(testing::_, testing::_, testing::_))
        .WillByDefault(
            [this](
                const uint64_t request_id, const bool delayed,
                const uint64_t sequence_id) -> void {
              this->InferContext::SendRequest(
                  request_id, delayed, sequence_id);
            });
  }

  MOCK_METHOD(
      void, SendRequest, (const uint64_t, const bool, const uint64_t),
      (override));

  std::shared_ptr<SequenceManager>& sequence_manager_{
      InferContext::sequence_manager_};
  std::shared_ptr<DataLoader>& data_loader_{InferContext::data_loader_};
  std::shared_ptr<IInferDataManager>& infer_data_manager_{
      InferContext::infer_data_manager_};
  std::shared_ptr<ThreadStat>& thread_stat_{InferContext::thread_stat_};
  std::reference_wrapper<const bool>& execute_{InferContext::execute_};
  bool& using_json_data_{InferContext::using_json_data_};
  bool& async_{InferContext::async_};
  bool& streaming_{InferContext::streaming_};
  InferData& infer_data_{InferContext::infer_data_};
  std::unique_ptr<cb::ClientBackend>& infer_backend_{
      InferContext::infer_backend_};
  std::function<void(cb::InferResult*)>& async_callback_func_{
      InferContext::async_callback_func_};
};

using MockInferContext = testing::NiceMock<NaggyMockInferContext>;

}}  // namespace triton::perfanalyzer
src/c++/perf_analyzer/mock_infer_data_manager.h (new file, mode 100644)
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "infer_data_manager.h"
#include "infer_data_manager_shm.h"
#include "mock_client_backend.h"

namespace triton { namespace perfanalyzer {

class MockInferDataManagerShm : public InferDataManagerShm {
 public:
  MockInferDataManagerShm(
      const int32_t batch_size, const SharedMemoryType shared_memory_type,
      const size_t output_shm_size, const std::shared_ptr<ModelParser>& parser,
      const std::shared_ptr<cb::ClientBackendFactory>& factory,
      const std::shared_ptr<DataLoader>& data_loader)
      : InferDataManagerShm(
            batch_size, shared_memory_type, output_shm_size, parser, factory,
            data_loader)
  {
  }

  // Mocked version of the CopySharedMemory method in loadmanager.
  // Tracks the mapping of shared memory label to data
  //
  cb::Error CopySharedMemory(
      uint8_t* input_shm_ptr, const std::vector<TensorData>& input_datas,
      bool is_shape_tensor, std::string& region_name) override
  {
    std::vector<int32_t> vals;
    for (size_t i = 0; i < input_datas.size(); i++) {
      int32_t val =
          *reinterpret_cast<const int32_t*>(input_datas[i].data_ptr);
      vals.push_back(val);
    }
    mocked_shared_memory_regions.insert(std::make_pair(region_name, vals));
    return cb::Error::Success;
  }

  cb::Error CreateInferInput(
      cb::InferInput** infer_input, const cb::BackendKind kind,
      const std::string& name, const std::vector<int64_t>& dims,
      const std::string& datatype) override
  {
    *infer_input = new cb::MockInferInput(kind, name, dims, datatype);
    return cb::Error::Success;
  }

  // Tracks the mapping of shared memory label to data
  std::map<std::string, std::vector<int32_t>> mocked_shared_memory_regions;
};

class MockInferDataManager : public InferDataManager {
 public:
  MockInferDataManager() { SetupMocks(); }

  MockInferDataManager(
      const size_t max_threads, const int32_t batch_size,
      const std::shared_ptr<ModelParser>& parser,
      const std::shared_ptr<cb::ClientBackendFactory>& factory,
      const std::shared_ptr<DataLoader>& data_loader)
      : InferDataManager(max_threads, batch_size, parser, factory, data_loader)
  {
    SetupMocks();
  }

  void SetupMocks()
  {
    ON_CALL(
        *this,
        UpdateInferData(testing::_, testing::_, testing::_, testing::_))
        .WillByDefault(
            [this](
                size_t thread_id, int stream_index, int step_index,
                InferData& infer_data) -> cb::Error {
              return this->InferDataManager::UpdateInferData(
                  thread_id, stream_index, step_index, infer_data);
            });
  }

  MOCK_METHOD(
      cb::Error, UpdateInferData, (size_t, int, int, InferData&), (override));

  cb::Error CreateInferInput(
      cb::InferInput** infer_input, const cb::BackendKind kind,
      const std::string& name, const std::vector<int64_t>& dims,
      const std::string& datatype) override
  {
    *infer_input = new cb::MockInferInput(kind, name, dims, datatype);
    return cb::Error::Success;
  }
};

class MockInferDataManagerFactory {
 public:
  static std::shared_ptr<IInferDataManager> CreateMockInferDataManager(
      const size_t max_threads, const int32_t batch_size,
      const SharedMemoryType shared_memory_type, const size_t output_shm_size,
      const std::shared_ptr<ModelParser>& parser,
      const std::shared_ptr<cb::ClientBackendFactory>& factory,
      const std::shared_ptr<DataLoader>& data_loader)
  {
    if (shared_memory_type == SharedMemoryType::NO_SHARED_MEMORY) {
      return std::make_shared<testing::NiceMock<MockInferDataManager>>(
          max_threads, batch_size, parser, factory, data_loader);
    } else {
      return std::make_shared<testing::NiceMock<MockInferDataManagerShm>>(
          batch_size, shared_memory_type, output_shm_size, parser, factory,
          data_loader);
    }
  }
};

}}  // namespace triton::perfanalyzer
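
A hedged sketch of how the factory switch above might be exercised. The numeric arguments are placeholders, and SYSTEM_SHARED_MEMORY is assumed to be one of the SharedMemoryType values alongside NO_SHARED_MEMORY; real tests pass in the parser, backend factory, and data loader fixtures they already construct.

#include <memory>

#include "mock_infer_data_manager.h"

using namespace triton::perfanalyzer;

// Sketch: NO_SHARED_MEMORY yields the plain mock; any other type yields the
// shared-memory mock, whose CopySharedMemory override records
// region_name -> values in mocked_shared_memory_regions instead of touching
// real shared memory.
static void ExampleCreateManagers(
    const std::shared_ptr<ModelParser>& parser,
    const std::shared_ptr<cb::ClientBackendFactory>& factory,
    const std::shared_ptr<DataLoader>& data_loader)
{
  auto plain = MockInferDataManagerFactory::CreateMockInferDataManager(
      4 /* max_threads */, 1 /* batch_size */,
      SharedMemoryType::NO_SHARED_MEMORY, 0 /* output_shm_size */, parser,
      factory, data_loader);

  // SYSTEM_SHARED_MEMORY is assumed here for illustration.
  auto shm = MockInferDataManagerFactory::CreateMockInferDataManager(
      4, 1, SharedMemoryType::SYSTEM_SHARED_MEMORY, 0, parser, factory,
      data_loader);

  (void)plain;
  (void)shm;
}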
src/c++/perf_analyzer/mock_inference_profiler.h (new file, mode 100644)
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "inference_profiler.h"

namespace triton { namespace perfanalyzer {

class NaggyMockInferenceProfiler : public InferenceProfiler {
 public:
  NaggyMockInferenceProfiler()
  {
    ON_CALL(
        *this, ValidLatencyMeasurement(
                   testing::_, testing::_, testing::_, testing::_, testing::_,
                   testing::_))
        .WillByDefault(
            [this](
                const std::pair<uint64_t, uint64_t>& valid_range,
                size_t& valid_sequence_count, size_t& delayed_request_count,
                std::vector<uint64_t>* latencies, size_t& response_count,
                std::vector<RequestRecord>& valid_requests) -> void {
              this->InferenceProfiler::ValidLatencyMeasurement(
                  valid_range, valid_sequence_count, delayed_request_count,
                  latencies, response_count, valid_requests);
            });
    ON_CALL(*this, SummarizeLatency(testing::_, testing::_))
        .WillByDefault(
            [this](
                const std::vector<uint64_t>& latencies,
                PerfStatus& summary) -> cb::Error {
              return this->InferenceProfiler::SummarizeLatency(
                  latencies, summary);
            });
    ON_CALL(*this, MergePerfStatusReports(testing::_, testing::_))
        .WillByDefault(
            [this](
                std::deque<PerfStatus>& perf_status,
                PerfStatus& summary_status) -> cb::Error {
              return this->InferenceProfiler::MergePerfStatusReports(
                  perf_status, summary_status);
            });
    ON_CALL(*this, MergeServerSideStats(testing::_, testing::_))
        .WillByDefault(
            [this](
                std::vector<ServerSideStats>& server_side_stats,
                ServerSideStats& server_side_summary) -> cb::Error {
              return this->InferenceProfiler::MergeServerSideStats(
                  server_side_stats, server_side_summary);
            });
    ON_CALL(
        *this, SummarizeClientStat(
                   testing::_, testing::_, testing::_, testing::_, testing::_,
                   testing::_, testing::_, testing::_))
        .WillByDefault(
            [this](
                const cb::InferStat& start_stat, const cb::InferStat& end_stat,
                const uint64_t duration_ns, const size_t valid_request_count,
                const size_t delayed_request_count,
                const size_t valid_sequence_count, const size_t response_count,
                PerfStatus& summary) -> cb::Error {
              return this->InferenceProfiler::SummarizeClientStat(
                  start_stat, end_stat, duration_ns, valid_request_count,
                  delayed_request_count, valid_sequence_count, response_count,
                  summary);
            });
  };

  MOCK_METHOD0(IncludeServerStats, bool());
  MOCK_METHOD(
      void, ValidLatencyMeasurement,
      ((const std::pair<uint64_t, uint64_t>&), size_t&, size_t&,
       std::vector<uint64_t>*, size_t&, std::vector<RequestRecord>&),
      (override));
  MOCK_METHOD(
      cb::Error, SummarizeLatency,
      (const std::vector<uint64_t>&, PerfStatus&), (override));
  MOCK_METHOD(
      cb::Error, MergePerfStatusReports,
      (std::deque<PerfStatus>&, PerfStatus&), (override));
  MOCK_METHOD(
      cb::Error, MergeServerSideStats,
      (std::vector<ServerSideStats>&, ServerSideStats&), (override));
  MOCK_METHOD(
      cb::Error, SummarizeClientStat,
      (const cb::InferStat&, const cb::InferStat&, const uint64_t,
       const size_t, const size_t, const size_t, const size_t, PerfStatus&),
      (override));

  std::shared_ptr<ModelParser>& parser_{InferenceProfiler::parser_};
  std::unique_ptr<LoadManager>& manager_{InferenceProfiler::manager_};
  bool& include_lib_stats_{InferenceProfiler::include_lib_stats_};
  std::vector<RequestRecord>& all_request_records_{
      InferenceProfiler::all_request_records_};
};

using MockInferenceProfiler = testing::NiceMock<NaggyMockInferenceProfiler>;

}}  // namespace triton::perfanalyzer
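
The constructor above wires every mocked method back to the real InferenceProfiler implementation, so the naggy mock behaves like the real class until a test overrides a specific call. A small gmock sketch of that override pattern; the method chosen and the canned return value are illustrative only.

#include "gmock/gmock.h"
#include "mock_inference_profiler.h"

using namespace triton::perfanalyzer;

// Sketch: stub one method with a canned result while every other call keeps
// falling through to InferenceProfiler via the ON_CALL defaults.
static void ExampleStubSummarizeLatency(NaggyMockInferenceProfiler& profiler)
{
  EXPECT_CALL(profiler, SummarizeLatency(testing::_, testing::_))
      .WillOnce(testing::Return(cb::Error::Success));
}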
src/c++/perf_analyzer/mock_load_manager.h (new file, mode 100644)
// Copyright 2023 (c), NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "load_manager.h"

namespace triton { namespace perfanalyzer {

class NaggyMockLoadManager : public LoadManager {};

using MockLoadManager = testing::NiceMock<NaggyMockLoadManager>;

}}  // namespace triton::perfanalyzer
src/c++/perf_analyzer/mock_model_parser.h (new file, mode 100644)
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "model_parser.h"

namespace triton { namespace perfanalyzer {

class MockModelParser : public ModelParser {
 public:
  MockModelParser() : ModelParser(clientbackend::BackendKind::TRITON) {}

  MockModelParser(
      bool is_sequence_model, bool is_decoupled_model,
      size_t max_batch_size = 64)
      : ModelParser(clientbackend::BackendKind::TRITON)
  {
    if (is_sequence_model) {
      scheduler_type_ = ModelParser::SEQUENCE;
    }
    is_decoupled_ = is_decoupled_model;
    max_batch_size_ = max_batch_size;
  }

  // Expose private function
  cb::Error GetInt(const rapidjson::Value& value, int64_t* integer_value)
  {
    return ModelParser::GetInt(value, integer_value);
  }

  // Expose private function
  cb::Error DetermineComposingModelMap(
      const std::vector<cb::ModelIdentifier>& bls_composing_models,
      const rapidjson::Document& config,
      std::unique_ptr<cb::ClientBackend>& backend)
  {
    return ModelParser::DetermineComposingModelMap(
        bls_composing_models, config, backend);
  }

  // Expose private function
  cb::Error DetermineSchedulerType(
      const rapidjson::Document& config,
      std::unique_ptr<cb::ClientBackend>& backend)
  {
    return ModelParser::DetermineSchedulerType(config, backend);
  }

  std::shared_ptr<ComposingModelMap>& composing_models_map_{
      ModelParser::composing_models_map_};
  std::shared_ptr<ModelTensorMap>& inputs_{ModelParser::inputs_};
};

}}  // namespace triton::perfanalyzer
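
A brief sketch of how this helper might be configured by a test that needs a sequence, decoupled model; the flag and batch-size values are arbitrary examples.

#include <memory>

#include "mock_model_parser.h"

using namespace triton::perfanalyzer;

// Sketch: the constructor flags flow straight into the protected base-class
// state, so code under test sees the corresponding accessor values.
static void ExampleConfigureParser()
{
  auto parser = std::make_shared<MockModelParser>(
      true /* is_sequence_model */, true /* is_decoupled_model */,
      8 /* max_batch_size */);

  // Expected: SchedulerType() == ModelParser::SEQUENCE,
  // IsDecoupled() == true, MaxBatchSize() == 8.
  (void)parser;
}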
src/c++/perf_analyzer/mock_profile_data_collector.h (new file, mode 100644)
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "profile_data_collector.h"

namespace triton { namespace perfanalyzer {

class NaggyMockProfileDataCollector : public ProfileDataCollector {
 public:
  NaggyMockProfileDataCollector()
  {
    ON_CALL(*this, FindExperiment(testing::_))
        .WillByDefault(
            [this](InferenceLoadMode& id)
                -> std::vector<Experiment>::iterator {
              return this->ProfileDataCollector::FindExperiment(id);
            });
  }

  MOCK_METHOD(
      std::vector<Experiment>::iterator, FindExperiment,
      (InferenceLoadMode&), (override));

  std::vector<Experiment>& experiments_{ProfileDataCollector::experiments_};
};

using MockProfileDataCollector =
    testing::NiceMock<NaggyMockProfileDataCollector>;

}}  // namespace triton::perfanalyzer
src/c++/perf_analyzer/mock_profile_data_exporter.h (new file, mode 100644)
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "profile_data_exporter.h"

namespace triton { namespace perfanalyzer {

class NaggyMockProfileDataExporter : public ProfileDataExporter {
 public:
  NaggyMockProfileDataExporter()
  {
    ON_CALL(*this, ConvertToJson(testing::_, testing::_))
        .WillByDefault(
            [this](
                const std::vector<Experiment>& raw_experiments,
                std::string& raw_version) -> void {
              return this->ProfileDataExporter::ConvertToJson(
                  raw_experiments, raw_version);
            });

    ON_CALL(*this, OutputToFile(testing::_))
        .WillByDefault([this](std::string& file_path) -> void {
          this->ProfileDataExporter::OutputToFile(file_path);
        });

    ON_CALL(*this, AddExperiment(testing::_, testing::_, testing::_))
        .WillByDefault(
            [this](
                rapidjson::Value& entry, rapidjson::Value& experiment,
                const Experiment& raw_experiment) -> void {
              this->ProfileDataExporter::AddExperiment(
                  entry, experiment, raw_experiment);
            });
  }

  MOCK_METHOD(
      void, ConvertToJson, (const std::vector<Experiment>&, std::string&),
      (override));
  MOCK_METHOD(
      void, AddExperiment,
      (rapidjson::Value&, rapidjson::Value&, const Experiment&), (override));
  MOCK_METHOD(void, OutputToFile, (std::string&), (override));

  rapidjson::Document& document_{ProfileDataExporter::document_};
};

using MockProfileDataExporter =
    testing::NiceMock<NaggyMockProfileDataExporter>;

}}  // namespace triton::perfanalyzer
src/c++/perf_analyzer/mock_request_rate_worker.h (new file, mode 100644)
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "request_rate_worker.h"

namespace triton { namespace perfanalyzer {

class NaggyMockRequestRateWorker : public RequestRateWorker {
 public:
  NaggyMockRequestRateWorker(
      uint32_t id, std::shared_ptr<ThreadStat> thread_stat,
      std::shared_ptr<ThreadConfig> thread_config,
      const std::shared_ptr<ModelParser> parser,
      std::shared_ptr<DataLoader> data_loader,
      const std::shared_ptr<cb::ClientBackendFactory> factory,
      const bool on_sequence_model, const bool async,
      const size_t max_threads, const bool using_json_data,
      const bool streaming, const int32_t batch_size,
      std::condition_variable& wake_signal, std::mutex& wake_mutex,
      bool& execute, std::chrono::steady_clock::time_point& start_time,
      const bool serial_sequences,
      const std::shared_ptr<IInferDataManager>& infer_data_manager,
      std::shared_ptr<SequenceManager> sequence_manager)
      : RequestRateWorker(
            id, thread_stat, thread_config, parser, data_loader, factory,
            on_sequence_model, async, max_threads, using_json_data, streaming,
            batch_size, wake_signal, wake_mutex, execute, start_time,
            serial_sequences, infer_data_manager, sequence_manager)
  {
    ON_CALL(*this, Infer()).WillByDefault([this]() -> void {
      RequestRateWorker::Infer();
    });
  }

  MOCK_METHOD(void, Infer, (), (override));

  void CreateContext() override { RequestRateWorker::CreateContext(); }

  void SendInferRequest()
  {
    if (thread_stat_->status_.IsOk()) {
      LoadWorker::SendInferRequest(0, false);
    }
  }

  void EmptyInfer() { thread_config_->is_paused_ = true; }
};

// Non-naggy version of Mock (won't warn when using default gmock
// mocked function)
using MockRequestRateWorker = testing::NiceMock<NaggyMockRequestRateWorker>;

}}  // namespace triton::perfanalyzer
src/c++/perf_analyzer/mock_sequence_manager.h (new file, mode 100644)
// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "gmock/gmock.h"
#include "sequence_manager.h"

namespace triton { namespace perfanalyzer {

class NaggyMockSequenceManager : public SequenceManager {
 public:
  NaggyMockSequenceManager() { SetupMocks(); }

  NaggyMockSequenceManager(
      const uint64_t start_sequence_id, const uint64_t sequence_id_range,
      const size_t sequence_length, const bool sequence_length_specified,
      const double sequence_length_variation, const bool using_json_data,
      std::shared_ptr<DataLoader> data_loader)
      : SequenceManager(
            start_sequence_id, sequence_id_range, sequence_length,
            sequence_length_specified, sequence_length_variation,
            using_json_data, data_loader)
  {
    SetupMocks();
  }

  void SetupMocks()
  {
    ON_CALL(*this, SetInferSequenceOptions(testing::_, testing::_))
        .WillByDefault([this](
                           const uint32_t seq_stat_index,
                           std::unique_ptr<cb::InferOptions>& options) {
          this->SequenceManager::SetInferSequenceOptions(
              seq_stat_index, options);
        });
    ON_CALL(*this, InitNewSequence(testing::_))
        .WillByDefault([this](int seq_stat_index) {
          this->SequenceManager::InitNewSequence(seq_stat_index);
        });
    ON_CALL(*this, GetNextSeqId(testing::_))
        .WillByDefault([this](int seq_stat_index) -> uint64_t {
          return this->SequenceManager::GetNextSeqId(seq_stat_index);
        });
    ON_CALL(*this, GetRandomSequenceLength(testing::_))
        .WillByDefault([this](double offset_ratio) -> size_t {
          return this->SequenceManager::GetRandomSequenceLength(offset_ratio);
        });
    ON_CALL(*this, GetNewDataStreamId()).WillByDefault([this]() -> size_t {
      return this->SequenceManager::GetNewDataStreamId();
    });
  }

  MOCK_METHOD(
      void, SetInferSequenceOptions,
      (const uint32_t, std::unique_ptr<cb::InferOptions>&), (override));
  MOCK_METHOD(void, InitNewSequence, (int), (override));
  MOCK_METHOD(uint64_t, GetNextSeqId, (int), (override));
  MOCK_METHOD(size_t, GetRandomSequenceLength, (double), (override));
  MOCK_METHOD(uint64_t, GetNewDataStreamId, (), (override));

  std::vector<std::shared_ptr<SequenceStatus>>& sequence_statuses_{
      SequenceManager::sequence_statuses_};
  std::atomic<uint64_t>& curr_seq_id_{SequenceManager::curr_seq_id_};
};

using MockSequenceManager = testing::NiceMock<NaggyMockSequenceManager>;

}}  // namespace triton::perfanalyzer
src/c++/perf_analyzer/model_parser.cc (new file, mode 100644)
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "model_parser.h"
#include "rapidjson/writer.h"

namespace triton { namespace perfanalyzer {

cb::Error
ModelParser::InitTriton(
    const rapidjson::Document& metadata, const rapidjson::Document& config,
    const std::string& model_version,
    const std::vector<cb::ModelIdentifier>& bls_composing_models,
    const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
    std::unique_ptr<cb::ClientBackend>& backend)
{
  model_name_ = metadata["name"].GetString();
  model_version_ = model_version;

  RETURN_IF_ERROR(
      DetermineComposingModelMap(bls_composing_models, config, backend));

  RETURN_IF_ERROR(DetermineSchedulerType(config, backend));

  max_batch_size_ = 0;
  const auto bs_itr = config.FindMember("max_batch_size");
  if (bs_itr != config.MemberEnd()) {
    int64_t mbs;
    RETURN_IF_ERROR(GetInt(bs_itr->value, &mbs));
    max_batch_size_ = mbs;
  }

  const auto txn_itr = config.FindMember("model_transaction_policy");
  if (txn_itr != config.MemberEnd()) {
    is_decoupled_ = txn_itr->value["decoupled"].GetBool();
  }

  // Get the information about inputs from metadata
  const auto inputs_itr = metadata.FindMember("inputs");
  if (inputs_itr != metadata.MemberEnd()) {
    for (const auto& input : inputs_itr->value.GetArray()) {
      auto it =
          inputs_->emplace(input["name"].GetString(), ModelTensor()).first;
      it->second.name_ = input["name"].GetString();
      it->second.datatype_ = input["datatype"].GetString();
      bool is_dynamic = false;
      bool skip = (max_batch_size_ > 0);
      for (const auto& dim : input["shape"].GetArray()) {
        if (skip) {
          skip = false;
          continue;
        }
        int64_t dim_int;
        RETURN_IF_ERROR(GetInt(dim, &dim_int));
        if (dim_int == -1) {
          is_dynamic = true;
        }
        it->second.shape_.push_back(dim_int);
      }

      if (is_dynamic) {
        const auto user_shape_it = input_shapes.find(it->second.name_);
        if (user_shape_it != input_shapes.end()) {
          // Update the default shape to be used.
          it->second.shape_.clear();
          for (const auto dim : user_shape_it->second) {
            it->second.shape_.push_back(dim);
          }
        }
      }
    }
  }

  // Check whether the tensor is shape tensor or not from config.
  const auto inputs_config_itr = config.FindMember("input");
  if (inputs_config_itr != config.MemberEnd()) {
    for (const auto& input_config : inputs_config_itr->value.GetArray()) {
      const auto name = std::string(
          input_config["name"].GetString(),
          input_config["name"].GetStringLength());
      auto it = inputs_->find(name);
      if (it == inputs_->end()) {
        return cb::Error(
            "no metadata found for input tensor " + name, pa::GENERIC_ERROR);
      }
      const auto& shape_tensor_itr =
          input_config.FindMember("is_shape_tensor");
      if (shape_tensor_itr != input_config.MemberEnd()) {
        it->second.is_shape_tensor_ = shape_tensor_itr->value.GetBool();
      }

      if (input_config.HasMember("optional")) {
        it->second.is_optional_ = input_config["optional"].GetBool();
      } else {
        it->second.is_optional_ = false;
      }
    }
  }

  // Get the information about outputs from metadata
  const auto outputs_itr = metadata.FindMember("outputs");
  if (outputs_itr != metadata.MemberEnd()) {
    for (const auto& output : outputs_itr->value.GetArray()) {
      auto it =
          outputs_->emplace(output["name"].GetString(), ModelTensor()).first;
      it->second.name_ = output["name"].GetString();
      it->second.datatype_ = output["datatype"].GetString();
      bool skip = (max_batch_size_ > 0);
      for (const auto& dim : output["shape"].GetArray()) {
        if (skip) {
          skip = false;
          continue;
        }
        int64_t dim_int;
        RETURN_IF_ERROR(GetInt(dim, &dim_int));
        it->second.shape_.push_back(dim_int);
      }
    }
  }

  // Check whether the tensor is shape tensor or not from config.
  const auto output_config_itr = config.FindMember("output");
  if (output_config_itr != config.MemberEnd()) {
    for (const auto& output_config : output_config_itr->value.GetArray()) {
      const auto name = std::string(
          output_config["name"].GetString(),
          output_config["name"].GetStringLength());
      auto itr = outputs_->find(name);
      if (itr == outputs_->end()) {
        return cb::Error(
            "no metadata found for output tensor " + name, pa::GENERIC_ERROR);
      }
      const auto& shape_tensor_itr =
          output_config.FindMember("is_shape_tensor");
      if (shape_tensor_itr != output_config.MemberEnd()) {
        itr->second.is_shape_tensor_ = shape_tensor_itr->value.GetBool();
      }
    }
  }

  // Check if model has response caching enabled
  const auto cache_itr = config.FindMember("response_cache");
  // response_cache_enabled_ set globally for reporting purposes if any
  // composing model has it enabled, so don't overwrite it if already set
  if (cache_itr != config.MemberEnd() && !response_cache_enabled_) {
    response_cache_enabled_ = cache_itr->value["enable"].GetBool();
  }

  return cb::Error::Success;
}

cb::Error
ModelParser::InitTFServe(
    const rapidjson::Document& metadata, const std::string& model_name,
    const std::string& model_version,
    const std::string& model_signature_name, const int32_t batch_size,
    const std::unordered_map<std::string, std::vector<int64_t>>& input_shapes,
    std::unique_ptr<cb::ClientBackend>& backend)
{
  model_name_ = model_name;
  model_version_ = model_version;
  model_signature_name_ = model_signature_name;
  // Get the scheduler type for the model
  scheduler_type_ = NONE;

  // Will use the user provided batch size as max. Relies on the service
  // to throw an error if not supported.
  max_batch_size_ = batch_size;

  const rapidjson::Value& signature_config =
      metadata["metadata"]["signature_def"]["signature_def"];
  if (!signature_config.HasMember(model_signature_name.c_str())) {
    return cb::Error(
        "Failed to find signature_name \"" + model_signature_name +
            "\" in the metadata",
        pa::GENERIC_ERROR);
  }

  // Get the information about inputs from metadata
  if (signature_config[model_signature_name.c_str()].HasMember("inputs")) {
    const rapidjson::Value& inputs =
        signature_config[model_signature_name.c_str()]["inputs"];
    for (rapidjson::Value::ConstMemberIterator json_itr = inputs.MemberBegin();
         json_itr != inputs.MemberEnd(); ++json_itr) {
      auto it =
          inputs_->emplace(json_itr->name.GetString(), ModelTensor()).first;
      it->second.name_ = json_itr->name.GetString();
      RETURN_IF_ERROR(ConvertDTypeFromTFS(
          json_itr->value["dtype"].GetString(), &it->second.datatype_));

      bool is_dynamic = false;
      if (json_itr->value["tensor_shape"]["unknown_rank"].GetBool()) {
        if (max_batch_size_ != 0) {
          return cb::Error(
              "Can not specify -b flag for saved model with unknown ranked "
              "inputs",
              pa::GENERIC_ERROR);
        }
        is_dynamic = true;
      } else {
        bool first_dim = true;
        for (const auto& dim :
             json_itr->value["tensor_shape"]["dim"].GetArray()) {
          int64_t dim_int;
          RETURN_IF_ERROR(GetInt(dim["size"], &dim_int));
          if (first_dim && (max_batch_size_ != 0)) {
            if (dim_int != -1) {
              return cb::Error(
                  "Can not specify -b flag for saved model with input not "
                  "having their first dim as -1",
                  pa::GENERIC_ERROR);
            }
            first_dim = false;
          } else {
            if (dim_int == -1) {
              is_dynamic = true;
            }
            it->second.shape_.push_back(dim_int);
          }
        }
      }

      if (is_dynamic) {
        const auto user_shape_it = input_shapes.find(it->second.name_);
        if (user_shape_it != input_shapes.end()) {
          // Update the default shape to be used.
          it->second.shape_.clear();
          for (const auto dim : user_shape_it->second) {
            it->second.shape_.push_back(dim);
          }
        }
      }
    }
  }

  // Will not extract the information about the information about the outputs.
  // As by default, the TensorFlow serving will return all the output tensors
  // if none are requested.
  // See here
  // https://github.com/tensorflow/serving/blob/2.3.0/tensorflow_serving/apis/predict.proto#L27

  return cb::Error::Success;
}

cb::Error
ModelParser::InitTorchServe(
    const std::string& model_name, const std::string& model_version,
    const int32_t batch_size)
{
  // TorchServe does not return model metadata hence we can not obtain any
  // parameters.
  model_name_ = model_name;
  model_version_ = model_version;
  max_batch_size_ = batch_size;

  // TorchServe needs to upload a file to the server. The input will hold the
  // path to the file which should be provided as json to --input-data
  auto it = inputs_->emplace("TORCHSERVE_INPUT", ModelTensor()).first;
  it->second.name_ = "TORCHSERVE_INPUT";
  it->second.datatype_ = "BYTES";
  // Supports only a single input file
  it->second.shape_.push_back(1);

  return cb::Error::Success;
}

cb::Error
ModelParser::DetermineComposingModelMap(
    const std::vector<cb::ModelIdentifier>& bls_composing_models,
    const rapidjson::Document& config,
    std::unique_ptr<cb::ClientBackend>& backend)
{
  RETURN_IF_ERROR(
      AddBLSComposingModels(bls_composing_models, config, backend));
  RETURN_IF_ERROR(AddEnsembleComposingModels(config, backend));

  return cb::Error::Success;
}

cb::Error
ModelParser::AddBLSComposingModels(
    const std::vector<cb::ModelIdentifier>& bls_composing_models,
    const rapidjson::Document& config,
    std::unique_ptr<cb::ClientBackend>& backend)
{
  for (auto model : bls_composing_models) {
    (*composing_models_map_)[config["name"].GetString()].insert(model);

    rapidjson::Document composing_model_config;
    RETURN_IF_ERROR(backend->ModelConfig(
        &composing_model_config, model.first, model.second));
    RETURN_IF_ERROR(
        AddEnsembleComposingModels(composing_model_config, backend));
  }

  return cb::Error::Success;
}

cb::Error
ModelParser::AddEnsembleComposingModels(
    const rapidjson::Document& config,
    std::unique_ptr<cb::ClientBackend>& backend)
{
  if (config.HasMember("platform") &&
      std::string(config["platform"].GetString()).compare("ensemble") == 0) {
    const auto step_itr = config["ensemble_scheduling"].FindMember("step");
    for (const auto& step : step_itr->value.GetArray()) {
      std::string step_model_version;
      int64_t model_version_int;
      RETURN_IF_ERROR(GetInt(step["model_version"], &model_version_int));
      if (model_version_int == -1) {
        step_model_version = "";
      } else {
        step_model_version = std::to_string(model_version_int);
      }

      (*composing_models_map_)[config["name"].GetString()].emplace(
          std::string(step["model_name"].GetString()), step_model_version);

      rapidjson::Document composing_model_config;
      RETURN_IF_ERROR(backend->ModelConfig(
          &composing_model_config, step["model_name"].GetString(),
          step_model_version));
      RETURN_IF_ERROR(
          AddEnsembleComposingModels(composing_model_config, backend));
    }
  }

  return cb::Error::Success;
}

cb::Error
ModelParser::DetermineSchedulerType(
    const rapidjson::Document& config,
    std::unique_ptr<cb::ClientBackend>& backend)
{
  scheduler_type_ = NONE;

  if (composing_models_map_->size() != 0) {
    bool is_sequential = false;
    RETURN_IF_ERROR(GetComposingSchedulerType(backend, &is_sequential));
    if (is_sequential) {
      scheduler_type_ = ENSEMBLE_SEQUENCE;
    } else {
      scheduler_type_ = ENSEMBLE;
    }
  } else {
    const auto& sequence_itr = config.FindMember("sequence_batching");
    if (sequence_itr != config.MemberEnd()) {
      scheduler_type_ = SEQUENCE;
    } else {
      const auto& dynamic_itr = config.FindMember("dynamic_batching");
      if (dynamic_itr != config.MemberEnd()) {
        scheduler_type_ = DYNAMIC;
      }
    }
  }

  return cb::Error::Success;
}

cb::Error
ModelParser::GetComposingSchedulerType(
    std::unique_ptr<cb::ClientBackend>& backend, bool* is_sequential)
{
  for (auto parent_composing_models : *composing_models_map_.get()) {
    auto& composing_models = parent_composing_models.second;
    for (auto composing_model : composing_models) {
      rapidjson::Document config;
      RETURN_IF_ERROR(backend->ModelConfig(
          &config, composing_model.first, composing_model.second));

      const auto& sequence_itr = config.FindMember("sequence_batching");
      if (sequence_itr != config.MemberEnd()) {
        *is_sequential = true;
      }

      const auto cache_itr = config.FindMember("response_cache");
      // response_cache_enabled_ set globally for reporting purposes if any
      // composing model has it enabled, so don't overwrite it if already set
      if (cache_itr != config.MemberEnd() && !response_cache_enabled_) {
        response_cache_enabled_ = cache_itr->value["enable"].GetBool();
      }
    }
  }

  return cb::Error::Success;
}

cb::Error
ModelParser::GetInt(const rapidjson::Value& value, int64_t* integer_value)
{
  if (value.IsString()) {
    std::string str(value.GetString(), value.GetStringLength());

    try {
      *integer_value = std::stoll(str.c_str());
    }
    catch (...) {
      return cb::Error(
          std::string("unable to convert '") + str + "' to integer",
          pa::GENERIC_ERROR);
    }
  } else if (value.IsInt64()) {
    *integer_value = value.GetInt64();
  } else if (value.IsInt()) {
    *integer_value = value.GetInt();
  } else {
    return cb::Error("failed to parse the integer value", pa::GENERIC_ERROR);
  }
  return cb::Error::Success;
}

}}  // namespace triton::perfanalyzer
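
GetInt above accepts either a JSON integer or a quoted decimal string and reports a GENERIC_ERROR for anything else. A small illustrative check, reaching the otherwise-private method through the MockModelParser introduced in this commit; the helper function name and literal values are examples only.

#include <cstdint>

#include "mock_model_parser.h"
#include "rapidjson/document.h"

using namespace triton::perfanalyzer;

// Sketch: both encodings of the number 8 parse to the same int64_t.
static void ExampleGetInt()
{
  MockModelParser parser;

  rapidjson::Document doc;
  doc.Parse(R"({"as_number": 8, "as_string": "8"})");

  int64_t from_number = 0;
  int64_t from_string = 0;
  cb::Error err_number = parser.GetInt(doc["as_number"], &from_number);
  cb::Error err_string = parser.GetInt(doc["as_string"], &from_string);

  // Both calls should succeed with the value 8; a non-integer value such as a
  // boolean would instead return a pa::GENERIC_ERROR.
  (void)err_number;
  (void)err_string;
}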
src/c++/perf_analyzer/model_parser.h (new file, mode 100644)
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <unordered_map>

#include "client_backend/client_backend.h"
#include "perf_utils.h"

namespace triton { namespace perfanalyzer {

#ifndef DOCTEST_CONFIG_DISABLE
class TestModelParser;
class MockModelParser;
#endif

struct ModelTensor {
  ModelTensor() : is_shape_tensor_(false) {}
  std::string name_;
  std::string datatype_;
  std::vector<int64_t> shape_;
  // Indicates if this tensor holds shape information for other tensors
  bool is_shape_tensor_;
  bool is_optional_;
};

using ModelTensorMap = std::map<std::string, ModelTensor>;
using ComposingModelMap = std::map<std::string, std::set<cb::ModelIdentifier>>;

//==============================================================================
/// ModelParser is a helper class to parse the information about the target
/// model from the metadata and configuration returned by the server.
///
/// Perf Analyzer depends upon the various properties of the model to correctly
/// generate and issue inference request for the model. The object of this
/// class will provide these necessary details.
class ModelParser {
 public:
  enum ModelSchedulerType {
    NONE,
    DYNAMIC,
    SEQUENCE,
    ENSEMBLE,
    ENSEMBLE_SEQUENCE
  };

  explicit ModelParser(cb::BackendKind backend_kind)
      : backend_kind_(backend_kind),
        inputs_(std::make_shared<ModelTensorMap>()),
        outputs_(std::make_shared<ModelTensorMap>()),
        composing_models_map_(std::make_shared<ComposingModelMap>()),
        scheduler_type_(NONE), max_batch_size_(0), is_decoupled_(false),
        response_cache_enabled_(false)
  {
  }

  /// Initializes the ModelParser with the metadata and config rapidjson DOM
  /// for the target model obtained from Triton service
  /// \param metadata The metadata of the target model.
  /// \param config The config of the target model.
  /// \param model_version The version of target model.
  /// \param bls_composing_models A list of BLS composing model identifiers
  /// \param input_shapes The user provided default shapes which will be used
  /// if a certain input has wildcard in its dimension.
  /// \param backend The backend object.
  /// \return cb::Error object indicating success or failure.
  cb::Error InitTriton(
      const rapidjson::Document& metadata, const rapidjson::Document& config,
      const std::string& model_version,
      const std::vector<cb::ModelIdentifier>& bls_composing_models,
      const std::unordered_map<std::string, std::vector<int64_t>>&
          input_shapes,
      std::unique_ptr<cb::ClientBackend>& backend);

  /// Initializes the ModelParser with the metadata and config rapidjson DOM
  /// for the target model obtained from TF serving service.
  /// \param metadata The metadata of the target model.
  /// \param model_name The name of target model.
  /// \param model_version The version of target model.
  /// \param model_signature_name The signature name of target model.
  /// \param input_shapes The user provided default shapes which will be used
  /// if a certain input has wildcard in its dimension.
  /// \param backend The backend object.
  /// \return cb::Error object indicating success or failure.
  cb::Error InitTFServe(
      const rapidjson::Document& metadata, const std::string& model_name,
      const std::string& model_version,
      const std::string& model_signature_name, const int32_t batch_size,
      const std::unordered_map<std::string, std::vector<int64_t>>&
          input_shapes,
      std::unique_ptr<cb::ClientBackend>& backend);

  cb::Error InitTorchServe(
      const std::string& model_name, const std::string& model_version,
      const int32_t batch_size);

  /// Get the name of the target model
  /// \return Model name as string
  const std::string& ModelName() const { return model_name_; }

  /// Get the version of target model
  /// \return Model version as string
  const std::string& ModelVersion() const { return model_version_; }

  /// Get the signature name of target model
  /// \return Model signature name as string
  const std::string& ModelSignatureName() const
  {
    return model_signature_name_;
  }

  /// Get the scheduler type for the model
  ModelSchedulerType SchedulerType() const { return scheduler_type_; }

  /// Get the max batch size supported by the model. Returns 0 if the model
  /// does not support batching.
  /// \return The maximum supported batch size.
  size_t MaxBatchSize() const { return max_batch_size_; }

  /// Returns whether or not the model is decoupled
  /// \return the truth value of whether the model is decoupled
  bool IsDecoupled() const { return is_decoupled_; }

  /// Returns whether or not response cache is enabled for this model
  /// \return the truth value of whether response cache is enabled for this
  /// model
  bool ResponseCacheEnabled() const { return response_cache_enabled_; }

  /// Get the details about the model inputs.
  /// \return The map with tensor_name and the tensor details
  /// stored as key-value pair.
  const std::shared_ptr<ModelTensorMap>& Inputs() { return inputs_; }

  /// Get the details about the model outputs.
  /// \return The map with tensor_name and the tensor details
  /// stored as key-value pair.
  const std::shared_ptr<ModelTensorMap>& Outputs() { return outputs_; }

  /// Get the composing maps for the target model.
  /// \return The pointer to the nested map describing the
  /// nested flow in the target model.
  const std::shared_ptr<ComposingModelMap>& GetComposingModelMap()
  {
    return composing_models_map_;
  }

 protected:
  ModelSchedulerType scheduler_type_;
  bool is_decoupled_;

 private:
  /// Populate composing_models_map_ based on any bls composing models passed
  /// in via the CLI as well as any ensemble or nested ensemble models
  cb::Error DetermineComposingModelMap(
      const std::vector<cb::ModelIdentifier>& bls_composing_models,
      const rapidjson::Document& config,
      std::unique_ptr<cb::ClientBackend>& backend);

  cb::Error AddBLSComposingModels(
      const std::vector<cb::ModelIdentifier>& bls_composing_models,
      const rapidjson::Document& config,
      std::unique_ptr<cb::ClientBackend>& backend);

  cb::Error AddEnsembleComposingModels(
      const rapidjson::Document& config,
      std::unique_ptr<cb::ClientBackend>& backend);

  /// Populate scheduler_type_ based on the scheduler type of the parent model
  /// as well as any composing models
  cb::Error DetermineSchedulerType(
      const rapidjson::Document& config,
      std::unique_ptr<cb::ClientBackend>& backend);

  /// Sets is_sequential to true if any of the composing models are sequential
  cb::Error GetComposingSchedulerType(
      std::unique_ptr<cb::ClientBackend>& backend, bool* is_sequential);

  /// In the json produced by protobuf, int64 and uint64 values are
  /// represented as strings. Protobuf doesn't provide an option to
  /// disable this (sigh) so we need to correctly parse these fields
  /// for ModelParser to receive appropriate requests.
  /// \param value The rapidjson value object with the int value.
  /// \param integer_value The output integer pointer.
  /// \return cb::Error object indicating success or failure.
  cb::Error GetInt(const rapidjson::Value& value, int64_t* integer_value);

  cb::BackendKind backend_kind_;
  std::shared_ptr<ModelTensorMap> inputs_;
  std::shared_ptr<ModelTensorMap> outputs_;
  std::shared_ptr<ComposingModelMap> composing_models_map_;
  std::string model_name_;
  std::string model_version_;
  std::string model_signature_name_;
  size_t max_batch_size_;
  bool response_cache_enabled_;

#ifndef DOCTEST_CONFIG_DISABLE
  friend TestModelParser;
  friend MockModelParser;

 public:
  ModelParser() = default;
#endif
};

}}  // namespace triton::perfanalyzer
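To make the parsing flow above concrete, here is a minimal usage sketch (not part of this commit) of how a caller might drive ModelParser against a Triton endpoint. It assumes a valid cb::ClientBackend already exists and that the metadata/config documents were fetched with the backend's ModelMetadata/ModelConfig calls shown later in perf_analyzer.cc; the helper name InspectModel is hypothetical.

#include <iostream>
#include <memory>

#include "model_parser.h"

namespace pa = triton::perfanalyzer;

cb::Error
InspectModel(
    const rapidjson::Document& metadata, const rapidjson::Document& config,
    std::unique_ptr<cb::ClientBackend>& backend)
{
  auto parser = std::make_shared<pa::ModelParser>(cb::BackendKind::TRITON);
  // No BLS composing models and no user-provided default shapes in this sketch.
  cb::Error err = parser->InitTriton(
      metadata, config, /*model_version=*/"1",
      /*bls_composing_models=*/{}, /*input_shapes=*/{}, backend);
  if (!err.IsOk()) {
    return err;
  }

  // The parsed properties are what drive request generation elsewhere.
  std::cout << parser->ModelName()
            << " max batch size: " << parser->MaxBatchSize() << std::endl;
  for (const auto& input : *parser->Inputs()) {
    std::cout << "input " << input.first << " dtype "
              << input.second.datatype_ << std::endl;
  }
  return cb::Error::Success;
}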
src/c++/perf_analyzer/mpi_utils.cc
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mpi_utils.h"
#include <dlfcn.h>
#include <iostream>
#include <stdexcept>
namespace
triton
{
namespace
perfanalyzer
{
MPIDriver
::
MPIDriver
(
bool
is_enabled
)
:
is_enabled_
(
is_enabled
)
{
if
(
is_enabled_
==
false
)
{
return
;
}
handle_
=
dlopen
(
"libmpi.so"
,
RTLD_LAZY
|
RTLD_GLOBAL
);
if
(
handle_
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to load MPI library. If you are trying to run with "
"MPI / multiple models, check that 'libmpi.so' is on "
"`LD_LIBRARY_PATH` environment variable path."
);
}
CheckMPIImpl
();
}
bool
MPIDriver
::
IsMPIRun
()
{
if
(
is_enabled_
==
false
)
{
return
false
;
}
if
(
MPIInitialized
()
==
false
)
{
throw
std
::
runtime_error
(
"Must call MPI_Init() before calling IsMPIRun()."
);
}
return
MPICommSizeWorld
()
>
1
;
}
void
MPIDriver
::
MPIInit
(
int
*
argc
,
char
***
argv
)
{
if
(
is_enabled_
==
false
)
{
return
;
}
int
(
*
MPI_Init
)(
int
*
,
char
***
){(
int
(
*
)(
int
*
,
char
***
))
dlsym
(
handle_
,
"MPI_Init"
)};
if
(
MPI_Init
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `MPI_Init` symbol."
);
}
MPI_Init
(
argc
,
argv
);
}
int
MPIDriver
::
MPICommSizeWorld
()
{
if
(
is_enabled_
==
false
)
{
return
-
1
;
}
int
world_size
{
1
};
int
(
*
MPI_Comm_size
)(
void
*
,
int
*
){(
int
(
*
)(
void
*
,
int
*
))
dlsym
(
handle_
,
"MPI_Comm_size"
)};
if
(
MPI_Comm_size
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `MPI_Comm_size` symbol."
);
}
MPI_Comm_size
(
MPICommWorld
(),
&
world_size
);
return
world_size
;
}
void
MPIDriver
::
MPIBarrierWorld
()
{
if
(
is_enabled_
==
false
)
{
return
;
}
int
(
*
MPI_Barrier
)(
void
*
){(
int
(
*
)(
void
*
))
dlsym
(
handle_
,
"MPI_Barrier"
)};
if
(
MPI_Barrier
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `MPI_Barrier` symbol."
);
}
MPI_Barrier
(
MPICommWorld
());
}
int
MPIDriver
::
MPICommRankWorld
()
{
if
(
is_enabled_
==
false
)
{
return
-
1
;
}
int
rank
{
0
};
int
(
*
MPI_Comm_rank
)(
void
*
,
int
*
){(
int
(
*
)(
void
*
,
int
*
))
dlsym
(
handle_
,
"MPI_Comm_rank"
)};
if
(
MPI_Comm_rank
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `MPI_Comm_rank` symbol."
);
}
MPI_Comm_rank
(
MPICommWorld
(),
&
rank
);
return
rank
;
}
void
MPIDriver
::
MPIBcastIntWorld
(
void
*
buffer
,
int
count
,
int
root
)
{
if
(
is_enabled_
==
false
)
{
return
;
}
int
(
*
MPI_Bcast
)(
void
*
,
int
,
void
*
,
int
,
void
*
){
(
int
(
*
)(
void
*
,
int
,
void
*
,
int
,
void
*
))
dlsym
(
handle_
,
"MPI_Bcast"
)};
if
(
MPI_Bcast
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `MPI_Bcast` symbol."
);
}
MPI_Bcast
(
buffer
,
count
,
MPIInt
(),
root
,
MPICommWorld
());
}
void
MPIDriver
::
MPIFinalize
()
{
if
(
is_enabled_
==
false
)
{
return
;
}
int
(
*
MPI_Finalize
)(){(
int
(
*
)())
dlsym
(
handle_
,
"MPI_Finalize"
)};
if
(
MPI_Finalize
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `MPI_Finalize` symbol."
);
}
MPI_Finalize
();
}
bool
MPIDriver
::
MPIInitialized
()
{
if
(
is_enabled_
==
false
)
{
return
false
;
}
int
(
*
MPI_Initialized
)(
int
*
){
(
int
(
*
)(
int
*
))
dlsym
(
handle_
,
"MPI_Initialized"
)};
if
(
MPI_Initialized
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `MPI_Initialized` symbol."
);
}
int
initialized
{
0
};
MPI_Initialized
(
&
initialized
);
return
initialized
!=
0
;
}
void
*
MPIDriver
::
MPICommWorld
()
{
if
(
is_enabled_
==
false
)
{
return
nullptr
;
}
void
*
MPI_COMM_WORLD
{
dlsym
(
handle_
,
"ompi_mpi_comm_world"
)};
if
(
MPI_COMM_WORLD
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `ompi_mpi_comm_world` symbol."
);
}
return
MPI_COMM_WORLD
;
}
void
*
MPIDriver
::
MPIInt
()
{
if
(
is_enabled_
==
false
)
{
return
nullptr
;
}
void
*
MPI_INT
{
dlsym
(
handle_
,
"ompi_mpi_int"
)};
if
(
MPI_INT
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `ompi_mpi_int` symbol."
);
}
return
MPI_INT
;
}
void
MPIDriver
::
CheckMPIImpl
()
{
if
(
is_enabled_
==
false
)
{
return
;
}
int
(
*
MPI_Get_library_version
)(
char
*
,
int
*
){
(
int
(
*
)(
char
*
,
int
*
))
dlsym
(
handle_
,
"MPI_Get_library_version"
)};
if
(
MPI_Get_library_version
==
nullptr
)
{
throw
std
::
runtime_error
(
"Unable to obtain address of `MPI_Get_library_version` symbol."
);
}
std
::
string
version
;
version
.
resize
(
MPIVersionStringMaximumLength
);
int
resultlen
{
0
};
MPI_Get_library_version
(
&
version
[
0
],
&
resultlen
);
if
(
version
.
find
(
"Open MPI"
)
!=
0
)
{
throw
std
::
runtime_error
(
"Perf Analyzer only supports Open MPI. Please uninstall your current "
"implementation of MPI and install Open MPI."
);
}
}
}}
// namespace triton::perfanalyzer
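The dlsym-based lookups above let Perf Analyzer avoid a compile-time dependency on MPI. Below is a standalone sketch (not part of this commit) of the same pattern applied to an unrelated library; "libm.so.6" and "cos" are stand-in names used purely for illustration.

// Resolve a C function from a shared object at runtime and call it through a
// typed function pointer, exactly as the MPI_* lookups above do.
#include <dlfcn.h>

#include <iostream>
#include <stdexcept>

int
main()
{
  void* handle = dlopen("libm.so.6", RTLD_LAZY | RTLD_GLOBAL);
  if (handle == nullptr) {
    throw std::runtime_error("Unable to load libm.");
  }

  // Cast the void* returned by dlsym to the exact function-pointer type
  // before calling it.
  double (*cos_fn)(double){(double (*)(double))dlsym(handle, "cos")};
  if (cos_fn == nullptr) {
    throw std::runtime_error("Unable to obtain address of `cos` symbol.");
  }

  std::cout << cos_fn(0.0) << std::endl;  // prints 1
  dlclose(handle);
  return 0;
}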
src/c++/perf_analyzer/mpi_utils.h
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <memory>

namespace triton { namespace perfanalyzer {

class MPIDriver {
 public:
  // Initializes class. Saves handle to MPI library if MPI library is
  // available.
  MPIDriver(bool is_enabled = false);

  // Returns true if the current process is an MPI process with world size
  // greater than 1.
  bool IsMPIRun();

  // Attempts to call MPI_Init API.
  void MPIInit(int* argc, char*** argv);

  // Attempts to call MPI_Comm_size API with MPI_COMM_WORLD communicator.
  int MPICommSizeWorld();

  // Attempts to call MPI_Barrier API with MPI_COMM_WORLD communicator.
  void MPIBarrierWorld();

  // Attempts to call MPI_Comm_rank API with MPI_COMM_WORLD communicator.
  int MPICommRankWorld();

  // Attempts to call MPI_Bcast API with MPI_INT data type and MPI_COMM_WORLD
  // communicator.
  void MPIBcastIntWorld(void* buffer, int count, int root);

  // Attempts to call MPI_Finalize API.
  void MPIFinalize();

 private:
  // Attempts to call MPI_Initialized API.
  bool MPIInitialized();

  // Returns MPI_COMM_WORLD symbol address if MPI library is available,
  // otherwise `nullptr`.
  void* MPICommWorld();

  // Returns MPI_INT symbol address if MPI library is available, otherwise
  // `nullptr`.
  void* MPIInt();

  // Attempts to check that Open MPI is installed.
  void CheckMPIImpl();

  // Bool for whether user has opted to attempt to use MPI functionality.
  bool is_enabled_{false};

  // Loaded object for MPI library.
  void* handle_{nullptr};

  // Maximum string length for MPI version string.
  const uint64_t MPIVersionStringMaximumLength{32768};
};

}}  // namespace triton::perfanalyzer
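A minimal sketch (not part of this commit) of how the driver above might be exercised. It assumes the binary is launched under mpirun and that Open MPI's libmpi.so is discoverable on LD_LIBRARY_PATH.

#include <iostream>

#include "mpi_utils.h"

int
main(int argc, char** argv)
{
  triton::perfanalyzer::MPIDriver mpi_driver(/*is_enabled=*/true);
  mpi_driver.MPIInit(&argc, &argv);

  if (mpi_driver.IsMPIRun()) {
    int rank = mpi_driver.MPICommRankWorld();
    int world = mpi_driver.MPICommSizeWorld();

    // Rank 0 picks a value and broadcasts it to every other rank.
    int chosen_value = (rank == 0) ? 42 : 0;
    mpi_driver.MPIBcastIntWorld(&chosen_value, /*count=*/1, /*root=*/0);
    mpi_driver.MPIBarrierWorld();

    std::cout << "rank " << rank << "/" << world << " got " << chosen_value
              << std::endl;
  }

  mpi_driver.MPIFinalize();
  return 0;
}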
src/c++/perf_analyzer/perf_analyzer.cc
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "perf_analyzer.h"
#include "perf_analyzer_exception.h"
#include "report_writer.h"
#include "request_rate_manager.h"
namespace
pa
=
triton
::
perfanalyzer
;
namespace
triton
{
namespace
perfanalyzer
{
volatile
bool
early_exit
=
false
;
void
SignalHandler
(
int
signum
)
{
std
::
cout
<<
"Interrupt signal ("
<<
signum
<<
") received."
<<
std
::
endl
;
// Upon invoking the SignalHandler for the first time early_exit flag is
// invoked and analyzer waits for in-flight inferences to complete before
// exiting. On the second invocation, the program exits immediately.
if
(
!
early_exit
)
{
std
::
cout
<<
"Waiting for in-flight inferences to complete."
<<
std
::
endl
;
early_exit
=
true
;
}
else
{
std
::
cout
<<
"Exiting immediately..."
<<
std
::
endl
;
exit
(
0
);
}
}
}}
// namespace triton::perfanalyzer
PerfAnalyzer::PerfAnalyzer(pa::PAParamsPtr params) : params_(params)
{
  CreateAnalyzerObjects();
}

void
PerfAnalyzer::Run()
{
  PrerunReport();
  Profile();
  WriteReport();
  GenerateProfileExport();
  Finalize();
}
void
PerfAnalyzer::CreateAnalyzerObjects()
{
  // trap SIGINT to allow threads to exit gracefully
  signal(SIGINT, pa::SignalHandler);

  std::shared_ptr<cb::ClientBackendFactory> factory;
  FAIL_IF_ERR(
      cb::ClientBackendFactory::Create(
          params_->kind, params_->url, params_->protocol, params_->ssl_options,
          params_->trace_options, params_->compression_algorithm,
          params_->http_headers, params_->triton_server_path,
          params_->model_repository_path, params_->extra_verbose,
          params_->metrics_url, params_->input_tensor_format,
          params_->output_tensor_format, &factory),
      "failed to create client factory");

  FAIL_IF_ERR(
      factory->CreateClientBackend(&backend_),
      "failed to create triton client backend");

  parser_ = std::make_shared<pa::ModelParser>(params_->kind);
  if (params_->kind == cb::BackendKind::TRITON ||
      params_->kind == cb::BackendKind::TRITON_C_API) {
    rapidjson::Document model_metadata;
    FAIL_IF_ERR(
        backend_->ModelMetadata(
            &model_metadata, params_->model_name, params_->model_version),
        "failed to get model metadata");
    rapidjson::Document model_config;
    FAIL_IF_ERR(
        backend_->ModelConfig(
            &model_config, params_->model_name, params_->model_version),
        "failed to get model config");
    FAIL_IF_ERR(
        parser_->InitTriton(
            model_metadata, model_config, params_->model_version,
            params_->bls_composing_models, params_->input_shapes, backend_),
        "failed to create model parser");
  } else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) {
    rapidjson::Document model_metadata;
    FAIL_IF_ERR(
        backend_->ModelMetadata(
            &model_metadata, params_->model_name, params_->model_version),
        "failed to get model metadata");
    FAIL_IF_ERR(
        parser_->InitTFServe(
            model_metadata, params_->model_name, params_->model_version,
            params_->model_signature_name, params_->batch_size,
            params_->input_shapes, backend_),
        "failed to create model parser");
  } else if (params_->kind == cb::BackendKind::TORCHSERVE) {
    FAIL_IF_ERR(
        parser_->InitTorchServe(
            params_->model_name, params_->model_version, params_->batch_size),
        "failed to create model parser");
  } else {
    std::cerr << "unsupported client backend kind" << std::endl;
    throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
  }

  if ((parser_->MaxBatchSize() == 0) && params_->batch_size > 1) {
    std::cerr << "can not specify batch size > 1 as the model does not support "
                 "batching"
              << std::endl;
    throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
  }

  // Change the default value for the --async option for sequential models
  if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) ||
      (parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) {
    if (!params_->async) {
      params_->async = params_->forced_sync ? false : true;
    }
    // Validate the batch_size specification
    if (params_->batch_size > 1) {
      std::cerr << "can not specify batch size > 1 when using a sequence model"
                << std::endl;
      throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
    }
  }

  if (params_->streaming) {
    if (params_->forced_sync) {
      std::cerr << "can not use streaming with synchronous API" << std::endl;
      throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
    }
    params_->async = true;
  }

  std::unique_ptr<pa::LoadManager> manager;

  if (params_->targeting_concurrency()) {
    if ((parser_->SchedulerType() == pa::ModelParser::SEQUENCE) ||
        (parser_->SchedulerType() == pa::ModelParser::ENSEMBLE_SEQUENCE)) {
      if (params_->concurrency_range.end == pa::NO_LIMIT && params_->async) {
        std::cerr << "The 'end' concurrency can not be 0 for sequence "
                     "models when using asynchronous API."
                  << std::endl;
        throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
      }
    }
    params_->max_concurrency = std::max(
        params_->concurrency_range.start, params_->concurrency_range.end);

    if (!params_->async) {
      if (params_->concurrency_range.end == pa::NO_LIMIT) {
        std::cerr
            << "WARNING: The maximum attainable concurrency will be limited by "
               "max_threads specification."
            << std::endl;
        params_->concurrency_range.end = params_->max_threads;
      } else {
        // As only one synchronous request can be generated from a thread at a
        // time, to maintain the requested concurrency, that many threads need
        // to be generated.
        if (params_->max_threads_specified) {
          std::cerr << "WARNING: Overriding max_threads specification to ensure "
                       "requested concurrency range."
                    << std::endl;
        }
        params_->max_threads = std::max(
            params_->concurrency_range.start, params_->concurrency_range.end);
      }
    }
    if ((params_->sequence_id_range != 0) &&
        (params_->sequence_id_range < params_->max_concurrency)) {
      std::cerr << "sequence id range specified is smaller than the "
                << "maximum possible concurrency, sequence id collision may "
                << "occur." << std::endl;
      throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
    }
    FAIL_IF_ERR(
        pa::ConcurrencyManager::Create(
            params_->async, params_->streaming, params_->batch_size,
            params_->max_threads, params_->max_concurrency,
            params_->shared_memory_type, params_->output_shm_size, parser_,
            factory, &manager),
        "failed to create concurrency manager");
  } else if (params_->using_request_rate_range) {
    if ((params_->sequence_id_range != 0) &&
        (params_->sequence_id_range < params_->num_of_sequences)) {
      std::cerr << "sequence id range specified is smaller than the "
                << "maximum possible number of sequences, sequence id collision "
                << "may occur." << std::endl;
      throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
    }
    FAIL_IF_ERR(
        pa::RequestRateManager::Create(
            params_->async, params_->streaming, params_->measurement_window_ms,
            params_->max_trials, params_->request_distribution,
            params_->batch_size, params_->max_threads,
            params_->num_of_sequences, params_->shared_memory_type,
            params_->output_shm_size, params_->serial_sequences, parser_,
            factory, &manager),
        "failed to create request rate manager");
  } else {
    if ((params_->sequence_id_range != 0) &&
        (params_->sequence_id_range < params_->num_of_sequences)) {
      std::cerr << "sequence id range specified is smaller than the "
                << "maximum possible number of sequences, sequence id collision "
                << "may occur." << std::endl;
      throw pa::PerfAnalyzerException(pa::GENERIC_ERROR);
    }
    FAIL_IF_ERR(
        pa::CustomLoadManager::Create(
            params_->async, params_->streaming, params_->measurement_window_ms,
            params_->max_trials, params_->request_intervals_file,
            params_->batch_size, params_->max_threads,
            params_->num_of_sequences, params_->shared_memory_type,
            params_->output_shm_size, params_->serial_sequences, parser_,
            factory, &manager),
        "failed to create custom load manager");
  }

  manager->InitManager(
      params_->string_length, params_->string_data, params_->zero_input,
      params_->user_data, params_->start_sequence_id,
      params_->sequence_id_range, params_->sequence_length,
      params_->sequence_length_specified, params_->sequence_length_variation);

  FAIL_IF_ERR(
      pa::ProfileDataCollector::Create(&collector_),
      "failed to create profile data collector");

  FAIL_IF_ERR(
      pa::ProfileDataExporter::Create(&exporter_),
      "failed to create profile data exporter");

  FAIL_IF_ERR(
      pa::InferenceProfiler::Create(
          params_->verbose, params_->stability_threshold,
          params_->measurement_window_ms, params_->max_trials,
          params_->percentile, params_->latency_threshold_ms,
          params_->protocol, parser_, std::move(backend_), std::move(manager),
          &profiler_, params_->measurement_request_count,
          params_->measurement_mode, params_->mpi_driver,
          params_->metrics_interval_ms, params_->should_collect_metrics,
          params_->overhead_pct_threshold, collector_,
          !params_->profile_export_file.empty()),
      "failed to create profiler");
}
void
PerfAnalyzer::PrerunReport()
{
  std::cout << "*** Measurement Settings ***" << std::endl;
  if (params_->kind == cb::BackendKind::TRITON || params_->using_batch_size) {
    std::cout << " Batch size: " << params_->batch_size << std::endl;
  }

  if (params_->kind == cb::BackendKind::TRITON_C_API) {
    std::cout << " Service Kind: Triton C-API" << std::endl;
  } else if (params_->kind == cb::BackendKind::TRITON) {
    std::cout << " Service Kind: Triton" << std::endl;
  } else if (params_->kind == cb::BackendKind::TORCHSERVE) {
    std::cout << " Service Kind: TorchServe" << std::endl;
  } else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) {
    std::cout << " Service Kind: TensorFlow Serving" << std::endl;
  }

  if (params_->measurement_mode == pa::MeasurementMode::COUNT_WINDOWS) {
    std::cout << " Using \"count_windows\" mode for stabilization"
              << std::endl;
  } else {
    std::cout << " Using \"time_windows\" mode for stabilization"
              << std::endl;
  }

  if (params_->measurement_mode == pa::MeasurementMode::TIME_WINDOWS) {
    std::cout << " Measurement window: " << params_->measurement_window_ms
              << " msec" << std::endl;
  } else if (params_->measurement_mode == pa::MeasurementMode::COUNT_WINDOWS) {
    std::cout << " Minimum number of samples in each window: "
              << params_->measurement_request_count << std::endl;
  }

  if (params_->concurrency_range.end != 1) {
    std::cout << " Latency limit: " << params_->latency_threshold_ms
              << " msec" << std::endl;
    if (params_->concurrency_range.end != pa::NO_LIMIT) {
      std::cout << " Concurrency limit: "
                << std::max(
                       params_->concurrency_range.start,
                       params_->concurrency_range.end)
                << " concurrent requests" << std::endl;
    }
  }

  if (params_->request_rate_range[pa::SEARCH_RANGE::kEND] != 1.0) {
    std::cout << " Latency limit: " << params_->latency_threshold_ms
              << " msec" << std::endl;
    if (params_->request_rate_range[pa::SEARCH_RANGE::kEND] !=
        static_cast<double>(pa::NO_LIMIT)) {
      std::cout << " Request Rate limit: "
                << std::max(
                       params_->request_rate_range[pa::SEARCH_RANGE::kSTART],
                       params_->request_rate_range[pa::SEARCH_RANGE::kEND])
                << " requests per seconds" << std::endl;
    }
  }

  if (params_->using_request_rate_range) {
    if (params_->request_distribution == pa::Distribution::POISSON) {
      std::cout << " Using poisson distribution on request generation"
                << std::endl;
    } else {
      std::cout << " Using uniform distribution on request generation"
                << std::endl;
    }
  }

  if (params_->search_mode == pa::SearchMode::BINARY) {
    std::cout << " Using Binary Search algorithm" << std::endl;
  }

  if (params_->async) {
    std::cout << " Using asynchronous calls for inference" << std::endl;
  } else {
    std::cout << " Using synchronous calls for inference" << std::endl;
  }

  if (parser_->IsDecoupled()) {
    std::cout << " Detected decoupled model, using the first response for "
                 "measuring latency"
              << std::endl;
  }

  if (params_->percentile == -1) {
    std::cout << " Stabilizing using average latency" << std::endl;
  } else {
    std::cout << " Stabilizing using p" << params_->percentile << " latency"
              << std::endl;
  }
  std::cout << std::endl;
}
void
PerfAnalyzer::Profile()
{
  params_->mpi_driver->MPIBarrierWorld();

  cb::Error err;
  if (params_->targeting_concurrency()) {
    err = profiler_->Profile<size_t>(
        params_->concurrency_range.start, params_->concurrency_range.end,
        params_->concurrency_range.step, params_->search_mode,
        perf_statuses_);
  } else {
    err = profiler_->Profile<double>(
        params_->request_rate_range[pa::SEARCH_RANGE::kSTART],
        params_->request_rate_range[pa::SEARCH_RANGE::kEND],
        params_->request_rate_range[pa::SEARCH_RANGE::kSTEP],
        params_->search_mode, perf_statuses_);
  }

  params_->mpi_driver->MPIBarrierWorld();

  if (!err.IsOk()) {
    std::cerr << err;
    // In the case of early_exit, the thread does not return and continues to
    // report the summary
    if (!pa::early_exit) {
      throw pa::PerfAnalyzerException(err.Err());
    }
  }
}
void
PerfAnalyzer::WriteReport()
{
  if (!perf_statuses_.size()) {
    return;
  }

  // Can print more depending on verbose, but it seems too much information
  std::cout << "Inferences/Second vs. Client ";
  if (params_->percentile == -1) {
    std::cout << "Average Batch Latency" << std::endl;
  } else {
    std::cout << "p" << params_->percentile << " Batch Latency" << std::endl;
  }

  for (pa::PerfStatus& status : perf_statuses_) {
    if (params_->targeting_concurrency()) {
      std::cout << "Concurrency: " << status.concurrency << ", ";
    } else {
      std::cout << "Request Rate: " << status.request_rate << ", ";
    }
    std::cout << "throughput: " << status.client_stats.infer_per_sec
              << " infer/sec, latency "
              << (status.stabilizing_latency_ns / 1000) << " usec"
              << std::endl;
  }

  bool should_output_metrics{
      params_->should_collect_metrics && params_->verbose_csv};

  std::unique_ptr<pa::ReportWriter> writer;

  FAIL_IF_ERR(
      pa::ReportWriter::Create(
          params_->filename, params_->targeting_concurrency(), perf_statuses_,
          params_->verbose_csv, profiler_->IncludeServerStats(),
          params_->percentile, parser_, &writer, should_output_metrics),
      "failed to create report writer");

  writer->GenerateReport();
}

void
PerfAnalyzer::GenerateProfileExport()
{
  if (!params_->profile_export_file.empty()) {
    exporter_->Export(
        collector_->GetData(), collector_->GetVersion(),
        params_->profile_export_file);
  }
}

void
PerfAnalyzer::Finalize()
{
  params_->mpi_driver->MPIFinalize();
}
src/c++/perf_analyzer/perf_analyzer.h
// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <getopt.h>
#include <signal.h>
#include <algorithm>
#include "command_line_parser.h"
#include "concurrency_manager.h"
#include "custom_load_manager.h"
#include "inference_profiler.h"
#include "model_parser.h"
#include "mpi_utils.h"
#include "perf_utils.h"
#include "profile_data_collector.h"
#include "profile_data_exporter.h"
// Perf Analyzer provides various metrics to measure the performance of
// the inference server. It can either be used to measure the throughput,
// latency and time distribution under specific setting (i.e. fixed batch size
// and fixed concurrent requests), or be used to generate throughput-latency
// data point under dynamic setting (i.e. collecting throughput-latency data
// under different load level).
//
// The following data is collected and used as part of the metrics:
// - Throughput (infer/sec):
// The number of inferences processed per second as seen by the analyzer.
// The inference count is the number of requests multiplied by their batch
// size, and the total time is the time elapsed from when the analyzer starts
// sending requests to when it has received all responses.
// - Latency (usec):
// The average elapsed time between when a request is sent and
// when the response for the request is received. If the 'percentile' flag is
// specified, the selected percentile value is reported instead of the
// average value.
//
// Perf Analyzer determines the stability of throughput and latency by observing
// measurements in different trials. If the latency and throughput are within
// the stability percentage (see the --stability-percentage option), Perf
// Analyzer reports the average of the throughput and latency numbers observed
// in the last three trials. All the measurements gathered during the last
// three trials are aggregated to generate a single report. The total number
// of requests is the sum of all the requests in the individual measurement
// windows.
//
// There are broadly three ways to load server for the data collection using
// perf_analyzer:
// - Maintaining Target Concurrency:
// In this setting, the analyzer will maintain a target number of concurrent
// requests sent to the server (see --concurrency-range option) while
// taking measurements.
// The number of requests will be the total number of requests sent within
// the time interval for measurement (see --measurement-interval option) and
// the latency will be the average latency across all requests.
//
// Besides throughput and latency, which are measured on the client side,
// the following data measured by the server will also be reported
// in this setting:
// - Concurrent request: the number of concurrent requests as specified
// in --concurrency-range option. Note, for running perf analyzer for
// a single concurrency, user must specify --concurrency-range
// <'start'>, omitting 'end' and 'step' values.
// - Batch size: the batch size of each request as specified in -b option
// - Inference count: batch size * number of inference requests
// - Cumulative time: the total time between request received and
// response sent on the requests sent by perf analyzer.
// - Average Cumulative time: cumulative time / number of inference requests
// - Compute time: the total time it takes to run inferencing including time
// copying input tensors to GPU memory, time executing the model,
// and time copying output tensors from GPU memory for the requests
// sent by perf analyzer.
// - Average compute time: compute time / number of inference requests
// - Queue time: the total time it takes to wait for an available model
// instance for the requests sent by perf analyzer.
// - Average queue time: queue time / number of inference requests
// If all fields of --concurrency-range are specified, the analyzer will
// perform the following procedure:
// 1. Follows the procedure in fixed concurrent request mode using
// k concurrent requests (k starts at 'start').
// 2. Gathers data reported from step 1.
// 3. Increases k by 'step' and repeats step 1 and 2 until latency from
// current iteration exceeds latency threshold (see --latency-threshold
// option) or concurrency level reaches 'end'. Note, by setting
// --latency-threshold or 'end' to 0 the effect of each threshold can
// be removed. However, both can not be 0 simultaneously.
// At each iteration, the data mentioned in fixed concurrent request mode
// will be reported. Besides that, after the procedure above, a collection
// of "throughput, latency, concurrent request count" tuples will be
// reported in increasing load level order.
//
// - Maintaining Target Request Rate:
// This mode is enabled only when --request-rate-range option is specified.
// Unlike above, here the analyzer will try to maintain a target rate of
// requests issued to the server while taking measurements. The rest of the
// analyzer's behavior is identical to the above. It is important to note that
// even though over a sufficiently large interval the rate of requests
// will tend to the target request rate, the actual request rate for a small
// time interval will depend upon the selected request distribution
// (--request-distribution). For 'constant' request distribution the time
// interval between successive requests is maintained to be constant, hence
// request rate is constant over time. However, 'poisson' request
// distribution varies the time interval between successive requests such
// that there are periods of bursts and nulls in request generation.
// Additionally, the 'poisson' distribution mimics real-world traffic and
// can be used to obtain measurements under a realistic load.
// With each request-rate, the analyzer also reports the 'Delayed Request
// Count' which gives an idea of how many requests missed their schedule as
// specified by the distribution. Users can use --max-threads to increase
// the number of threads which might help in dispatching requests as per
// the schedule. Also note that a very large number of threads might be
// counter-productive with most of the time being spent on context-switching
// the threads.
//
// - Following User Provided Request Delivery Schedule:
// This mode is enabled only when --request-intervals option is specified.
// In this case, analyzer will try to dispatch the requests to the server
// with time intervals between successive requests specified in a user
// provided file. This file should contain time intervals in microseconds in
// each new line. Analyzer will loop around the values to produce a
// consistent load for measurements. Once the readings have stabilized, the
// final statistics will be reported. The statistics will include
// 'Delayed Request Count' for the requests that missed their schedule. As
// described before, users can tune --max-threads to help the analyzer keep up
// with the schedule. This mode helps users analyze the performance of the
// server under different custom settings which may be of interest.
//
// By default, perf_analyzer will maintain target concurrency while measuring
// the performance.
//
// Options:
// -b: batch size for each request sent.
// --concurrency-range: The range of concurrency levels perf_analyzer will use.
// A concurrency level indicates the number of concurrent requests in queue.
// --request-rate-range: The range of request rates perf_analyzer will use to
// load the server.
// --request-intervals: File containing time intervals (in microseconds) to use
// between successive requests.
// --latency-threshold: latency threshold in msec.
// --measurement-interval: time interval for each measurement window in msec.
// --async: Enables Asynchronous inference calls.
// --binary-search: Enables binary search within the specified range.
// --request-distribution: Allows user to specify the distribution for selecting
// the time intervals between the request dispatch.
//
// For detail of the options not listed, please refer to the usage.
//
class PerfAnalyzer {
 public:
  PerfAnalyzer(pa::PAParamsPtr params);
  virtual ~PerfAnalyzer(){};

  // Main runner function for Perf Analyzer.
  void Run();

 private:
  pa::PAParamsPtr params_;
  std::unique_ptr<pa::InferenceProfiler> profiler_;
  std::unique_ptr<cb::ClientBackend> backend_;
  std::shared_ptr<pa::ModelParser> parser_;
  std::vector<pa::PerfStatus> perf_statuses_;
  std::shared_ptr<pa::ProfileDataCollector> collector_;
  std::shared_ptr<pa::ProfileDataExporter> exporter_;

  //
  // Helper methods
  //
  // Parse the options out of the command line argument
  //
  void CreateAnalyzerObjects();
  void PrerunReport();
  void Profile();
  void WriteReport();
  void GenerateProfileExport();
  void Finalize();
};
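A minimal sketch (not part of this commit) of how this class is intended to be driven. How `params` gets populated is assumed here (normally the command line parser from command_line_parser.h produces a pa::PAParamsPtr); the wrapper function name RunAnalyzer is hypothetical.

#include "perf_analyzer.h"
#include "perf_analyzer_exception.h"

int
RunAnalyzer(pa::PAParamsPtr params)
{
  try {
    PerfAnalyzer analyzer(params);  // builds backend, parser, load manager
    analyzer.Run();  // prerun report, profile, write report, export, finalize
  }
  catch (const pa::PerfAnalyzerException& e) {
    return e.GetError();
  }
  return 0;
}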
src/c++/perf_analyzer/perf_analyzer_exception.h
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
#pragma once

#include <exception>
#include <string>

namespace triton { namespace perfanalyzer {

// Perf Exception error class
//
class PerfAnalyzerException : public std::exception {
 public:
  PerfAnalyzerException(uint32_t error) : error_(error) {}

  PerfAnalyzerException(const std::string& message, uint32_t error)
      : message_(message), error_(error)
  {
  }

  virtual const char* what() const throw() { return message_.c_str(); }

  inline int GetError() const { return error_; }

 private:
  const std::string message_{""};
  uint32_t error_;
};

}}  // namespace triton::perfanalyzer
src/c++/perf_analyzer/perf_analyzer_unit_tests.cc
// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// This file exists to hold a macro-expanded main function for the unit test
// runner executable.
//
// The old contents of main.cc are needed for the unit test runner to compile,
// but since two main functions cannot be compiled in the same executable, the
// contents of the old main.cc were moved to a new file/class, which are now
// included in the compilation of the unit test runner executable.
//
// The new contents of main.cc just include the new file/class mentioned above
// and run the primary function from there in a simplified main function, which
// runs Perf Analyzer.
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "doctest.h"
src/c++/perf_analyzer/perf_utils.cc
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "perf_utils.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include "client_backend/client_backend.h"
#include "doctest.h"
namespace triton { namespace perfanalyzer {

cb::ProtocolType
ParseProtocol(const std::string& str)
{
  std::string protocol(str);
  std::transform(
      protocol.begin(), protocol.end(), protocol.begin(), ::tolower);
  if (protocol == "http") {
    return cb::ProtocolType::HTTP;
  } else if (protocol == "grpc") {
    return cb::ProtocolType::GRPC;
  }
  return cb::ProtocolType::UNKNOWN;
}

cb::Error
ConvertDTypeFromTFS(const std::string& tf_dtype, std::string* datatype)
{
  if (tf_dtype == "DT_HALF") {
    *datatype = "FP16";
  } else if (tf_dtype == "DT_BFLOAT16") {
    *datatype = "BF16";
  } else if (tf_dtype == "DT_FLOAT") {
    *datatype = "FP32";
  } else if (tf_dtype == "DT_DOUBLE") {
    *datatype = "FP64";
  } else if (tf_dtype == "DT_INT32") {
    *datatype = "INT32";
  } else if (tf_dtype == "DT_INT16") {
    *datatype = "INT16";
  } else if (tf_dtype == "DT_UINT16") {
    *datatype = "UINT16";
  } else if (tf_dtype == "DT_INT8") {
    *datatype = "INT8";
  } else if (tf_dtype == "DT_UINT8") {
    *datatype = "UINT8";
  } else if (tf_dtype == "DT_STRING") {
    *datatype = "BYTES";
  } else if (tf_dtype == "DT_INT64") {
    *datatype = "INT64";
  } else if (tf_dtype == "DT_BOOL") {
    *datatype = "BOOL";
  } else if (tf_dtype == "DT_UINT32") {
    *datatype = "UINT32";
  } else if (tf_dtype == "DT_UINT64") {
    *datatype = "UINT64";
  } else {
    return cb::Error(
        "unsupported datatype encountered " + tf_dtype, pa::GENERIC_ERROR);
  }

  return cb::Error::Success;
}
bool
IsDirectory(const std::string& path)
{
  struct stat s;
  if (stat(path.c_str(), &s) == 0 && (s.st_mode & S_IFDIR)) {
    return true;
  } else {
    return false;
  }
}

bool
IsFile(const std::string& complete_path)
{
  struct stat s;
  if (stat(complete_path.c_str(), &s) == 0 && (s.st_mode & S_IFREG)) {
    return true;
  } else {
    return false;
  }
}

int64_t
ByteSize(const std::vector<int64_t>& shape, const std::string& datatype)
{
  int one_element_size;
  if ((datatype.compare("BOOL") == 0) || (datatype.compare("INT8") == 0) ||
      (datatype.compare("UINT8") == 0)) {
    one_element_size = 1;
  } else if (
      (datatype.compare("INT16") == 0) || (datatype.compare("UINT16") == 0) ||
      (datatype.compare("FP16") == 0) || (datatype.compare("BF16") == 0)) {
    one_element_size = 2;
  } else if (
      (datatype.compare("INT32") == 0) || (datatype.compare("UINT32") == 0) ||
      (datatype.compare("FP32") == 0)) {
    one_element_size = 4;
  } else if (
      (datatype.compare("INT64") == 0) || (datatype.compare("UINT64") == 0) ||
      (datatype.compare("FP64") == 0)) {
    one_element_size = 8;
  } else {
    return -1;
  }

  int64_t count = ElementCount(shape);
  if (count < 0) {
    return count;
  }
  return (one_element_size * count);
}

int64_t
ElementCount(const std::vector<int64_t>& shape)
{
  int64_t count = 1;
  bool is_dynamic = false;
  for (const auto dim : shape) {
    if (dim == -1) {
      is_dynamic = true;
    } else {
      count *= dim;
    }
  }
  if (is_dynamic) {
    count = -1;
  }
  return count;
}
void
SerializeStringTensor
(
std
::
vector
<
std
::
string
>
string_tensor
,
std
::
vector
<
char
>*
serialized_data
)
{
std
::
string
serialized
=
""
;
for
(
auto
s
:
string_tensor
)
{
uint32_t
len
=
s
.
size
();
serialized
.
append
(
reinterpret_cast
<
const
char
*>
(
&
len
),
sizeof
(
uint32_t
));
serialized
.
append
(
s
);
}
std
::
copy
(
serialized
.
begin
(),
serialized
.
end
(),
std
::
back_inserter
(
*
serialized_data
));
}
cb
::
Error
SerializeExplicitTensor
(
const
rapidjson
::
Value
&
tensor
,
const
std
::
string
&
dt
,
std
::
vector
<
char
>*
decoded_data
)
{
if
(
dt
.
compare
(
"BYTES"
)
==
0
)
{
std
::
string
serialized
=
""
;
for
(
const
auto
&
value
:
tensor
.
GetArray
())
{
if
(
!
value
.
IsString
())
{
return
cb
::
Error
(
"unable to find string data in json"
,
pa
::
GENERIC_ERROR
);
}
std
::
string
element
(
value
.
GetString
());
uint32_t
len
=
element
.
size
();
serialized
.
append
(
reinterpret_cast
<
const
char
*>
(
&
len
),
sizeof
(
uint32_t
));
serialized
.
append
(
element
);
}
std
::
copy
(
serialized
.
begin
(),
serialized
.
end
(),
std
::
back_inserter
(
*
decoded_data
));
}
else
{
for
(
const
auto
&
value
:
tensor
.
GetArray
())
{
if
(
dt
.
compare
(
"BOOL"
)
==
0
)
{
if
(
!
value
.
IsBool
())
{
return
cb
::
Error
(
"unable to find bool data in json"
,
pa
::
GENERIC_ERROR
);
}
bool
element
(
value
.
GetBool
());
const
char
*
src
=
reinterpret_cast
<
const
char
*>
(
&
element
);
decoded_data
->
insert
(
decoded_data
->
end
(),
src
,
src
+
sizeof
(
bool
));
}
else
if
(
dt
.
compare
(
"UINT8"
)
==
0
)
{
if
(
!
value
.
IsUint
())
{
return
cb
::
Error
(
"unable to find uint8_t data in json"
,
pa
::
GENERIC_ERROR
);
}
uint8_t
element
(
static_cast
<
uint8_t
>
(
value
.
GetUint
()));
const
char
*
src
=
reinterpret_cast
<
const
char
*>
(
&
element
);
decoded_data
->
insert(decoded_data->end(), src, src + sizeof(uint8_t));
      } else if (dt.compare("INT8") == 0) {
        if (!value.IsInt()) {
          return cb::Error(
              "unable to find int8_t data in json", pa::GENERIC_ERROR);
        }
        int8_t element(static_cast<int8_t>(value.GetInt()));
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(int8_t));
      } else if (dt.compare("UINT16") == 0) {
        if (!value.IsUint()) {
          return cb::Error(
              "unable to find uint16_t data in json", pa::GENERIC_ERROR);
        }
        uint16_t element(static_cast<uint16_t>(value.GetUint()));
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(uint16_t));
      } else if (dt.compare("INT16") == 0) {
        if (!value.IsInt()) {
          return cb::Error(
              "unable to find int16_t data in json", pa::GENERIC_ERROR);
        }
        int16_t element(static_cast<int16_t>(value.GetInt()));
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(int16_t));
      } else if (dt.compare("FP16") == 0) {
        return cb::Error(
            "Can not use explicit tensor description for fp16 datatype",
            pa::GENERIC_ERROR);
      } else if (dt.compare("BF16") == 0) {
        return cb::Error(
            "Can not use explicit tensor description for bf16 datatype",
            pa::GENERIC_ERROR);
      } else if (dt.compare("UINT32") == 0) {
        if (!value.IsUint()) {
          return cb::Error(
              "unable to find uint32_t data in json", pa::GENERIC_ERROR);
        }
        uint32_t element(value.GetUint());
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(uint32_t));
      } else if (dt.compare("INT32") == 0) {
        if (!value.IsInt()) {
          return cb::Error(
              "unable to find int32_t data in json", pa::GENERIC_ERROR);
        }
        int32_t element(value.GetInt());
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(int32_t));
      } else if (dt.compare("FP32") == 0) {
        if (!value.IsDouble()) {
          return cb::Error(
              "unable to find float data in json", pa::GENERIC_ERROR);
        }
        float element(value.GetFloat());
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(float));
      } else if (dt.compare("UINT64") == 0) {
        if (!value.IsUint64()) {
          return cb::Error(
              "unable to find uint64_t data in json", pa::GENERIC_ERROR);
        }
        uint64_t element(value.GetUint64());
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(uint64_t));
      } else if (dt.compare("INT64") == 0) {
        if (!value.IsInt64()) {
          return cb::Error(
              "unable to find int64_t data in json", pa::GENERIC_ERROR);
        }
        int64_t element(value.GetInt64());
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(int64_t));
      } else if (dt.compare("FP64") == 0) {
        if (!value.IsDouble()) {
          return cb::Error(
              "unable to find fp64 data in json", pa::GENERIC_ERROR);
        }
        double element(value.GetDouble());
        const char* src = reinterpret_cast<const char*>(&element);
        decoded_data->insert(
            decoded_data->end(), src, src + sizeof(double));
      }
    }
  }
  return cb::Error::Success;
}
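For orientation, a minimal hypothetical sketch of how SerializeExplicitTensor can be driven from a parsed JSON array follows; the inline JSON literal, the INT32 datatype choice, and the ExampleSerializeInt32 wrapper are illustrative assumptions, not code from this commit.

// Hypothetical usage sketch for SerializeExplicitTensor (not part of this file).
#include <rapidjson/document.h>
#include <vector>
#include "perf_utils.h"  // declares pa::SerializeExplicitTensor (see header below)

static void
ExampleSerializeInt32()
{
  rapidjson::Document doc;
  doc.Parse("[1, 2, 3, 4]");  // assumed inline data; real data comes from the input JSON file

  std::vector<char> decoded_data;
  cb::Error err = pa::SerializeExplicitTensor(doc, "INT32", &decoded_data);
  // On success decoded_data holds the raw bytes of four int32_t values
  // (4 * sizeof(int32_t) == 16); error handling is elided in this sketch.
  (void)err;
}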
std::string
GetRandomString(const int string_length)
{
  std::mt19937_64 gen{std::random_device()()};
  std::uniform_int_distribution<size_t> dist{0, character_set.length() - 1};
  std::string random_string;
  std::generate_n(std::back_inserter(random_string), string_length, [&] {
    return character_set[dist(gen)];
  });
  return random_string;
}
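A one-line illustrative call, for example to fill a synthetic string element (the length of 16 is an arbitrary assumption):

// Hypothetical usage sketch: sixteen random characters drawn from character_set.
std::string synthetic_value = pa::GetRandomString(16);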
std::string
ShapeVecToString(const std::vector<int64_t> shape_vec, bool skip_first)
{
  bool first = true;
  std::string str("[");
  for (const auto& value : shape_vec) {
    if (skip_first) {
      skip_first = false;
      continue;
    }
    if (!first) {
      str += ",";
    }
    str += std::to_string(value);
    first = false;
  }

  str += "]";
  return str;
}
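Note that skip_first drops only the leading dimension (typically the batch dimension) from the printed string. A small hypothetical sketch, with the shape values chosen purely for illustration:

// Hypothetical usage sketch for ShapeVecToString.
std::vector<int64_t> shape{1, 3, 224, 224};
std::string with_batch = pa::ShapeVecToString(shape);           // "[1,3,224,224]"
std::string without_batch = pa::ShapeVecToString(shape, true);  // "[3,224,224]"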
std::string
TensorToRegionName(std::string name)
{
  // Remove slashes from the name, if any.
  name.erase(
      std::remove_if(
          name.begin(), name.end(),
          [](const char& c) { return ((c == '/') || (c == '\\')); }),
      name.end());
  return name;
}
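Only forward and back slashes are stripped; every other character, including ':', is kept. A hypothetical example name:

// Hypothetical usage sketch: "sequential/dense/BiasAdd:0" becomes "sequentialdenseBiasAdd:0".
std::string region_name = pa::TensorToRegionName("sequential/dense/BiasAdd:0");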
template <>
std::function<std::chrono::nanoseconds(std::mt19937&)>
ScheduleDistribution<Distribution::POISSON>(const double request_rate)
{
  std::exponential_distribution<> dist =
      std::exponential_distribution<>(request_rate);
  return [dist](std::mt19937& gen) mutable {
    return std::chrono::duration_cast<std::chrono::nanoseconds>(
        std::chrono::duration<double>(dist(gen)));
  };
}
template <>
std::function<std::chrono::nanoseconds(std::mt19937&)>
ScheduleDistribution<Distribution::CONSTANT>(const double request_rate)
{
  std::chrono::nanoseconds period =
      std::chrono::duration_cast<std::chrono::nanoseconds>(
          std::chrono::duration<double>(1.0 / request_rate));
  return [period](std::mt19937& /*gen*/) { return period; };
}
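Both specializations return a callable that yields the gap until the next request. A hedged sketch of consuming it is shown here; the 100 requests/second rate, the fixed seed, and the ExampleScheduleFiveRequests wrapper are illustrative assumptions:

// Hypothetical usage sketch: accumulate five inter-request gaps at ~100 req/s.
#include <chrono>
#include <random>
#include "perf_utils.h"

static std::chrono::nanoseconds
ExampleScheduleFiveRequests()
{
  std::mt19937 rng(42);  // assumed fixed seed so the sketch is reproducible
  auto next_gap = pa::ScheduleDistribution<pa::Distribution::POISSON>(100.0);

  std::chrono::nanoseconds elapsed{0};
  for (int i = 0; i < 5; ++i) {
    elapsed += next_gap(rng);  // exponentially distributed gaps, mean 1/100 s = 10 ms
  }
  // With Distribution::CONSTANT the same loop would add exactly 10 ms per call.
  return elapsed;
}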
cb::TensorFormat
ParseTensorFormat(const std::string& content_type_str)
{
  std::string content_type_str_lowercase{content_type_str};
  std::transform(
      content_type_str.cbegin(), content_type_str.cend(),
      content_type_str_lowercase.begin(),
      [](unsigned char c) { return std::tolower(c); });
  if (content_type_str_lowercase == "binary") {
    return cb::TensorFormat::BINARY;
  } else if (content_type_str_lowercase == "json") {
    return cb::TensorFormat::JSON;
  } else {
    return cb::TensorFormat::UNKNOWN;
  }
}

}}  // namespace triton::perfanalyzer
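ParseTensorFormat lower-cases its argument before comparing, so the match is case-insensitive; a short hypothetical check:

// Hypothetical usage sketch: map a user-supplied string to a tensor format.
cb::TensorFormat fmt = pa::ParseTensorFormat("Binary");    // cb::TensorFormat::BINARY
cb::TensorFormat bad = pa::ParseTensorFormat("protobuf");  // cb::TensorFormat::UNKNOWN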
src/c++/perf_analyzer/perf_utils.h
0 → 100644
View file @
c68e1835
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <rapidjson/document.h>
#include <rapidjson/rapidjson.h>
#include <sys/stat.h>
#include <time.h>
#include <chrono>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <random>
#include "client_backend/client_backend.h"
namespace pa = triton::perfanalyzer;
namespace cb = triton::perfanalyzer::clientbackend;

namespace triton { namespace perfanalyzer {

constexpr uint64_t NANOS_PER_SECOND = 1000000000;
constexpr uint64_t NANOS_PER_MILLIS = 1000000;
#define CHRONO_TO_NANOS(TS)                                                    \
  (std::chrono::duration_cast<std::chrono::nanoseconds>(TS.time_since_epoch()) \
       .count())
#define CHRONO_TO_MILLIS(TS) (CHRONO_TO_NANOS(TS) / pa::NANOS_PER_MILLIS)
//==============================================================================

// Will use the characters specified here to construct random strings
std::string const character_set =
    "abcdefghijklmnaoqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890 .?!";

// A boolean flag to mark an interrupt and commencement of early exit
extern volatile bool early_exit;

enum Distribution { POISSON = 0, CONSTANT = 1, CUSTOM = 2 };
enum SearchMode { LINEAR = 0, BINARY = 1, NONE = 2 };
enum SharedMemoryType {
  SYSTEM_SHARED_MEMORY = 0,
  CUDA_SHARED_MEMORY = 1,
  NO_SHARED_MEMORY = 2
};

constexpr uint64_t NO_LIMIT = 0;

// Templated range class that tracks the start, stop, and step for a range.
//
template <typename T>
class Range {
 public:
  Range(T start, T end, T step) : start(start), end(end), step(step) {}

  T start;
  T end;
  T step;
};
// Converts the datatype from tensorflow to perf analyzer space
// \param tf_dtype The data type string returned from the model metadata.
// \param datatype Returns the datatype in perf_analyzer space.
// \return error status. Returns Non-Ok if an error is encountered during
// read operation.
cb::Error ConvertDTypeFromTFS(
    const std::string& tf_dtype, std::string* datatype);

// Parse the communication protocol type
cb::ProtocolType ParseProtocol(const std::string& str);

// To check whether the path points to a valid system directory
bool IsDirectory(const std::string& path);

// To check whether the path points to a valid system file
bool IsFile(const std::string& complete_path);

// Calculates the byte size tensor for given shape and datatype.
int64_t ByteSize(
    const std::vector<int64_t>& shape, const std::string& datatype);

// Get the number of elements in the tensor for given shape.
int64_t ElementCount(const std::vector<int64_t>& shape);

// Serializes the string tensor to length prepended bytes.
void SerializeStringTensor(
    std::vector<std::string> string_tensor, std::vector<char>* serialized_data);

// Serializes an explicit tensor read from the data file to the
// raw bytes.
cb::Error SerializeExplicitTensor(
    const rapidjson::Value& tensor, const std::string& dt,
    std::vector<char>* decoded_data);

// Generates a random string of specified length using characters specified in
// character_set.
std::string GetRandomString(const int string_length);

// Returns the shape string containing the values provided in the vector
std::string ShapeVecToString(
    const std::vector<int64_t> shape_vec, bool skip_first = false);

// Remove slashes from tensor name, if any
std::string TensorToRegionName(std::string name);

// Returns the request schedule distribution generator with the specified
// request rate.
template <Distribution distribution>
std::function<std::chrono::nanoseconds(std::mt19937&)> ScheduleDistribution(
    const double request_rate);

// Parse the HTTP tensor format
cb::TensorFormat ParseTensorFormat(const std::string& tensor_format_str);

}}  // namespace triton::perfanalyzer
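To show how a few of these declarations compose, here is a hedged sketch of preparing a synthetic string input; the two-element shape, the 8-character string length, and the ExampleBuildStringInput wrapper are illustrative assumptions, and the exact byte layout is whatever SerializeStringTensor's length-prepended format produces.

// Hypothetical usage sketch combining several of the utilities declared above.
#include <string>
#include <vector>
#include "perf_utils.h"

static void
ExampleBuildStringInput()
{
  // Two synthetic 8-character strings for a [2, 1] shaped BYTES input.
  std::vector<std::string> strings{
      pa::GetRandomString(8), pa::GetRandomString(8)};

  std::vector<char> serialized;
  pa::SerializeStringTensor(strings, &serialized);
  // serialized now holds each string with its length prepended, ready to be
  // copied into an input buffer or a shared-memory region.

  int64_t elements = pa::ElementCount({2, 1});  // product of the dims: 2
  (void)elements;
}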