Commit 8898bcf0 authored by one's avatar one
Browse files

[xcl-lens] Update verbose mode

parent 8743b701
...@@ -159,7 +159,7 @@ class RcclLogParser: ...@@ -159,7 +159,7 @@ class RcclLogParser:
df_ib.sort_values(by=["host", "rank", "hca_no", "hca_id"], inplace=True) df_ib.sort_values(by=["host", "rank", "hca_no", "hca_id"], inplace=True)
df_ib = df_ib[["host", "rank", "hca_no", "hca_id", "gdr"]] df_ib = df_ib[["host", "rank", "hca_no", "hca_id", "gdr"]]
if not self._verbose: if not self._verbose:
df_ib = df_ib.drop(columns=["host"]) df_ib = df_ib.drop(columns=["host", "rank"])
df_ib.drop_duplicates(inplace=True) df_ib.drop_duplicates(inplace=True)
print(df_ib.to_string(index=False)) print(df_ib.to_string(index=False))
print() print()
...@@ -205,14 +205,14 @@ class RcclLogParser: ...@@ -205,14 +205,14 @@ class RcclLogParser:
) )
df_gpu = df_gpu[["host", "rank", "gpu", "hca_no", "distance", "max_distance", "r/w"]] df_gpu = df_gpu[["host", "rank", "gpu", "hca_no", "distance", "max_distance", "r/w"]]
if not self._verbose: if not self._verbose:
df_gpu = df_gpu.drop(columns=["host"]) df_gpu = df_gpu.drop(columns=["host", "rank"])
df_gpu.drop_duplicates(inplace=True) df_gpu.drop_duplicates(inplace=True)
print(df_gpu.to_string(index=False)) print(df_gpu.to_string(index=False))
print() print()
else: else:
print(" (No data found)\n") print(" (No data found)\n")
def _extract_and_print(self, title, filter_func, fields, mandatory, sort_cols, move_rank=True): def _extract_and_print(self, title, filter_func, fields, mandatory, verbose_cols, sort_cols, move_rank=True):
""" """
Generic function to extract structured data from log lines and print as a table. Generic function to extract structured data from log lines and print as a table.
...@@ -235,6 +235,7 @@ class RcclLogParser: ...@@ -235,6 +235,7 @@ class RcclLogParser:
- col_name: Name of the DataFrame column - col_name: Name of the DataFrame column
- value_pattern: Regex pattern to validate/extract the field value - value_pattern: Regex pattern to validate/extract the field value
mandatory: List of column names that must not be NaN (drop rows missing these) mandatory: List of column names that must not be NaN (drop rows missing these)
verbose_cols: List of column names shown only in verbose mode (dropped when not verbose)
sort_cols: List of column names to sort by (in order) sort_cols: List of column names to sort by (in order)
move_rank: If True, move "rank" column to front and "protocol" to second if present move_rank: If True, move "rank" column to front and "protocol" to second if present
""" """
...@@ -254,6 +255,10 @@ class RcclLogParser: ...@@ -254,6 +255,10 @@ class RcclLogParser:
rf"\b{pattern}\s+({val_pattern})", expand=False rf"\b{pattern}\s+({val_pattern})", expand=False
) )
# Drop verbose columns if not verbose
if not self._verbose:
df = df.drop(columns=verbose_cols, errors="ignore")
# Convert numeric fields to appropriate types # Convert numeric fields to appropriate types
numeric_columns = [ numeric_columns = [
"Pattern", "Pattern",
...@@ -282,9 +287,6 @@ class RcclLogParser: ...@@ -282,9 +287,6 @@ class RcclLogParser:
mandatory = [c for c in mandatory if c in df.columns] mandatory = [c for c in mandatory if c in df.columns]
df.dropna(subset=mandatory, inplace=True) # Remove rows missing mandatory fields df.dropna(subset=mandatory, inplace=True) # Remove rows missing mandatory fields
df.drop(columns=["raw_log"], inplace=True) # No longer need raw log df.drop(columns=["raw_log"], inplace=True) # No longer need raw log
if not self._verbose and "host" in df.columns:
df = df.drop(columns=["host"])
df.drop_duplicates(inplace=True) # Deduplicate identical records df.drop_duplicates(inplace=True) # Deduplicate identical records
if df.empty: if df.empty:
...@@ -293,19 +295,10 @@ class RcclLogParser: ...@@ -293,19 +295,10 @@ class RcclLogParser:
# Reorder columns for better readability # Reorder columns for better readability
if move_rank: if move_rank:
cols = df.columns.tolist() target_order = ["host", "rank", "protocol"]
if "host" in cols: leading_cols = [c for c in target_order if c in df.columns]
cols.remove("host") remaining_cols = [c for c in df.columns if c not in leading_cols]
cols.remove("rank") df = df[leading_cols + remaining_cols]
# Move protocol to second position if present
if "protocol" in cols:
cols.remove("protocol")
cols.insert(0, "protocol")
# Always move host and rank to front
cols.insert(0, "rank")
if "host" in cols:
cols.insert(0, "host")
df = df[cols]
# Sort the data # Sort the data
sort_cols = [c for c in sort_cols if c in df.columns] sort_cols = [c for c in sort_cols if c in df.columns]
...@@ -329,11 +322,38 @@ class RcclLogParser: ...@@ -329,11 +322,38 @@ class RcclLogParser:
filter_func=lambda c: "Pattern" in c and "crossNic" in c, filter_func=lambda c: "Pattern" in c and "crossNic" in c,
fields=self.graph_info_fields, fields=self.graph_info_fields,
mandatory=["Pattern"], mandatory=["Pattern"],
sort_cols=["rank", "Pattern"], verbose_cols=["host", "rank"],
sort_cols=["host", "rank", "Pattern"],
)
def _report_collective_transfers(self):
self._extract_and_print(
title="Unique Ring/Tree Transfers",
filter_func=lambda c: "protocol" in c and "nbytes" in c,
fields=self.cl_transfer_fields,
mandatory=["protocol", "nbytes"],
verbose_cols=["host", "rank"],
sort_cols=["host", "rank", "nbytes", "protocol", "nchannels"],
)
def _report_p2p_transfers(self):
self._extract_and_print(
title="Unique P2P Transfers",
filter_func=lambda c: "p2p :" in c and "send rank" in c,
fields=self.p2p_fields,
mandatory=["local", "send", "recv"],
verbose_cols=["host", "rank", "local", "send", "recv"],
sort_cols=["host", "rank", "protocol", "local", "send", "recv"],
) )
def _report_channel_transport_info(self): def _report_channel_transport_info(self):
print("===> Channel Transport Info:\n") print("===> Channel Transport Info:\n")
if not self._verbose:
print(" (Skipped because verbose mode is not enabled)")
print()
return
data = [] data = []
# Match pattern: Channel 00/0 : 2[5d000] -> 1[56000] [send] via NET/IB/6/GDRDMA # Match pattern: Channel 00/0 : 2[5d000] -> 1[56000] [send] via NET/IB/6/GDRDMA
...@@ -369,26 +389,6 @@ class RcclLogParser: ...@@ -369,26 +389,6 @@ class RcclLogParser:
df = pd.DataFrame(data) df = pd.DataFrame(data)
df.sort_values(by=["host", "rank", "channel", "sender", "receiver"], inplace=True) df.sort_values(by=["host", "rank", "channel", "sender", "receiver"], inplace=True)
if not self._verbose:
df = df.drop(columns=["host", "channel", "sender", "receiver"])
df.drop_duplicates(inplace=True) df.drop_duplicates(inplace=True)
print(df.to_string(index=False)) print(df.to_string(index=False))
print() print()
def _report_collective_transfers(self):
self._extract_and_print(
title="Unique Ring/Tree Transfers",
filter_func=lambda c: "protocol" in c and "nbytes" in c,
fields=self.cl_transfer_fields,
mandatory=["protocol", "nbytes"],
sort_cols=["rank", "nbytes", "protocol", "nchannels"],
)
def _report_p2p_transfers(self):
self._extract_and_print(
title="Unique P2P Transfers",
filter_func=lambda c: "p2p :" in c and "send rank" in c,
fields=self.p2p_fields,
mandatory=["local", "send", "recv"],
sort_cols=["rank", "protocol", "local", "send", "recv"],
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment