Unverified commit ad5aa6bd, authored by Manrique Vargas, committed by GitHub
Browse files

fix(docs): fix typos in comments and docstrings (#34836)


Signed-off-by: machov <mv1742@nyu.edu>
parent 9681068c
...@@ -285,7 +285,7 @@ class CompilerManager: ...@@ -285,7 +285,7 @@ class CompilerManager:
with self.compile_context(compile_range): with self.compile_context(compile_range):
# There is a compilation time optimization here. # There is a compilation time optimization here.
# #
# If the (input metdata, graph, compiler config) are the same, then # If the (input metadata, graph, compiler config) are the same, then
# we want to avoid compiling the same artifact again. If we didn't # we want to avoid compiling the same artifact again. If we didn't
# do this optimization, the backend compilation (InductorAdaptor or # do this optimization, the backend compilation (InductorAdaptor or
# InductorStandaloneAdaptor) # InductorStandaloneAdaptor)
......
...@@ -420,7 +420,7 @@ def make_fp8_moe_quant_config( ...@@ -420,7 +420,7 @@ def make_fp8_moe_quant_config(
per_out_ch_quant: bool = False, per_out_ch_quant: bool = False,
) -> FusedMoEQuantConfig | None: ) -> FusedMoEQuantConfig | None:
""" """
Create FusedMoEQuantConfig for the specifed FP8 Backend. Create FusedMoEQuantConfig for the specified FP8 Backend.
The FusedMoEQuantConfig holds the scales that are used The FusedMoEQuantConfig holds the scales that are used
at runtime by the Modular Kernel abstraction. at runtime by the Modular Kernel abstraction.
......
...@@ -151,7 +151,7 @@ class DefaultMoERunner(MoERunner): ...@@ -151,7 +151,7 @@ class DefaultMoERunner(MoERunner):
kernels for different parallel execution modes. kernels for different parallel execution modes.
Eventually, this class will be split up and specialized for different Eventually, this class will be split up and specialized for different
configurations, e.g. the presense or absence of shared experts, a gate, etc. configurations, e.g. the presence or absence of shared experts, a gate, etc.
""" """
def __init__( def __init__(
......
...@@ -586,7 +586,7 @@ class GptOssModel(nn.Module): ...@@ -586,7 +586,7 @@ class GptOssModel(nn.Module):
parts = name.split(".") parts = name.split(".")
ids = [s for s in parts if s.isdigit()] ids = [s for s in parts if s.isdigit()]
# for amd-quark format that each expert is seperated # for amd-quark format that each expert is separated
# need to extract the parameter name with experts fused. # need to extract the parameter name with experts fused.
# example model: amd/gpt-oss-20b-MoE-Quant-W-MXFP4-A-FP8-KV-FP8 # example model: amd/gpt-oss-20b-MoE-Quant-W-MXFP4-A-FP8-KV-FP8
if len(ids) == 2: if len(ids) == 2:
......
...@@ -567,8 +567,8 @@ def current_stream() -> torch.cuda.Stream: ...@@ -567,8 +567,8 @@ def current_stream() -> torch.cuda.Stream:
return _current_stream_tls.value return _current_stream_tls.value
# Global auxilary stream for running operations in background streams. # Global auxiliary stream for running operations in background streams.
# We have single global auxilary stream to avoid an explosion of streams # We have single global auxiliary stream to avoid an explosion of streams
# for every layer (and make profiling look sane). # for every layer (and make profiling look sane).
# #
# aux_stream() is currently used for: # aux_stream() is currently used for:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment