kernel_name,kernel_provider,kernel_operation_mode,metric_name,metric_unit,x_name,x_label,x_value,y_value_50,y_value_20,y_value_80,extra_benchmark_config_str,gpu_name,timestamp,liger_version cross_entropy,liger,forward,speed,ms,V,vocab size,4096,0.5324159860610962,0.5291008353233337,0.53476482629776,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:39,0.2.1 cross_entropy,liger,forward,speed,ms,V,vocab size,8192,0.8101439476013184,0.7565760016441345,0.9144319891929626,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:39,0.2.1 cross_entropy,liger,forward,speed,ms,V,vocab size,16384,1.4320800304412842,1.4087040424346924,1.5254720449447632,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:39,0.2.1 cross_entropy,liger,forward,speed,ms,V,vocab size,32768,2.8378241062164307,2.805759906768799,2.9447360038757324,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:39,0.2.1 cross_entropy,liger,forward,speed,ms,V,vocab size,65536,6.805135726928711,6.790579319000244,6.98748779296875,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:39,0.2.1 cross_entropy,liger,forward,speed,ms,V,vocab size,131072,15.009359359741211,15.00483226776123,15.045599937438965,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:39,0.2.1 cross_entropy,huggingface,forward,speed,ms,V,vocab size,4096,0.8751360177993774,0.87330561876297,0.8773248195648193,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:40,0.2.1 cross_entropy,huggingface,forward,speed,ms,V,vocab size,8192,1.188480019569397,1.1871488094329834,1.1901824474334717,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:40,0.2.1 cross_entropy,huggingface,forward,speed,ms,V,vocab size,16384,1.9522240161895752,1.9451839923858643,1.962073564529419,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:40,0.2.1 cross_entropy,huggingface,forward,speed,ms,V,vocab size,32768,5.316768169403076,5.314131259918213,5.319046497344971,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:40,0.2.1 cross_entropy,huggingface,forward,speed,ms,V,vocab size,65536,10.615103721618652,10.607129096984863,10.61723518371582,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:40,0.2.1 cross_entropy,huggingface,forward,speed,ms,V,vocab size,131072,20.72643280029297,20.72038459777832,20.758554458618164,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:40,0.2.1 cross_entropy,liger,full,speed,ms,V,vocab size,4096,0.8637440204620361,0.8607680201530457,0.8670976161956787,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:41,0.2.1 cross_entropy,liger,full,speed,ms,V,vocab size,8192,1.462272047996521,1.4576319456100464,1.4661248922348022,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:41,0.2.1 cross_entropy,liger,full,speed,ms,V,vocab size,16384,2.7454559803009033,2.741612672805786,2.780428647994995,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:41,0.2.1 cross_entropy,liger,full,speed,ms,V,vocab size,32768,5.403264045715332,5.398873329162598,5.4122114181518555,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:41,0.2.1 cross_entropy,liger,full,speed,ms,V,vocab size,65536,11.925024032592773,11.919878005981445,11.92919635772705,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:41,0.2.1 cross_entropy,liger,full,speed,ms,V,vocab size,131072,25.22287940979004,25.21867561340332,25.23493766784668,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:41,0.2.1 cross_entropy,huggingface,full,speed,ms,V,vocab size,4096,2.2260000705718994,2.2239038944244385,2.2290303707122803,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,speed,ms,V,vocab size,8192,3.5976319313049316,3.595616102218628,3.6007039546966553,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,speed,ms,V,vocab size,16384,6.8023200035095215,6.795276641845703,6.806528091430664,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,speed,ms,V,vocab size,32768,15.486032485961914,15.483936309814453,15.48681640625,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,speed,ms,V,vocab size,65536,30.778079986572266,30.76335334777832,30.77827262878418,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,speed,ms,V,vocab size,131072,60.43830490112305,60.43830490112305,60.43830490112305,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,liger,full,memory,MB,V,vocab size,4096,256.32861328125,256.32861328125,256.32861328125,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,liger,full,memory,MB,V,vocab size,8192,512.32861328125,512.32861328125,512.32861328125,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,liger,full,memory,MB,V,vocab size,16384,1024.32861328125,1024.32861328125,1024.32861328125,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,liger,full,memory,MB,V,vocab size,32768,2048.32861328125,2048.32861328125,2048.32861328125,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,liger,full,memory,MB,V,vocab size,65536,4096.32861328125,4096.32861328125,4096.32861328125,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,liger,full,memory,MB,V,vocab size,131072,8192.328125,8192.328125,8192.328125,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,memory,MB,V,vocab size,4096,1280.1259765625,1280.1259765625,1280.1259765625,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,memory,MB,V,vocab size,8192,2560.1259765625,2560.1259765625,2560.1259765625,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,memory,MB,V,vocab size,16384,5120.1259765625,5120.1259765625,5120.1259765625,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,memory,MB,V,vocab size,32768,10240.1259765625,10240.1259765625,10240.1259765625,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,memory,MB,V,vocab size,65536,20480.125,20480.125,20480.125,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 cross_entropy,huggingface,full,memory,MB,V,vocab size,131072,40960.125,40960.125,40960.125,"{""B"": 8, ""T"": 2048}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:42,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,1024,0.04262400045990944,0.04214400053024292,0.04428799822926521,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:53,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,2048,0.04668800160288811,0.04560000076889992,0.04825599864125252,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:53,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,4096,0.0493599995970726,0.048153601586818695,0.05084799975156784,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:53,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,8192,0.05558399856090546,0.054207999259233475,0.0568000003695488,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:53,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,16384,0.061503998935222626,0.06022400036454201,0.06260479986667633,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:53,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,32768,0.06518399715423584,0.06406400352716446,0.06634879857301712,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:53,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,65536,0.06779199838638306,0.06656000018119812,0.06905599683523178,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:53,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,131072,0.07091200351715088,0.06963200122117996,0.07225599884986877,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:53,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,1024,0.16672000288963318,0.1416832059621811,0.16777600347995758,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:56,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,2048,0.14406399428844452,0.1435839980840683,0.1446399986743927,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:56,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,4096,0.1539199948310852,0.15334400534629822,0.1546431928873062,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:56,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,8192,0.1627199947834015,0.16179199516773224,0.16357119381427765,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:56,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,16384,0.1666879951953888,0.16587519645690918,0.16772480309009552,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:56,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,32768,0.1687680035829544,0.16784639656543732,0.1697216033935547,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:56,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,65536,0.16918399930000305,0.1685439944267273,0.17001600563526154,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:56,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,131072,0.17027199268341064,0.16927999258041382,0.17123199999332428,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:31:56,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,1024,0.039712000638246536,0.03798399865627289,0.04079360142350197,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:01,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,2048,0.04652800038456917,0.045318398624658585,0.04755200073122978,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:01,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,4096,0.05462399870157242,0.05361919850111008,0.05580800026655197,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:01,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,8192,0.06015999987721443,0.059487998485565186,0.06102399900555611,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:01,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,16384,0.06412799656391144,0.06329599767923355,0.06508159637451172,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:01,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,32768,0.066880002617836,0.06583040207624435,0.06777600198984146,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:01,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,65536,0.06896000355482101,0.06785280257463455,0.07009919732809067,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:01,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,131072,0.06915199756622314,0.0682239979505539,0.06998399645090103,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:01,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,1024,0.44515201449394226,0.4440639913082123,0.4463231861591339,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:05,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,2048,0.4620960056781769,0.4610239863395691,0.46300798654556274,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:05,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,4096,0.49136000871658325,0.4905087947845459,0.49270400404930115,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:05,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,8192,0.5527999997138977,0.5520448088645935,0.5538623929023743,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:05,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,16384,0.6350079774856567,0.6340479850769043,0.6363840103149414,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:05,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,32768,0.7710559964179993,0.7691839933395386,0.7727680206298828,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:05,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,65536,1.002560019493103,1.0006400346755981,1.004467248916626,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:05,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,131072,1.4482879638671875,1.4459072351455688,1.4513407945632935,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:05,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,1024,0.4537919759750366,0.4517247974872589,0.46081918478012085,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:08,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,2048,0.47407999634742737,0.4729023873806,0.47523200511932373,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:08,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,4096,0.5310080051422119,0.5298879742622375,0.5320383906364441,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:08,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,8192,0.6528639793395996,0.6514303684234619,0.6546239852905273,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:08,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,16384,0.8056960105895996,0.8048319816589355,0.807424008846283,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:08,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,32768,0.954543948173523,0.9533119797706604,0.9559999704360962,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:08,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,65536,1.1960480213165283,1.1946111917495728,1.1982656717300415,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:08,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,131072,1.642624020576477,1.6409599781036377,1.6447807550430298,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:08,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,1024,0.3001280128955841,0.29503998160362244,0.30576640367507935,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:13,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,2048,0.297760009765625,0.2938239872455597,0.3054080009460449,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:13,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,4096,0.2991679906845093,0.2956480085849762,0.3070079982280731,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:13,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,8192,0.2961280047893524,0.2899264097213745,0.3029248118400574,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:13,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,16384,0.3465920090675354,0.34563198685646057,0.3476351797580719,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:13,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,32768,0.46585598587989807,0.4641471803188324,0.4674175977706909,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:13,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,65536,0.6924160122871399,0.6907200217247009,0.6938239932060242,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:13,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,131072,1.1352640390396118,1.1327999830245972,1.1376447677612305,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:13,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,1024,0.18961599469184875,0.1879040002822876,0.19174399971961975,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:28,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,2048,0.21296000480651855,0.2112639993429184,0.21513600647449493,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:28,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,4096,0.2367040067911148,0.23467519879341125,0.23888640105724335,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:28,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,8192,0.26335999369621277,0.26099199056625366,0.2656640112400055,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:28,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,16384,0.2850880026817322,0.28336000442504883,0.2869440019130707,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:28,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,32768,0.30460798740386963,0.3023360073566437,0.30684158205986023,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:28,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,65536,0.31569600105285645,0.3138048052787781,0.3180544078350067,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:28,0.2.1 embedding,liger,forward,speed,ms,V,embedding dimension,131072,0.31988799571990967,0.31808000802993774,0.3219392001628876,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:28,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,1024,0.7865599989891052,0.7846271991729736,0.7891008257865906,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:43,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,2048,0.8262079954147339,0.8236607909202576,0.8279871940612793,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:43,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,4096,0.8446240425109863,0.8429504036903381,0.8475391864776611,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:43,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,8192,0.8540480136871338,0.8518400192260742,0.8557760119438171,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:43,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,16384,0.857695996761322,0.8553280234336853,0.8595200181007385,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:43,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,32768,0.8596479892730713,0.8576639890670776,0.8618879914283752,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:43,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,65536,1.0087039470672607,0.8624832034111023,1.0126848220825195,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:43,0.2.1 embedding,huggingface,forward,speed,ms,V,embedding dimension,131072,0.8633919954299927,0.8609600067138672,0.8647680282592773,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:43,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,1024,0.2572160065174103,0.255840003490448,0.25833600759506226,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:58,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,2048,0.2817760109901428,0.2805440127849579,0.2831552028656006,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:58,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,4096,0.30182400345802307,0.3002175986766815,0.3032831847667694,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:58,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,8192,0.3126400113105774,0.3114303946495056,0.31427839398384094,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:58,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,16384,0.3190400004386902,0.31795841455459595,0.32016000151634216,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:58,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,32768,0.32419198751449585,0.32281601428985596,0.32559359073638916,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:58,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,65536,0.3238080143928528,0.32236799597740173,0.3250240087509155,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:58,0.2.1 embedding,torch_compile,forward,speed,ms,V,embedding dimension,131072,0.3256959915161133,0.32434558868408203,0.32689279317855835,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:32:58,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,1024,2.17740797996521,2.1755776405334473,2.180025577545166,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:13,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,2048,2.2861440181732178,2.284735918045044,2.2882239818573,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:13,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,4096,2.4825921058654785,2.48024320602417,2.484800100326538,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:13,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,8192,2.74452805519104,2.7430784702301025,2.7452287673950195,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:13,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,16384,3.1216320991516113,3.1202433109283447,3.125638484954834,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:13,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,32768,3.7801599502563477,3.774118423461914,3.7824511528015137,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:13,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,65536,4.991136074066162,4.9875006675720215,4.993491172790527,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:13,0.2.1 embedding,liger,full,speed,ms,V,embedding dimension,131072,7.383471965789795,7.377497673034668,7.386828899383545,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:13,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,1024,1.5774879455566406,1.5668543577194214,1.7933248281478882,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:28,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,2048,1.7074079513549805,1.7012799978256226,1.8109056949615479,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:28,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,4096,1.950543999671936,1.9466559886932373,1.9592640399932861,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:28,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,8192,2.404927968978882,2.400460720062256,2.4551360607147217,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:28,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,16384,3.119904041290283,3.1171774864196777,3.1267263889312744,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:28,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,32768,4.32857608795166,4.321491241455078,4.439519882202148,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:28,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,65536,5.065216064453125,5.059558391571045,5.115980625152588,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:28,0.2.1 embedding,huggingface,full,speed,ms,V,embedding dimension,131072,7.489376068115234,7.484294414520264,7.5203776359558105,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:28,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,1024,1.0930559635162354,1.0918079614639282,1.0945919752120972,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:43,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,2048,1.1930559873580933,1.191705584526062,1.1951104402542114,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:43,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,4096,1.3096319437026978,1.3073855638504028,1.3119615316390991,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:43,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,8192,1.4822720289230347,1.480512022972107,1.4839999675750732,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:43,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,16384,1.7870559692382812,1.7859647274017334,1.7892736196517944,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:43,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,32768,2.3838400840759277,2.381312131881714,2.3860929012298584,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:43,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,65536,3.7430078983306885,3.740166425704956,3.745452880859375,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:43,0.2.1 embedding,torch_compile,full,speed,ms,V,embedding dimension,131072,5.940896034240723,5.934713363647461,5.943462371826172,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:43,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,1024,12348.125,12348.125,12348.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:45,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,2048,12360.125,12360.125,12360.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:45,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,4096,12384.125,12384.125,12384.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:45,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,8192,12432.125,12432.125,12432.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:45,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,16384,12528.125,12528.125,12528.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:45,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,32768,12720.125,12720.125,12720.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:45,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,65536,13104.125,13104.125,13104.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:45,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,131072,13872.125,13872.125,13872.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:45,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,1024,12356.537109375,12356.537109375,12356.537109375,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:48,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,2048,12371.359375,12371.359375,12371.359375,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:48,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,4096,12401.40625,12401.40625,12401.40625,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:48,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,8192,12461.5,12461.5,12461.5,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:48,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,16384,12581.6875,12581.6875,12581.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:48,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,32768,12773.6875,12773.6875,12773.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:48,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,65536,13157.6875,13157.6875,13157.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:48,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,131072,13925.6875,13925.6875,13925.6875,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:48,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,1024,12348.125,12348.125,12348.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:52,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,2048,12366.125,12366.125,12366.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:52,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,4096,12402.125,12402.125,12402.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:52,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,8192,12474.125,12474.125,12474.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:52,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,16384,12618.125,12618.125,12618.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:52,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,32768,12906.125,12906.125,12906.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:52,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,65536,13482.125,13482.125,13482.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:52,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,131072,14634.125,14634.125,14634.125,"{""B"": 32, ""T"": 512, ""D"": 768, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:33:52,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,1024,14346.125,14346.125,14346.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:04,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,2048,14410.125,14410.125,14410.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:04,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,4096,14538.125,14538.125,14538.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:04,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,8192,14794.125,14794.125,14794.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:04,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,16384,15306.125,15306.125,15306.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:04,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,32768,16330.125,16330.125,16330.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:04,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,65536,18378.125,18378.125,18378.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:04,0.2.1 embedding,liger,full,memory,MB,V,embedding dimension,131072,22474.125,22474.125,22474.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:04,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,1024,14388.130859375,14388.130859375,14388.130859375,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:17,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,2048,14468.154296875,14468.154296875,14468.154296875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:17,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,4096,14628.201171875,14628.201171875,14628.201171875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:17,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,8192,14948.294921875,14948.294921875,14948.294921875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:17,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,16384,15588.482421875,15588.482421875,15588.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:17,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,32768,16612.482421875,16612.482421875,16612.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:17,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,65536,18660.482421875,18660.482421875,18660.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:17,0.2.1 embedding,huggingface,full,memory,MB,V,embedding dimension,131072,22756.482421875,22756.482421875,22756.482421875,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:17,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,1024,14346.125,14346.125,14346.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:31,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,2048,14442.125,14442.125,14442.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:31,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,4096,14634.125,14634.125,14634.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:31,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,8192,15018.125,15018.125,15018.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:31,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,16384,1536.125,1536.125,1536.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:31,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,32768,3072.125,3072.125,3072.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:31,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,65536,6144.125,6144.125,6144.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:31,0.2.1 embedding,torch_compile,full,memory,MB,V,embedding dimension,131072,12288.125,12288.125,12288.125,"{""B"": 8, ""T"": 2048, ""D"": 4096, ""dtype"": ""torch.float32""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:34:31,0.2.1 fused_linear_cross_entropy,liger,forward,speed,ms,BT,B x T,4096,119.52153778076172,119.52153778076172,119.52153778076172,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:44:03,0.4.2 fused_linear_cross_entropy,liger,forward,speed,ms,BT,B x T,8192,168.08563232421875,168.08563232421875,168.08563232421875,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:44:03,0.4.2 fused_linear_cross_entropy,liger,forward,speed,ms,BT,B x T,16384,274.07342529296875,274.07342529296875,274.07342529296875,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:44:03,0.4.2 fused_linear_cross_entropy,liger,forward,speed,ms,BT,B x T,32768,508.4652099609375,508.4652099609375,508.4652099609375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:44:03,0.4.2 fused_linear_cross_entropy,huggingface,forward,speed,ms,BT,B x T,4096,20.911680221557617,20.90903663635254,20.915321350097656,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:44:34,0.4.2 fused_linear_cross_entropy,huggingface,forward,speed,ms,BT,B x T,8192,37.97203063964844,37.9546012878418,37.989463806152344,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:44:34,0.4.2 fused_linear_cross_entropy,huggingface,forward,speed,ms,BT,B x T,16384,76.39142608642578,76.39142608642578,76.39142608642578,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:44:34,0.4.2 fused_linear_cross_entropy,huggingface,forward,speed,ms,BT,B x T,32768,151.91404724121094,151.91404724121094,151.91404724121094,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:44:34,0.4.2 fused_linear_cross_entropy,liger,full,speed,ms,BT,B x T,4096,121.43059539794922,121.43059539794922,121.43059539794922,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:45:11,0.4.2 fused_linear_cross_entropy,liger,full,speed,ms,BT,B x T,8192,166.70867919921875,166.70867919921875,166.70867919921875,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:45:11,0.4.2 fused_linear_cross_entropy,liger,full,speed,ms,BT,B x T,16384,277.1166687011719,277.1166687011719,277.1166687011719,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:45:11,0.4.2 fused_linear_cross_entropy,liger,full,speed,ms,BT,B x T,32768,511.0638732910156,511.0638732910156,511.0638732910156,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:45:11,0.4.2 fused_linear_cross_entropy,huggingface,full,speed,ms,BT,B x T,4096,55.96684646606445,55.96684646606445,55.96684646606445,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:45:46,0.4.2 fused_linear_cross_entropy,huggingface,full,speed,ms,BT,B x T,8192,111.45471954345703,111.45471954345703,111.45471954345703,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:45:46,0.4.2 fused_linear_cross_entropy,huggingface,full,speed,ms,BT,B x T,16384,220.7836151123047,220.7836151123047,220.7836151123047,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:45:46,0.4.2 fused_linear_cross_entropy,huggingface,full,speed,ms,BT,B x T,32768,452.4712829589844,452.4712829589844,452.4712829589844,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:45:46,0.4.2 fused_linear_cross_entropy,liger,full,memory,MB,BT,B x T,4096,4245.5478515625,4245.5478515625,4245.5478515625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:46:25,0.4.2 fused_linear_cross_entropy,liger,full,memory,MB,BT,B x T,8192,4466.9697265625,4466.9697265625,4466.9697265625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:46:25,0.4.2 fused_linear_cross_entropy,liger,full,memory,MB,BT,B x T,16384,4910.4384765625,4910.4384765625,4910.4384765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:46:25,0.4.2 fused_linear_cross_entropy,liger,full,memory,MB,BT,B x T,32768,5794.6259765625,5794.6259765625,5794.6259765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:46:25,0.4.2 fused_linear_cross_entropy,huggingface,full,memory,MB,BT,B x T,4096,6092.2822265625,6092.2822265625,6092.2822265625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:46:53,0.4.2 fused_linear_cross_entropy,huggingface,full,memory,MB,BT,B x T,8192,9162.3134765625,9162.3134765625,9162.3134765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:46:53,0.4.2 fused_linear_cross_entropy,huggingface,full,memory,MB,BT,B x T,16384,15302.3759765625,15302.3759765625,15302.3759765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:46:53,0.4.2 fused_linear_cross_entropy,huggingface,full,memory,MB,BT,B x T,32768,27582.5,27582.5,27582.5,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-22 17:46:53,0.4.2 geglu,liger,full,speed,ms,T,sequence length,1024,30.03536033630371,30.03536033630371,30.03536033630371,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:14,0.2.1 geglu,liger,full,speed,ms,T,sequence length,2048,54.04060745239258,54.04060745239258,54.04060745239258,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:14,0.2.1 geglu,liger,full,speed,ms,T,sequence length,4096,108.52435302734375,108.52435302734375,108.52435302734375,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:14,0.2.1 geglu,liger,full,speed,ms,T,sequence length,8192,216.6227264404297,216.6227264404297,216.6227264404297,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:14,0.2.1 geglu,huggingface,full,speed,ms,T,sequence length,1024,27.938560485839844,27.938560485839844,27.938560485839844,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:21,0.2.1 geglu,huggingface,full,speed,ms,T,sequence length,2048,54.51279830932617,54.51279830932617,54.51279830932617,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:21,0.2.1 geglu,huggingface,full,speed,ms,T,sequence length,4096,110.97718048095703,110.97718048095703,110.97718048095703,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:21,0.2.1 geglu,huggingface,full,speed,ms,T,sequence length,8192,220.93954467773438,220.93954467773438,220.93954467773438,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:21,0.2.1 geglu,liger,forward,speed,ms,T,sequence length,1024,9.280096054077148,9.280096054077148,9.280096054077148,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:26,0.2.1 geglu,liger,forward,speed,ms,T,sequence length,2048,17.59040069580078,17.59040069580078,17.59040069580078,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:26,0.2.1 geglu,liger,forward,speed,ms,T,sequence length,4096,36.18726348876953,36.18726348876953,36.18726348876953,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:26,0.2.1 geglu,liger,forward,speed,ms,T,sequence length,8192,72.60655975341797,72.60655975341797,72.60655975341797,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:26,0.2.1 geglu,huggingface,forward,speed,ms,T,sequence length,1024,9.257439613342285,9.257439613342285,9.257439613342285,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:31,0.2.1 geglu,huggingface,forward,speed,ms,T,sequence length,2048,18.099519729614258,18.099519729614258,18.099519729614258,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:31,0.2.1 geglu,huggingface,forward,speed,ms,T,sequence length,4096,36.37263870239258,36.37263870239258,36.37263870239258,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:31,0.2.1 geglu,huggingface,forward,speed,ms,T,sequence length,8192,72.66553497314453,72.66553497314453,72.66553497314453,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:31,0.2.1 geglu,liger,backward,speed,ms,T,sequence length,1024,18.088287353515625,18.088287353515625,18.088287353515625,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:37,0.2.1 geglu,liger,backward,speed,ms,T,sequence length,2048,35.195518493652344,35.195518493652344,35.195518493652344,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:37,0.2.1 geglu,liger,backward,speed,ms,T,sequence length,4096,70.51395416259766,70.51395416259766,70.51395416259766,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:37,0.2.1 geglu,liger,backward,speed,ms,T,sequence length,8192,141.28550720214844,141.28550720214844,141.28550720214844,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:37,0.2.1 geglu,huggingface,backward,speed,ms,T,sequence length,1024,18.521728515625,18.521728515625,18.521728515625,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:42,0.2.1 geglu,huggingface,backward,speed,ms,T,sequence length,2048,36.045406341552734,36.045406341552734,36.045406341552734,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:42,0.2.1 geglu,huggingface,backward,speed,ms,T,sequence length,4096,72.88412475585938,72.88412475585938,72.88412475585938,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:42,0.2.1 geglu,huggingface,backward,speed,ms,T,sequence length,8192,144.2132110595703,144.2132110595703,144.2132110595703,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:42,0.2.1 geglu,liger,full,memory,MB,T,sequence length,1024,1582.25,1582.25,1582.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:45,0.2.1 geglu,liger,full,memory,MB,T,sequence length,2048,2546.25,2546.25,2546.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:45,0.2.1 geglu,liger,full,memory,MB,T,sequence length,4096,4474.25,4474.25,4474.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:45,0.2.1 geglu,liger,full,memory,MB,T,sequence length,8192,8330.25,8330.25,8330.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:45,0.2.1 geglu,huggingface,full,memory,MB,T,sequence length,1024,1992.25,1992.25,1992.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:50,0.2.1 geglu,huggingface,full,memory,MB,T,sequence length,2048,3452.25,3452.25,3452.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:50,0.2.1 geglu,huggingface,full,memory,MB,T,sequence length,4096,6372.25,6372.25,6372.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:50,0.2.1 geglu,huggingface,full,memory,MB,T,sequence length,8192,12212.25,12212.25,12212.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:50,0.2.1 geglu,liger,forward,memory,MB,T,sequence length,1024,918.25,918.25,918.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:55,0.2.1 geglu,liger,forward,memory,MB,T,sequence length,2048,1562.25,1562.25,1562.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:55,0.2.1 geglu,liger,forward,memory,MB,T,sequence length,4096,2850.25,2850.25,2850.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:55,0.2.1 geglu,liger,forward,memory,MB,T,sequence length,8192,5426.25,5426.25,5426.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:55,0.2.1 geglu,huggingface,forward,memory,MB,T,sequence length,1024,1090.25,1090.25,1090.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:58,0.2.1 geglu,huggingface,forward,memory,MB,T,sequence length,2048,1906.25,1906.25,1906.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:58,0.2.1 geglu,huggingface,forward,memory,MB,T,sequence length,4096,3538.25,3538.25,3538.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:58,0.2.1 geglu,huggingface,forward,memory,MB,T,sequence length,8192,6802.25,6802.25,6802.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:38:58,0.2.1 geglu,liger,backward,memory,MB,T,sequence length,1024,1582.25,1582.25,1582.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:02,0.2.1 geglu,liger,backward,memory,MB,T,sequence length,2048,2546.25,2546.25,2546.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:02,0.2.1 geglu,liger,backward,memory,MB,T,sequence length,4096,4474.25,4474.25,4474.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:02,0.2.1 geglu,liger,backward,memory,MB,T,sequence length,8192,8330.25,8330.25,8330.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:02,0.2.1 geglu,huggingface,backward,memory,MB,T,sequence length,1024,1992.25,1992.25,1992.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:06,0.2.1 geglu,huggingface,backward,memory,MB,T,sequence length,2048,3452.25,3452.25,3452.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:06,0.2.1 geglu,huggingface,backward,memory,MB,T,sequence length,4096,6372.25,6372.25,6372.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:06,0.2.1 geglu,huggingface,backward,memory,MB,T,sequence length,8192,12212.25,12212.25,12212.25,"{""bsz"": 8, ""hidden_size"": 4096, ""intermediate_size"": 11008, ""hidden_act"": ""gelu_pytorch_tanh"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:06,0.2.1 layer_norm,liger,forward,speed,ms,N,hidden size,1024,0.030271999537944794,0.02921600081026554,0.03142400085926056,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:14,0.2.1 layer_norm,liger,forward,speed,ms,N,hidden size,2048,0.04992000013589859,0.04912000149488449,0.050783999264240265,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:14,0.2.1 layer_norm,liger,forward,speed,ms,N,hidden size,4096,0.08816000074148178,0.08739200234413147,0.08899199962615967,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:14,0.2.1 layer_norm,liger,forward,speed,ms,N,hidden size,8192,0.16521599888801575,0.16435199975967407,0.16627199947834015,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:14,0.2.1 layer_norm,liger,forward,speed,ms,N,hidden size,16384,0.32230401039123535,0.32070401310920715,0.32393598556518555,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:14,0.2.1 layer_norm,huggingface,forward,speed,ms,N,hidden size,1024,0.034143999218940735,0.033376000821590424,0.03580800071358681,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:17,0.2.1 layer_norm,huggingface,forward,speed,ms,N,hidden size,2048,0.05734400078654289,0.05615999922156334,0.05859199911355972,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:17,0.2.1 layer_norm,huggingface,forward,speed,ms,N,hidden size,4096,0.1218239963054657,0.12054400146007538,0.12316799908876419,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:17,0.2.1 layer_norm,huggingface,forward,speed,ms,N,hidden size,8192,0.25755199790000916,0.255840003490448,0.25939199328422546,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:17,0.2.1 layer_norm,huggingface,forward,speed,ms,N,hidden size,16384,0.5066879987716675,0.5045183897018433,0.5089280009269714,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:17,0.2.1 layer_norm,liger,full,speed,ms,N,hidden size,1024,0.28019198775291443,0.2780799865722656,0.284960001707077,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:19,0.2.1 layer_norm,liger,full,speed,ms,N,hidden size,2048,0.27827200293540955,0.27638399600982666,0.2824704051017761,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:19,0.2.1 layer_norm,liger,full,speed,ms,N,hidden size,4096,0.2847039997577667,0.27955201268196106,0.2908479869365692,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:19,0.2.1 layer_norm,liger,full,speed,ms,N,hidden size,8192,0.4405759871006012,0.43780481815338135,0.4440320134162903,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:19,0.2.1 layer_norm,liger,full,speed,ms,N,hidden size,16384,1.1488319635391235,1.1439871788024902,1.1527807712554932,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:19,0.2.1 layer_norm,huggingface,full,speed,ms,N,hidden size,1024,0.11884800344705582,0.11750400066375732,0.12035199999809265,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,speed,ms,N,hidden size,2048,0.1966399997472763,0.19432319700717926,0.19888000190258026,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,speed,ms,N,hidden size,4096,0.43142399191856384,0.42931199073791504,0.4336639940738678,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,speed,ms,N,hidden size,8192,0.829584002494812,0.826918363571167,0.832857608795166,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,speed,ms,N,hidden size,16384,1.6212799549102783,1.6171647310256958,1.6246912479400635,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,liger,full,memory,MB,N,hidden size,1024,80.90625,80.90625,80.90625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,liger,full,memory,MB,N,hidden size,2048,161.78125,161.78125,161.78125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,liger,full,memory,MB,N,hidden size,4096,323.53125,323.53125,323.53125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,liger,full,memory,MB,N,hidden size,8192,647.03125,647.03125,647.03125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,liger,full,memory,MB,N,hidden size,16384,1294.03125,1294.03125,1294.03125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,memory,MB,N,hidden size,1024,80.0625,80.0625,80.0625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,memory,MB,N,hidden size,2048,160.09375,160.09375,160.09375,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,memory,MB,N,hidden size,4096,320.15625,320.15625,320.15625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,memory,MB,N,hidden size,8192,640.28125,640.28125,640.28125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 layer_norm,huggingface,full,memory,MB,N,hidden size,16384,1280.53125,1280.53125,1280.53125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:21,0.2.1 rms_norm,liger,forward,speed,ms,H,hidden size,1024,0.01360000018030405,0.012864000163972378,0.01603199914097786,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:30,0.2.1 rms_norm,liger,forward,speed,ms,H,hidden size,2048,0.019999999552965164,0.018624000251293182,0.02160000056028366,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:30,0.2.1 rms_norm,liger,forward,speed,ms,H,hidden size,4096,0.031072000041604042,0.030047999694943428,0.031968001276254654,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:30,0.2.1 rms_norm,liger,forward,speed,ms,H,hidden size,8192,0.0517439991235733,0.050624001771211624,0.05289600044488907,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:30,0.2.1 rms_norm,liger,forward,speed,ms,H,hidden size,16384,0.0952640026807785,0.0942080020904541,0.09667199850082397,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:30,0.2.1 rms_norm,liger,forward,speed,ms,H,hidden size,32768,0.18223999440670013,0.18035200238227844,0.18417279422283173,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:30,0.2.1 rms_norm,huggingface,forward,speed,ms,H,hidden size,1024,0.07820799946784973,0.0777600035071373,0.0790719985961914,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:33,0.2.1 rms_norm,huggingface,forward,speed,ms,H,hidden size,2048,0.13631999492645264,0.13555200397968292,0.13731199502944946,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:33,0.2.1 rms_norm,huggingface,forward,speed,ms,H,hidden size,4096,0.27990400791168213,0.2789439857006073,0.28118398785591125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:33,0.2.1 rms_norm,huggingface,forward,speed,ms,H,hidden size,8192,0.5190399885177612,0.5175359845161438,0.5209856033325195,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:33,0.2.1 rms_norm,huggingface,forward,speed,ms,H,hidden size,16384,0.9856320023536682,0.9835839867591858,0.9876928329467773,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:33,0.2.1 rms_norm,huggingface,forward,speed,ms,H,hidden size,32768,1.9190720319747925,1.917081594467163,1.921875238418579,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:33,0.2.1 rms_norm,liger,full,speed,ms,H,hidden size,1024,0.28601598739624023,0.2837119996547699,0.29068800806999207,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:36,0.2.1 rms_norm,liger,full,speed,ms,H,hidden size,2048,0.286624014377594,0.2845824062824249,0.2905920147895813,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:36,0.2.1 rms_norm,liger,full,speed,ms,H,hidden size,4096,0.28830400109291077,0.28533118963241577,0.2935168147087097,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:36,0.2.1 rms_norm,liger,full,speed,ms,H,hidden size,8192,0.29407998919487,0.289216011762619,0.3038719892501831,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:36,0.2.1 rms_norm,liger,full,speed,ms,H,hidden size,16384,0.410863995552063,0.4088575839996338,0.41293439269065857,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:36,0.2.1 rms_norm,liger,full,speed,ms,H,hidden size,32768,1.2316479682922363,1.228230357170105,1.235001564025879,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:36,0.2.1 rms_norm,huggingface,full,speed,ms,H,hidden size,1024,0.3176960051059723,0.3147839903831482,0.32177281379699707,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:40,0.2.1 rms_norm,huggingface,full,speed,ms,H,hidden size,2048,0.49038398265838623,0.4888896048069,0.4920639991760254,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:40,0.2.1 rms_norm,huggingface,full,speed,ms,H,hidden size,4096,1.011423945426941,1.0089855194091797,1.013759970664978,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:40,0.2.1 rms_norm,huggingface,full,speed,ms,H,hidden size,8192,1.8621759414672852,1.859769582748413,1.8646591901779175,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:40,0.2.1 rms_norm,huggingface,full,speed,ms,H,hidden size,16384,3.5439999103546143,3.5410239696502686,3.547679901123047,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:40,0.2.1 rms_norm,huggingface,full,speed,ms,H,hidden size,32768,6.910431861877441,6.907142639160156,6.914393901824951,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:40,0.2.1 rms_norm,liger,backward,speed,ms,H,hidden size,1024,0.09372799843549728,0.09177599847316742,0.09763199836015701,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:43,0.2.1 rms_norm,liger,backward,speed,ms,H,hidden size,2048,0.09030400216579437,0.08746880292892456,0.09398400038480759,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:43,0.2.1 rms_norm,liger,backward,speed,ms,H,hidden size,4096,0.09913600236177444,0.09804800152778625,0.10039679706096649,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:43,0.2.1 rms_norm,liger,backward,speed,ms,H,hidden size,8192,0.17801600694656372,0.1765120029449463,0.1793919950723648,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:43,0.2.1 rms_norm,liger,backward,speed,ms,H,hidden size,16384,0.32051199674606323,0.3187839984893799,0.32230401039123535,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:43,0.2.1 rms_norm,liger,backward,speed,ms,H,hidden size,32768,1.0562880039215088,1.053491234779358,1.059673547744751,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:43,0.2.1 rms_norm,huggingface,backward,speed,ms,H,hidden size,1024,0.19577600061893463,0.19523200392723083,0.19631999731063843,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,backward,speed,ms,H,hidden size,2048,0.36188799142837524,0.3601599931716919,0.363647997379303,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,backward,speed,ms,H,hidden size,4096,0.7403839826583862,0.7381759881973267,0.7426176071166992,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,backward,speed,ms,H,hidden size,8192,1.3515520095825195,1.348736047744751,1.3550655841827393,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,backward,speed,ms,H,hidden size,16384,2.569632053375244,2.5663681030273438,2.5731201171875,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,backward,speed,ms,H,hidden size,32768,5.0147199630737305,5.011123180389404,5.0179901123046875,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,liger,full,memory,MB,H,hidden size,1024,36.02392578125,36.02392578125,36.02392578125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,liger,full,memory,MB,H,hidden size,2048,72.03955078125,72.03955078125,72.03955078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,liger,full,memory,MB,H,hidden size,4096,144.07080078125,144.07080078125,144.07080078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,liger,full,memory,MB,H,hidden size,8192,268.13330078125,268.13330078125,268.13330078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,liger,full,memory,MB,H,hidden size,16384,432.25830078125,432.25830078125,432.25830078125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,liger,full,memory,MB,H,hidden size,32768,752.5087890625,752.5087890625,752.5087890625,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,full,memory,MB,H,hidden size,1024,80.01953125,80.01953125,80.01953125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,full,memory,MB,H,hidden size,2048,160.03125,160.03125,160.03125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,full,memory,MB,H,hidden size,4096,320.0546875,320.0546875,320.0546875,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,full,memory,MB,H,hidden size,8192,640.1015625,640.1015625,640.1015625,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,full,memory,MB,H,hidden size,16384,1280.1953125,1280.1953125,1280.1953125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rms_norm,huggingface,full,memory,MB,H,hidden size,32768,2560.3828125,2560.3828125,2560.3828125,"{""M"": 2048, ""dtype"": ""torch.bfloat16"", ""eps"": 1e-06}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:46,0.2.1 rope,liger,forward,speed,ms,H,hidden size,512,0.011359999887645245,0.01033599954098463,0.011455999687314034,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:55,0.2.1 rope,liger,forward,speed,ms,H,hidden size,2048,0.020864000543951988,0.020447999238967896,0.02239999920129776,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:55,0.2.1 rope,liger,forward,speed,ms,H,hidden size,8192,0.059487998485565186,0.05830400064587593,0.06060799956321716,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:55,0.2.1 rope,huggingface,forward,speed,ms,H,hidden size,512,0.07968000322580338,0.07923199981451035,0.10408961027860641,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:56,0.2.1 rope,huggingface,forward,speed,ms,H,hidden size,2048,0.1570879966020584,0.15651200711727142,0.15785600244998932,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:56,0.2.1 rope,huggingface,forward,speed,ms,H,hidden size,8192,0.5167999863624573,0.5161600112915039,0.5176640152931213,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:56,0.2.1 rope,liger,backward,speed,ms,H,hidden size,512,0.12227199971675873,0.05539200082421303,0.1699904054403305,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:57,0.2.1 rope,liger,backward,speed,ms,H,hidden size,2048,0.12337599694728851,0.11945600062608719,0.15338242053985596,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:57,0.2.1 rope,liger,backward,speed,ms,H,hidden size,8192,0.12812800705432892,0.11593600362539291,0.1985855996608734,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:57,0.2.1 rope,huggingface,backward,speed,ms,H,hidden size,512,0.2648000121116638,0.2489279955625534,0.3578239977359772,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:59,0.2.1 rope,huggingface,backward,speed,ms,H,hidden size,2048,0.2536320090293884,0.24692480266094208,0.31929606199264526,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:59,0.2.1 rope,huggingface,backward,speed,ms,H,hidden size,8192,0.621504008769989,0.6208000183105469,0.6223679780960083,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:39:59,0.2.1 rope,liger,full,speed,ms,H,hidden size,512,0.27401599287986755,0.26447999477386475,0.3555007874965668,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:00,0.2.1 rope,liger,full,speed,ms,H,hidden size,2048,0.2815040051937103,0.26904961466789246,0.3562496304512024,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:00,0.2.1 rope,liger,full,speed,ms,H,hidden size,8192,0.2759679853916168,0.267244815826416,0.3601728081703186,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:00,0.2.1 rope,huggingface,full,speed,ms,H,hidden size,512,0.5160639882087708,0.5028480291366577,0.6553279757499695,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,huggingface,full,speed,ms,H,hidden size,2048,0.5289119482040405,0.510598361492157,0.7208256721496582,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,huggingface,full,speed,ms,H,hidden size,8192,1.1329920291900635,1.1318720579147339,1.1339199542999268,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,liger,full,memory,MB,H,hidden size,512,13.26611328125,13.26611328125,13.26611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,liger,full,memory,MB,H,hidden size,2048,28.64111328125,28.64111328125,28.64111328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,liger,full,memory,MB,H,hidden size,8192,90.14111328125,90.14111328125,90.14111328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,huggingface,full,memory,MB,H,hidden size,512,22.26611328125,22.26611328125,22.26611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,huggingface,full,memory,MB,H,hidden size,2048,64.64111328125,64.64111328125,64.64111328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,huggingface,full,memory,MB,H,hidden size,8192,234.14111328125,234.14111328125,234.14111328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:01,0.2.1 rope,liger,forward,speed,ms,T,sequence length,1024,0.034432001411914825,0.03340800106525421,0.03545600175857544,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:04,0.2.1 rope,liger,forward,speed,ms,T,sequence length,2048,0.058880001306533813,0.0578560009598732,0.059859201312065125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:04,0.2.1 rope,liger,forward,speed,ms,T,sequence length,4096,0.10899200290441513,0.10784000158309937,0.1101439967751503,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:04,0.2.1 rope,liger,forward,speed,ms,T,sequence length,8192,0.20927999913692474,0.20796799659729004,0.21059200167655945,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:04,0.2.1 rope,liger,forward,speed,ms,T,sequence length,16384,0.4105280041694641,0.4089151918888092,0.41203200817108154,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:04,0.2.1 rope,huggingface,forward,speed,ms,T,sequence length,1024,0.2808319926261902,0.28019198775291443,0.28160640597343445,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:06,0.2.1 rope,huggingface,forward,speed,ms,T,sequence length,2048,0.5160959959030151,0.5155072212219238,0.5169280171394348,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:06,0.2.1 rope,huggingface,forward,speed,ms,T,sequence length,4096,0.9947839975357056,0.9939200282096863,0.9956799745559692,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:06,0.2.1 rope,huggingface,forward,speed,ms,T,sequence length,8192,1.9332640171051025,1.9323519468307495,1.9344960451126099,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:06,0.2.1 rope,huggingface,forward,speed,ms,T,sequence length,16384,3.8169920444488525,3.815808057785034,3.8180160522460938,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:06,0.2.1 rope,liger,backward,speed,ms,T,sequence length,1024,0.1260479986667633,0.12014079838991165,0.143449604511261,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:08,0.2.1 rope,liger,backward,speed,ms,T,sequence length,2048,0.11606399714946747,0.11021439731121063,0.12432000041007996,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:08,0.2.1 rope,liger,backward,speed,ms,T,sequence length,4096,0.12409599870443344,0.11817599833011627,0.1313920021057129,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:08,0.2.1 rope,liger,backward,speed,ms,T,sequence length,8192,0.21004800498485565,0.20867200195789337,0.21164800226688385,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:08,0.2.1 rope,liger,backward,speed,ms,T,sequence length,16384,0.4102399945259094,0.40871042013168335,0.4119040071964264,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:08,0.2.1 rope,huggingface,backward,speed,ms,T,sequence length,1024,0.3304319977760315,0.3296447992324829,0.3314239978790283,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:10,0.2.1 rope,huggingface,backward,speed,ms,T,sequence length,2048,0.6213759779930115,0.6205440163612366,0.6223359704017639,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:10,0.2.1 rope,huggingface,backward,speed,ms,T,sequence length,4096,1.1872799396514893,1.1858432292938232,1.1886080503463745,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:10,0.2.1 rope,huggingface,backward,speed,ms,T,sequence length,8192,2.321280002593994,2.318873643875122,2.324160099029541,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:10,0.2.1 rope,huggingface,backward,speed,ms,T,sequence length,16384,4.557248115539551,4.550220966339111,4.560742378234863,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:10,0.2.1 rope,liger,full,speed,ms,T,sequence length,1024,0.2682560086250305,0.2641535997390747,0.2762559950351715,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:12,0.2.1 rope,liger,full,speed,ms,T,sequence length,2048,0.2654559910297394,0.26105600595474243,0.2746559977531433,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:12,0.2.1 rope,liger,full,speed,ms,T,sequence length,4096,0.2650560140609741,0.2608831822872162,0.2715519964694977,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:12,0.2.1 rope,liger,full,speed,ms,T,sequence length,8192,0.4158720076084137,0.41413119435310364,0.4178048074245453,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:12,0.2.1 rope,liger,full,speed,ms,T,sequence length,16384,0.8167039752006531,0.8143680095672607,0.8189184069633484,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:12,0.2.1 rope,huggingface,full,speed,ms,T,sequence length,1024,0.6059200167655945,0.6047679781913757,0.6072319746017456,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,huggingface,full,speed,ms,T,sequence length,2048,1.1326719522476196,1.1318080425262451,1.133631944656372,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,huggingface,full,speed,ms,T,sequence length,4096,2.176192045211792,2.175136089324951,2.177433729171753,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,huggingface,full,speed,ms,T,sequence length,8192,4.248256206512451,4.246367931365967,4.2566399574279785,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,huggingface,full,speed,ms,T,sequence length,16384,8.365951538085938,8.36348819732666,8.380928039550781,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,liger,full,memory,MB,T,sequence length,1024,49.13330078125,49.13330078125,49.13330078125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,liger,full,memory,MB,T,sequence length,2048,90.14111328125,90.14111328125,90.14111328125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,liger,full,memory,MB,T,sequence length,4096,172.15673828125,172.15673828125,172.15673828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,liger,full,memory,MB,T,sequence length,8192,336.18798828125,336.18798828125,336.18798828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,liger,full,memory,MB,T,sequence length,16384,664.25048828125,664.25048828125,664.25048828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:14,0.2.1 rope,huggingface,full,memory,MB,T,sequence length,1024,121.13330078125,121.13330078125,121.13330078125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:15,0.2.1 rope,huggingface,full,memory,MB,T,sequence length,2048,234.14111328125,234.14111328125,234.14111328125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:15,0.2.1 rope,huggingface,full,memory,MB,T,sequence length,4096,460.15673828125,460.15673828125,460.15673828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:15,0.2.1 rope,huggingface,full,memory,MB,T,sequence length,8192,912.18798828125,912.18798828125,912.18798828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:15,0.2.1 rope,huggingface,full,memory,MB,T,sequence length,16384,1816.25048828125,1816.25048828125,1816.25048828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:15,0.2.1 swiglu,liger,forward,speed,ms,T,sequence length,1024,5.06441593170166,5.06441593170166,5.06441593170166,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:24,0.2.1 swiglu,liger,forward,speed,ms,T,sequence length,2048,10.075455665588379,10.075455665588379,10.075455665588379,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:24,0.2.1 swiglu,liger,forward,speed,ms,T,sequence length,4096,18.001951217651367,18.001951217651367,18.001951217651367,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:24,0.2.1 swiglu,liger,forward,speed,ms,T,sequence length,8192,35.930015563964844,35.930015563964844,35.930015563964844,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:24,0.2.1 swiglu,huggingface,forward,speed,ms,T,sequence length,1024,4.582320213317871,4.5821757316589355,4.582464218139648,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:28,0.2.1 swiglu,huggingface,forward,speed,ms,T,sequence length,2048,9.252832412719727,9.252832412719727,9.252832412719727,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:28,0.2.1 swiglu,huggingface,forward,speed,ms,T,sequence length,4096,18.160255432128906,18.160255432128906,18.160255432128906,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:28,0.2.1 swiglu,huggingface,forward,speed,ms,T,sequence length,8192,36.2911376953125,36.2911376953125,36.2911376953125,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:28,0.2.1 swiglu,liger,full,memory,MB,T,sequence length,1024,1100.25,1100.25,1100.25,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:32,0.2.1 swiglu,liger,full,memory,MB,T,sequence length,2048,1582.25,1582.25,1582.25,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:32,0.2.1 swiglu,liger,full,memory,MB,T,sequence length,4096,2546.25,2546.25,2546.25,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:32,0.2.1 swiglu,liger,full,memory,MB,T,sequence length,8192,4474.25,4474.25,4474.25,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:32,0.2.1 swiglu,huggingface,full,memory,MB,T,sequence length,1024,1294.25,1294.25,1294.25,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:36,0.2.1 swiglu,huggingface,full,memory,MB,T,sequence length,2048,1992.25,1992.25,1992.25,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:36,0.2.1 swiglu,huggingface,full,memory,MB,T,sequence length,4096,3452.25,3452.25,3452.25,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:36,0.2.1 swiglu,huggingface,full,memory,MB,T,sequence length,8192,6372.25,6372.25,6372.25,"{""B"": 4, ""hidden_size"": 4096, ""dtype"": ""torch.bfloat16"", ""intermediate_size"": 11008, ""hidden_act"": ""silu""}",NVIDIA A100-SXM4-80GB,2024-09-03 15:40:36,0.2.1 kl_div,liger,full,memory,MB,V,vocab size,4096,1536.0009765625,1536.0009765625,1536.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:40,0.2.1 kl_div,liger,full,memory,MB,V,vocab size,8192,3072.0009765625,3072.0009765625,3072.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:40,0.2.1 kl_div,liger,full,memory,MB,V,vocab size,16384,6144.0009765625,6144.0009765625,6144.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:40,0.2.1 kl_div,liger,full,memory,MB,V,vocab size,32768,12288.0009765625,12288.0009765625,12288.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:40,0.2.1 kl_div,liger,full,memory,MB,V,vocab size,65536,24576.0,24576.0,24576.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:40,0.2.1 kl_div,liger,full,memory,MB,V,vocab size,131072,49152.0,49152.0,49152.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:40,0.2.1 kl_div,torch,full,memory,MB,V,vocab size,4096,1792.0,1792.0,1792.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:41,0.2.1 kl_div,torch,full,memory,MB,V,vocab size,8192,3584.0,3584.0,3584.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:41,0.2.1 kl_div,torch,full,memory,MB,V,vocab size,16384,7168.0,7168.0,7168.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:41,0.2.1 kl_div,torch,full,memory,MB,V,vocab size,32768,14336.0,14336.0,14336.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:41,0.2.1 kl_div,torch,full,memory,MB,V,vocab size,65536,28672.0,28672.0,28672.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:41,0.2.1 kl_div,torch,full,memory,MB,V,vocab size,131072,57344.0,57344.0,57344.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:41,0.2.1 kl_div,liger,forward,speed,ms,V,vocab size,4096,0.30640000104904175,0.30563199520111084,0.30745598673820496,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:43,0.2.1 kl_div,liger,forward,speed,ms,V,vocab size,8192,0.5763360261917114,0.5754943490028381,0.5773376226425171,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:43,0.2.1 kl_div,liger,forward,speed,ms,V,vocab size,16384,1.1176480054855347,1.1165119409561157,1.1186367273330688,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:43,0.2.1 kl_div,liger,forward,speed,ms,V,vocab size,32768,2.1987199783325195,2.1970815658569336,2.200934410095215,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:43,0.2.1 kl_div,liger,forward,speed,ms,V,vocab size,65536,4.356672286987305,4.355186939239502,4.358956813812256,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:43,0.2.1 kl_div,liger,forward,speed,ms,V,vocab size,131072,8.697919845581055,8.690688133239746,8.703583717346191,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:43,0.2.1 kl_div,torch,forward,speed,ms,V,vocab size,4096,1.3298559188842773,1.3287359476089478,1.331385612487793,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:45,0.2.1 kl_div,torch,forward,speed,ms,V,vocab size,8192,2.594543933868408,2.592736005783081,2.596640110015869,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:45,0.2.1 kl_div,torch,forward,speed,ms,V,vocab size,16384,5.13375997543335,5.1324286460876465,5.1364288330078125,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:45,0.2.1 kl_div,torch,forward,speed,ms,V,vocab size,32768,10.225567817687988,10.225190162658691,10.227231979370117,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:45,0.2.1 kl_div,torch,forward,speed,ms,V,vocab size,65536,20.412960052490234,20.411020278930664,20.415000915527344,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:45,0.2.1 kl_div,torch,forward,speed,ms,V,vocab size,131072,40.818641662597656,40.816402435302734,40.82087707519531,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:45,0.2.1 kl_div,liger,full,speed,ms,V,vocab size,4096,2.040031909942627,1.9614335298538208,2.192307233810425,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:46,0.2.1 kl_div,liger,full,speed,ms,V,vocab size,8192,3.866431951522827,3.7955007553100586,3.8693249225616455,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:46,0.2.1 kl_div,liger,full,speed,ms,V,vocab size,16384,7.261951923370361,7.255136013031006,7.281760215759277,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:46,0.2.1 kl_div,liger,full,speed,ms,V,vocab size,32768,15.092127799987793,15.07801628112793,15.09660816192627,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:46,0.2.1 kl_div,liger,full,speed,ms,V,vocab size,65536,29.921375274658203,29.914867401123047,29.921951293945312,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:46,0.2.1 kl_div,liger,full,speed,ms,V,vocab size,131072,59.70220947265625,59.70220947265625,59.70220947265625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:46,0.2.1 kl_div,torch,full,speed,ms,V,vocab size,4096,2.8552000522613525,2.852755069732666,2.856454372406006,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:48,0.2.1 kl_div,torch,full,speed,ms,V,vocab size,8192,5.593632221221924,5.590988636016846,5.594636917114258,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:48,0.2.1 kl_div,torch,full,speed,ms,V,vocab size,16384,11.124671936035156,11.122162818908691,11.125061988830566,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:48,0.2.1 kl_div,torch,full,speed,ms,V,vocab size,32768,23.052032470703125,23.050334930419922,23.052589416503906,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:48,0.2.1 kl_div,torch,full,speed,ms,V,vocab size,65536,46.063167572021484,46.05990219116211,46.06643295288086,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:48,0.2.1 kl_div,torch,full,speed,ms,V,vocab size,131072,92.06393432617188,92.06393432617188,92.06393432617188,"{""B"": 8, ""T"": 2048}",NVIDIA H100 PCIe,2024-09-04 12:59:48,0.2.1 jsd,liger,full,memory,MB,V,vocab size,4096,768.0029296875,768.0029296875,768.0029296875,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:31,0.3.1 jsd,liger,full,memory,MB,V,vocab size,8192,1536.0029296875,1536.0029296875,1536.0029296875,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:31,0.3.1 jsd,liger,full,memory,MB,V,vocab size,16384,3072.0048828125,3072.0048828125,3072.0048828125,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:31,0.3.1 jsd,liger,full,memory,MB,V,vocab size,32768,6144.0087890625,6144.0087890625,6144.0087890625,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:31,0.3.1 jsd,liger,full,memory,MB,V,vocab size,65536,12288.0166015625,12288.0166015625,12288.0166015625,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:31,0.3.1 jsd,liger,full,memory,MB,V,vocab size,131072,24576.015625,24576.015625,24576.015625,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:31,0.3.1 jsd,torch,full,memory,MB,V,vocab size,4096,1664.0009765625,1664.0009765625,1664.0009765625,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:33,0.3.1 jsd,torch,full,memory,MB,V,vocab size,8192,3328.0009765625,3328.0009765625,3328.0009765625,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:33,0.3.1 jsd,torch,full,memory,MB,V,vocab size,16384,6656.0009765625,6656.0009765625,6656.0009765625,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:33,0.3.1 jsd,torch,full,memory,MB,V,vocab size,32768,13312.0009765625,13312.0009765625,13312.0009765625,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:33,0.3.1 jsd,torch,full,memory,MB,V,vocab size,65536,26624.0,26624.0,26624.0,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:33,0.3.1 jsd,torch,full,memory,MB,V,vocab size,131072,53248.0,53248.0,53248.0,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:33,0.3.1 jsd,liger,forward,speed,ms,V,vocab size,4096,0.4651840031147003,0.4636736214160919,0.4659839868545532,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:37,0.3.1 jsd,liger,forward,speed,ms,V,vocab size,8192,0.927888035774231,0.926751971244812,0.92952960729599,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:37,0.3.1 jsd,liger,forward,speed,ms,V,vocab size,16384,10.96003246307373,10.942886352539062,10.970770835876465,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:37,0.3.1 jsd,liger,forward,speed,ms,V,vocab size,32768,22.405792236328125,22.390380859375,22.41998863220215,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:37,0.3.1 jsd,liger,forward,speed,ms,V,vocab size,65536,43.49095916748047,43.47438049316406,43.50754165649414,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:37,0.3.1 jsd,liger,forward,speed,ms,V,vocab size,131072,87.0363540649414,87.0363540649414,87.0363540649414,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:37,0.3.1 jsd,torch,forward,speed,ms,V,vocab size,4096,2.4744958877563477,2.4725184440612793,2.4764864444732666,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:38,0.3.1 jsd,torch,forward,speed,ms,V,vocab size,8192,4.8528642654418945,4.851238250732422,4.854745864868164,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:38,0.3.1 jsd,torch,forward,speed,ms,V,vocab size,16384,9.532496452331543,9.528634071350098,9.535890579223633,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:38,0.3.1 jsd,torch,forward,speed,ms,V,vocab size,32768,18.91379165649414,18.911853790283203,18.919116973876953,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:38,0.3.1 jsd,torch,forward,speed,ms,V,vocab size,65536,37.70152282714844,37.70074462890625,37.70229721069336,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:38,0.3.1 jsd,torch,forward,speed,ms,V,vocab size,131072,75.37680053710938,75.37680053710938,75.37680053710938,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:38,0.3.1 jsd,liger,full,speed,ms,V,vocab size,4096,1.2074079513549805,1.1739968061447144,1.2760319709777832,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:40,0.3.1 jsd,liger,full,speed,ms,V,vocab size,8192,2.091792106628418,2.0771327018737793,2.106553554534912,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:40,0.3.1 jsd,liger,full,speed,ms,V,vocab size,16384,12.928031921386719,12.8988676071167,12.936230659484863,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:40,0.3.1 jsd,liger,full,speed,ms,V,vocab size,32768,26.55548858642578,26.550823211669922,26.570655822753906,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:40,0.3.1 jsd,liger,full,speed,ms,V,vocab size,65536,51.6833610534668,51.6833610534668,51.6833610534668,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:40,0.3.1 jsd,liger,full,speed,ms,V,vocab size,131072,103.12793731689453,103.12793731689453,103.12793731689453,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:40,0.3.1 jsd,torch,full,speed,ms,V,vocab size,4096,5.397359848022461,5.392876625061035,5.39998722076416,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:43,0.3.1 jsd,torch,full,speed,ms,V,vocab size,8192,10.60153579711914,10.597900390625,10.60470962524414,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:43,0.3.1 jsd,torch,full,speed,ms,V,vocab size,16384,20.9442081451416,20.94247055053711,20.9469051361084,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:43,0.3.1 jsd,torch,full,speed,ms,V,vocab size,32768,42.113216400146484,42.113216400146484,42.113216400146484,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:43,0.3.1 jsd,torch,full,speed,ms,V,vocab size,65536,83.9959716796875,83.9959716796875,83.9959716796875,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:43,0.3.1 jsd,torch,full,speed,ms,V,vocab size,131072,167.94175720214844,167.94175720214844,167.94175720214844,"{""B"": 4, ""T"": 2048}",NVIDIA H100 PCIe,2024-10-02 16:21:43,0.3.1 fused_linear_jsd,liger,forward,speed,ms,BT,B x T,1024,110.02185821533203,110.02185821533203,110.02185821533203,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:18,0.3.1 fused_linear_jsd,liger,forward,speed,ms,BT,B x T,2048,124.14070129394531,124.14070129394531,124.14070129394531,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:18,0.3.1 fused_linear_jsd,liger,forward,speed,ms,BT,B x T,4096,143.15420532226562,143.15420532226562,143.15420532226562,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:18,0.3.1 fused_linear_jsd,liger,forward,speed,ms,BT,B x T,8192,180.90406799316406,180.90406799316406,180.90406799316406,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:18,0.3.1 fused_linear_jsd,torch,forward,speed,ms,BT,B x T,1024,9.556896209716797,9.550745964050293,9.576268196105957,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:20,0.3.1 fused_linear_jsd,torch,forward,speed,ms,BT,B x T,2048,18.73731231689453,18.732704162597656,18.737701416015625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:20,0.3.1 fused_linear_jsd,torch,forward,speed,ms,BT,B x T,4096,37.830482482910156,37.80821990966797,37.85274124145508,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:20,0.3.1 fused_linear_jsd,torch,forward,speed,ms,BT,B x T,8192,75.15289306640625,75.15289306640625,75.15289306640625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:20,0.3.1 fused_linear_jsd,liger,full,speed,ms,BT,B x T,1024,111.16019439697266,111.16019439697266,111.16019439697266,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:24,0.3.1 fused_linear_jsd,liger,full,speed,ms,BT,B x T,2048,125.6825942993164,125.6825942993164,125.6825942993164,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:24,0.3.1 fused_linear_jsd,liger,full,speed,ms,BT,B x T,4096,144.00784301757812,144.00784301757812,144.00784301757812,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:24,0.3.1 fused_linear_jsd,liger,full,speed,ms,BT,B x T,8192,182.5832977294922,182.5832977294922,182.5832977294922,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:24,0.3.1 fused_linear_jsd,torch,full,speed,ms,BT,B x T,1024,25.977184295654297,25.968351364135742,25.989356994628906,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:27,0.3.1 fused_linear_jsd,torch,full,speed,ms,BT,B x T,2048,49.48417663574219,49.47330093383789,49.495052337646484,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:27,0.3.1 fused_linear_jsd,torch,full,speed,ms,BT,B x T,4096,98.31510162353516,98.31510162353516,98.31510162353516,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:27,0.3.1 fused_linear_jsd,torch,full,speed,ms,BT,B x T,8192,195.29539489746094,195.29539489746094,195.29539489746094,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:27,0.3.1 fused_linear_jsd,liger,full,memory,MB,BT,B x T,1024,4652.48486328125,4652.48486328125,4652.48486328125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:33,0.3.1 fused_linear_jsd,liger,full,memory,MB,BT,B x T,2048,5231.93798828125,5231.93798828125,5231.93798828125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:33,0.3.1 fused_linear_jsd,liger,full,memory,MB,BT,B x T,4096,6391.87548828125,6391.87548828125,6391.87548828125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:33,0.3.1 fused_linear_jsd,liger,full,memory,MB,BT,B x T,8192,8711.75,8711.75,8711.75,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:33,0.3.1 fused_linear_jsd,torch,full,memory,MB,BT,B x T,1024,10609.005859375,10609.005859375,10609.005859375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:35,0.3.1 fused_linear_jsd,torch,full,memory,MB,BT,B x T,2048,17146.009765625,17146.009765625,17146.009765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:35,0.3.1 fused_linear_jsd,torch,full,memory,MB,BT,B x T,4096,30220.017578125,30220.017578125,30220.017578125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:35,0.3.1 fused_linear_jsd,torch,full,memory,MB,BT,B x T,8192,56368.015625,56368.015625,56368.015625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA H100 80GB HBM3,2024-10-09 12:29:35,0.3.1 fused_linear_orpo_loss,liger,forward,speed,ms,B,B,2,116.00621032714844,116.00621032714844,116.00621032714844,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:24:05,0.4.0 fused_linear_orpo_loss,liger,forward,speed,ms,B,B,4,230.83609008789062,230.83609008789062,230.83609008789062,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:24:05,0.4.0 fused_linear_orpo_loss,liger,forward,speed,ms,B,B,8,461.9543151855469,461.9543151855469,461.9543151855469,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:24:05,0.4.0 fused_linear_orpo_loss,liger,forward,speed,ms,B,B,16,922.994384765625,922.994384765625,922.994384765625,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:24:05,0.4.0 fused_linear_orpo_loss,huggingface,forward,speed,ms,B,B,2,39.558860778808594,39.52657699584961,39.591148376464844,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:24:36,0.4.0 fused_linear_orpo_loss,huggingface,forward,speed,ms,B,B,4,79.9734115600586,79.9734115600586,79.9734115600586,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:24:36,0.4.0 fused_linear_orpo_loss,huggingface,forward,speed,ms,B,B,8,160.071044921875,160.071044921875,160.071044921875,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:24:36,0.4.0 fused_linear_orpo_loss,huggingface,forward,speed,ms,B,B,16,321.4681091308594,321.4681091308594,321.4681091308594,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:24:36,0.4.0 fused_linear_orpo_loss,liger,full,speed,ms,B,B,2,116.56009674072266,116.56009674072266,116.56009674072266,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:25:17,0.4.0 fused_linear_orpo_loss,liger,full,speed,ms,B,B,4,232.43980407714844,232.43980407714844,232.43980407714844,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:25:17,0.4.0 fused_linear_orpo_loss,liger,full,speed,ms,B,B,8,464.5750732421875,464.5750732421875,464.5750732421875,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:25:17,0.4.0 fused_linear_orpo_loss,liger,full,speed,ms,B,B,16,926.3385009765625,926.3385009765625,926.3385009765625,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:25:17,0.4.0 fused_linear_orpo_loss,huggingface,full,speed,ms,B,B,2,120.68428802490234,120.68428802490234,120.68428802490234,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:25:58,0.4.0 fused_linear_orpo_loss,huggingface,full,speed,ms,B,B,4,241.15061950683594,241.15061950683594,241.15061950683594,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:25:58,0.4.0 fused_linear_orpo_loss,huggingface,full,speed,ms,B,B,8,492.5342102050781,492.5342102050781,492.5342102050781,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:25:58,0.4.0 fused_linear_orpo_loss,huggingface,full,speed,ms,B,B,16,1000.8460693359375,1000.8460693359375,1000.8460693359375,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:25:58,0.4.0 fused_linear_orpo_loss,liger,full,memory,MB,B,B,2,14556.626953125,14556.626953125,14556.626953125,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:26:42,0.4.0 fused_linear_orpo_loss,liger,full,memory,MB,B,B,4,14748.689453125,14748.689453125,14748.689453125,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:26:42,0.4.0 fused_linear_orpo_loss,liger,full,memory,MB,B,B,8,15132.814453125,15132.814453125,15132.814453125,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:26:42,0.4.0 fused_linear_orpo_loss,liger,full,memory,MB,B,B,16,15901.064453125,15901.064453125,15901.064453125,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:26:42,0.4.0 fused_linear_orpo_loss,huggingface,full,memory,MB,B,B,2,12488.501953125,12488.501953125,12488.501953125,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:27:10,0.4.0 fused_linear_orpo_loss,huggingface,full,memory,MB,B,B,4,19630.564453125,19630.564453125,19630.564453125,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:27:10,0.4.0 fused_linear_orpo_loss,huggingface,full,memory,MB,B,B,8,33914.6875,33914.6875,33914.6875,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:27:10,0.4.0 fused_linear_orpo_loss,huggingface,full,memory,MB,B,B,16,62482.9375,62482.9375,62482.9375,"{""T"": 4096, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 21:27:10,0.4.0 fused_linear_orpo_loss,liger,forward,speed,ms,B,B,2,31.02783966064453,31.027551651000977,31.164947509765625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:06:30,0.4.0 fused_linear_orpo_loss,liger,forward,speed,ms,B,B,4,60.88966369628906,60.88966369628906,60.88966369628906,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:06:30,0.4.0 fused_linear_orpo_loss,liger,forward,speed,ms,B,B,8,121.08070373535156,121.08070373535156,121.08070373535156,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:06:30,0.4.0 fused_linear_orpo_loss,liger,forward,speed,ms,B,B,16,244.36968994140625,244.36968994140625,244.36968994140625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:06:30,0.4.0 fused_linear_orpo_loss,huggingface,forward,speed,ms,B,B,2,12.9093599319458,12.874624252319336,12.947936058044434,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:06:57,0.4.0 fused_linear_orpo_loss,huggingface,forward,speed,ms,B,B,4,25.557632446289062,25.526700973510742,25.703763961791992,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:06:57,0.4.0 fused_linear_orpo_loss,huggingface,forward,speed,ms,B,B,8,51.75590515136719,51.75590515136719,51.75590515136719,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:06:57,0.4.0 fused_linear_orpo_loss,huggingface,forward,speed,ms,B,B,16,103.8515853881836,103.8515853881836,103.8515853881836,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:06:57,0.4.0 fused_linear_orpo_loss,liger,full,speed,ms,B,B,2,32.52537536621094,32.49258041381836,32.558170318603516,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:07:28,0.4.0 fused_linear_orpo_loss,liger,full,speed,ms,B,B,4,63.16300964355469,63.16300964355469,63.16300964355469,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:07:28,0.4.0 fused_linear_orpo_loss,liger,full,speed,ms,B,B,8,123.02518463134766,123.02518463134766,123.02518463134766,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:07:28,0.4.0 fused_linear_orpo_loss,liger,full,speed,ms,B,B,16,247.44105529785156,247.44105529785156,247.44105529785156,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:07:28,0.4.0 fused_linear_orpo_loss,huggingface,full,speed,ms,B,B,2,39.32752227783203,39.32701873779297,39.32802200317383,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:07:59,0.4.0 fused_linear_orpo_loss,huggingface,full,speed,ms,B,B,4,77.9202880859375,77.9202880859375,77.9202880859375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:07:59,0.4.0 fused_linear_orpo_loss,huggingface,full,speed,ms,B,B,8,151.6084442138672,151.6084442138672,151.6084442138672,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:07:59,0.4.0 fused_linear_orpo_loss,huggingface,full,speed,ms,B,B,16,304.4580993652344,304.4580993652344,304.4580993652344,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:07:59,0.4.0 fused_linear_orpo_loss,liger,full,memory,MB,B,B,2,8161.34619140625,8161.34619140625,8161.34619140625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:08:30,0.4.0 fused_linear_orpo_loss,liger,full,memory,MB,B,B,4,8209.361328125,8209.361328125,8209.361328125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:08:30,0.4.0 fused_linear_orpo_loss,liger,full,memory,MB,B,B,8,8305.392578125,8305.392578125,8305.392578125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:08:30,0.4.0 fused_linear_orpo_loss,liger,full,memory,MB,B,B,16,8497.455078125,8497.455078125,8497.455078125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:08:30,0.4.0 fused_linear_orpo_loss,huggingface,full,memory,MB,B,B,2,8645.314453125,8645.314453125,8645.314453125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:08:56,0.4.0 fused_linear_orpo_loss,huggingface,full,memory,MB,B,B,4,12184.330078125,12184.330078125,12184.330078125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:08:56,0.4.0 fused_linear_orpo_loss,huggingface,full,memory,MB,B,B,8,19262.361328125,19262.361328125,19262.361328125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:08:56,0.4.0 fused_linear_orpo_loss,huggingface,full,memory,MB,B,B,16,33418.421875,33418.421875,33418.421875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-13 22:08:56,0.4.0 fused_linear_cpo_loss,liger,forward,speed,ms,B,B,2,31.536447525024414,31.457439422607422,31.543052673339844,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:54:47,0.4.1 fused_linear_cpo_loss,liger,forward,speed,ms,B,B,4,62.407745361328125,62.407745361328125,62.407745361328125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:54:47,0.4.1 fused_linear_cpo_loss,liger,forward,speed,ms,B,B,8,123.64259338378906,123.64259338378906,123.64259338378906,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:54:47,0.4.1 fused_linear_cpo_loss,liger,forward,speed,ms,B,B,16,245.66575622558594,245.66575622558594,245.66575622558594,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:54:47,0.4.1 fused_linear_cpo_loss,huggingface,forward,speed,ms,B,B,2,14.516239166259766,14.514080047607422,14.52575969696045,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:55:20,0.4.1 fused_linear_cpo_loss,huggingface,forward,speed,ms,B,B,4,26.087743759155273,25.943340301513672,26.269376754760742,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:55:20,0.4.1 fused_linear_cpo_loss,huggingface,forward,speed,ms,B,B,8,51.85932922363281,51.85932922363281,51.85932922363281,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:55:20,0.4.1 fused_linear_cpo_loss,huggingface,forward,speed,ms,B,B,16,104.99673461914062,104.99673461914062,104.99673461914062,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:55:20,0.4.1 fused_linear_cpo_loss,liger,full,speed,ms,B,B,2,33.309967041015625,33.21604919433594,33.40388488769531,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:55:55,0.4.1 fused_linear_cpo_loss,liger,full,speed,ms,B,B,4,63.053470611572266,63.053470611572266,63.053470611572266,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:55:55,0.4.1 fused_linear_cpo_loss,liger,full,speed,ms,B,B,8,125.53849792480469,125.53849792480469,125.53849792480469,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:55:55,0.4.1 fused_linear_cpo_loss,liger,full,speed,ms,B,B,16,250.22178649902344,250.22178649902344,250.22178649902344,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:55:55,0.4.1 fused_linear_cpo_loss,huggingface,full,speed,ms,B,B,2,39.45849609375,39.33102798461914,39.58596420288086,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:56:30,0.4.1 fused_linear_cpo_loss,huggingface,full,speed,ms,B,B,4,77.00272369384766,77.00272369384766,77.00272369384766,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:56:30,0.4.1 fused_linear_cpo_loss,huggingface,full,speed,ms,B,B,8,154.28419494628906,154.28419494628906,154.28419494628906,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:56:30,0.4.1 fused_linear_cpo_loss,huggingface,full,speed,ms,B,B,16,309.23162841796875,309.23162841796875,309.23162841796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:56:30,0.4.1 fused_linear_cpo_loss,liger,full,memory,MB,B,B,2,8161.34619140625,8161.34619140625,8161.34619140625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:57:06,0.4.1 fused_linear_cpo_loss,liger,full,memory,MB,B,B,4,8209.361328125,8209.361328125,8209.361328125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:57:06,0.4.1 fused_linear_cpo_loss,liger,full,memory,MB,B,B,8,8305.392578125,8305.392578125,8305.392578125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:57:06,0.4.1 fused_linear_cpo_loss,liger,full,memory,MB,B,B,16,8497.455078125,8497.455078125,8497.455078125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:57:06,0.4.1 fused_linear_cpo_loss,huggingface,full,memory,MB,B,B,2,8645.314453125,8645.314453125,8645.314453125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:57:37,0.4.1 fused_linear_cpo_loss,huggingface,full,memory,MB,B,B,4,12184.330078125,12184.330078125,12184.330078125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:57:37,0.4.1 fused_linear_cpo_loss,huggingface,full,memory,MB,B,B,8,19262.361328125,19262.361328125,19262.361328125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:57:37,0.4.1 fused_linear_cpo_loss,huggingface,full,memory,MB,B,B,16,33418.42578125,33418.42578125,33418.42578125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-14 16:57:37,0.4.1 fused_linear_simpo_loss,liger,forward,speed,ms,B,B,2,30.28438377380371,30.107013702392578,30.284786224365234,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:27:26,0.4.1 fused_linear_simpo_loss,liger,forward,speed,ms,B,B,4,58.80876922607422,58.80876922607422,58.80876922607422,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:27:26,0.4.1 fused_linear_simpo_loss,liger,forward,speed,ms,B,B,8,117.96163177490234,117.96163177490234,117.96163177490234,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:27:26,0.4.1 fused_linear_simpo_loss,liger,forward,speed,ms,B,B,16,235.60794067382812,235.60794067382812,235.60794067382812,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:27:26,0.4.1 fused_linear_simpo_loss,huggingface,forward,speed,ms,B,B,2,14.513839721679688,14.510687828063965,14.517855644226074,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:27:56,0.4.1 fused_linear_simpo_loss,huggingface,forward,speed,ms,B,B,4,28.78099250793457,28.72719383239746,28.792186737060547,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:27:56,0.4.1 fused_linear_simpo_loss,huggingface,forward,speed,ms,B,B,8,52.5733757019043,52.5733757019043,52.5733757019043,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:27:56,0.4.1 fused_linear_simpo_loss,huggingface,forward,speed,ms,B,B,16,104.44764709472656,104.44764709472656,104.44764709472656,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:27:56,0.4.1 fused_linear_simpo_loss,liger,full,speed,ms,B,B,2,31.566062927246094,31.457612991333008,31.674514770507812,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:28:27,0.4.1 fused_linear_simpo_loss,liger,full,speed,ms,B,B,4,61.4403190612793,61.4403190612793,61.4403190612793,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:28:27,0.4.1 fused_linear_simpo_loss,liger,full,speed,ms,B,B,8,119.97705841064453,119.97705841064453,119.97705841064453,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:28:27,0.4.1 fused_linear_simpo_loss,liger,full,speed,ms,B,B,16,238.13417053222656,238.13417053222656,238.13417053222656,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:28:27,0.4.1 fused_linear_simpo_loss,huggingface,full,speed,ms,B,B,2,39.811119079589844,39.65474319458008,39.96749496459961,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:29:00,0.4.1 fused_linear_simpo_loss,huggingface,full,speed,ms,B,B,4,77.20928192138672,77.20928192138672,77.20928192138672,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:29:00,0.4.1 fused_linear_simpo_loss,huggingface,full,speed,ms,B,B,8,153.6952667236328,153.6952667236328,153.6952667236328,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:29:00,0.4.1 fused_linear_simpo_loss,huggingface,full,speed,ms,B,B,16,307.7382507324219,307.7382507324219,307.7382507324219,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:29:00,0.4.1 fused_linear_simpo_loss,liger,full,memory,MB,B,B,2,7675.3291015625,7675.3291015625,7675.3291015625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:29:33,0.4.1 fused_linear_simpo_loss,liger,full,memory,MB,B,B,4,7723.3447265625,7723.3447265625,7723.3447265625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:29:33,0.4.1 fused_linear_simpo_loss,liger,full,memory,MB,B,B,8,7819.3759765625,7819.3759765625,7819.3759765625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:29:33,0.4.1 fused_linear_simpo_loss,liger,full,memory,MB,B,B,16,8011.4384765625,8011.4384765625,8011.4384765625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:29:33,0.4.1 fused_linear_simpo_loss,huggingface,full,memory,MB,B,B,2,8645.314453125,8645.314453125,8645.314453125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:30:01,0.4.1 fused_linear_simpo_loss,huggingface,full,memory,MB,B,B,4,12184.330078125,12184.330078125,12184.330078125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:30:01,0.4.1 fused_linear_simpo_loss,huggingface,full,memory,MB,B,B,8,19262.361328125,19262.361328125,19262.361328125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:30:01,0.4.1 fused_linear_simpo_loss,huggingface,full,memory,MB,B,B,16,33418.42578125,33418.42578125,33418.42578125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2024-11-15 14:30:01,0.4.1 distill_jsd_loss,liger,forward,speed,ms,BT,B x T,1024,7.735536098480225,7.729177474975586,7.798131465911865,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:58:46,0.4.2 distill_jsd_loss,liger,forward,speed,ms,BT,B x T,2048,15.20411205291748,15.165056228637695,15.226079940795898,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:58:46,0.4.2 distill_jsd_loss,liger,forward,speed,ms,BT,B x T,4096,30.159456253051758,30.126911163330078,30.165311813354492,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:58:46,0.4.2 distill_jsd_loss,liger,forward,speed,ms,BT,B x T,8192,60.24163055419922,60.24163055419922,60.24163055419922,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:58:46,0.4.2 distill_jsd_loss,torch,forward,speed,ms,BT,B x T,1024,10.906111717224121,10.903244972229004,10.91296672821045,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:59:18,0.4.2 distill_jsd_loss,torch,forward,speed,ms,BT,B x T,2048,21.480207443237305,21.465139389038086,21.489286422729492,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:59:18,0.4.2 distill_jsd_loss,torch,forward,speed,ms,BT,B x T,4096,42.96339416503906,42.96237564086914,42.96440887451172,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:59:18,0.4.2 distill_jsd_loss,torch,forward,speed,ms,BT,B x T,8192,85.3946533203125,85.3946533203125,85.3946533203125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:59:18,0.4.2 distill_jsd_loss,liger,full,speed,ms,BT,B x T,1024,8.312895774841309,8.310400009155273,8.326751708984375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:59:51,0.4.2 distill_jsd_loss,liger,full,speed,ms,BT,B x T,2048,15.770208358764648,15.767775535583496,15.774784088134766,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:59:51,0.4.2 distill_jsd_loss,liger,full,speed,ms,BT,B x T,4096,30.922752380371094,30.920312881469727,30.927898406982422,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:59:51,0.4.2 distill_jsd_loss,liger,full,speed,ms,BT,B x T,8192,60.70627212524414,60.70627212524414,60.70627212524414,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 07:59:51,0.4.2 distill_jsd_loss,torch,full,speed,ms,BT,B x T,1024,28.72480010986328,28.718809127807617,28.728179931640625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:00:25,0.4.2 distill_jsd_loss,torch,full,speed,ms,BT,B x T,2048,54.281761169433594,54.281761169433594,54.281761169433594,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:00:25,0.4.2 distill_jsd_loss,torch,full,speed,ms,BT,B x T,4096,107.08905792236328,107.08905792236328,107.08905792236328,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:00:25,0.4.2 distill_jsd_loss,torch,full,speed,ms,BT,B x T,8192,213.1598663330078,213.1598663330078,213.1598663330078,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:00:25,0.4.2 distill_jsd_loss,liger,full,memory,MB,BT,B x T,1024,10913.541015625,10913.541015625,10913.541015625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:00:58,0.4.2 distill_jsd_loss,liger,full,memory,MB,BT,B x T,2048,10941.548828125,10941.548828125,10941.548828125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:00:58,0.4.2 distill_jsd_loss,liger,full,memory,MB,BT,B x T,4096,10997.564453125,10997.564453125,10997.564453125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:00:58,0.4.2 distill_jsd_loss,liger,full,memory,MB,BT,B x T,8192,11109.595703125,11109.595703125,11109.595703125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:00:58,0.4.2 distill_jsd_loss,torch,full,memory,MB,BT,B x T,1024,16174.0390625,16174.0390625,16174.0390625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:01:32,0.4.2 distill_jsd_loss,torch,full,memory,MB,BT,B x T,2048,23713.05078125,23713.05078125,23713.05078125,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:01:32,0.4.2 distill_jsd_loss,torch,full,memory,MB,BT,B x T,4096,38791.07421875,38791.07421875,38791.07421875,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:01:32,0.4.2 distill_jsd_loss,torch,full,memory,MB,BT,B x T,8192,68947.1015625,68947.1015625,68947.1015625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA H100 80GB HBM3,2024-12-03 08:01:32,0.4.2 kto_loss,liger,forward,speed,ms,B,Batch Size (B),2,3.9951679706573486,3.991487979888916,4.002252578735352,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:22:44,0.5.4 kto_loss,liger,forward,speed,ms,B,Batch Size (B),4,7.8037919998168945,7.788575649261475,7.808595180511475,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:22:44,0.5.4 kto_loss,liger,forward,speed,ms,B,Batch Size (B),8,15.43172836303711,15.430015563964844,15.4335355758667,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:22:44,0.5.4 kto_loss,liger,forward,speed,ms,B,Batch Size (B),16,30.66864013671875,30.66431999206543,30.670501708984375,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:22:44,0.5.4 kto_loss,liger,forward,speed,ms,B,Batch Size (B),32,61.1163215637207,61.1163215637207,61.1163215637207,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:22:44,0.5.4 kto_loss,huggingface,forward,speed,ms,B,Batch Size (B),2,3.8766400814056396,3.8680384159088135,3.8897151947021484,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:01,0.5.4 kto_loss,huggingface,forward,speed,ms,B,Batch Size (B),4,7.213727951049805,7.206470489501953,7.229574680328369,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:01,0.5.4 kto_loss,huggingface,forward,speed,ms,B,Batch Size (B),8,13.828800201416016,13.810944557189941,13.834943771362305,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:01,0.5.4 kto_loss,huggingface,forward,speed,ms,B,Batch Size (B),16,27.0930233001709,27.08517074584961,27.09713363647461,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:01,0.5.4 kto_loss,huggingface,forward,speed,ms,B,Batch Size (B),32,54.13715362548828,54.13715362548828,54.13715362548828,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:01,0.5.4 kto_loss,liger,full,speed,ms,B,Batch Size (B),2,4.782928466796875,4.677459239959717,5.3430914878845215,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:18,0.5.4 kto_loss,liger,full,speed,ms,B,Batch Size (B),4,8.517248153686523,8.481344223022461,8.561504364013672,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:18,0.5.4 kto_loss,liger,full,speed,ms,B,Batch Size (B),8,16.547504425048828,16.513471603393555,16.678144454956055,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:18,0.5.4 kto_loss,liger,full,speed,ms,B,Batch Size (B),16,31.891263961791992,31.819705963134766,32.274131774902344,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:18,0.5.4 kto_loss,liger,full,speed,ms,B,Batch Size (B),32,62.953758239746094,62.953758239746094,62.953758239746094,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:18,0.5.4 kto_loss,huggingface,full,speed,ms,B,Batch Size (B),2,6.201632022857666,6.163315296173096,6.314668655395508,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:35,0.5.4 kto_loss,huggingface,full,speed,ms,B,Batch Size (B),4,11.156224250793457,11.142304420471191,11.207296371459961,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:35,0.5.4 kto_loss,huggingface,full,speed,ms,B,Batch Size (B),8,21.249855041503906,21.231891632080078,21.264543533325195,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:35,0.5.4 kto_loss,huggingface,full,speed,ms,B,Batch Size (B),16,41.55686569213867,41.536956787109375,41.57677459716797,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:35,0.5.4 kto_loss,huggingface,full,speed,ms,B,Batch Size (B),32,81.56924438476562,81.56924438476562,81.56924438476562,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:35,0.5.4 kto_loss,liger,full,memory,MB,B,Batch Size (B),2,2585.73876953125,2585.73876953125,2585.73876953125,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:55,0.5.4 kto_loss,liger,full,memory,MB,B,Batch Size (B),4,3348.9892578125,3348.9892578125,3348.9892578125,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:55,0.5.4 kto_loss,liger,full,memory,MB,B,Batch Size (B),8,3361.0048828125,3361.0048828125,3361.0048828125,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:55,0.5.4 kto_loss,liger,full,memory,MB,B,Batch Size (B),16,3385.0361328125,3385.0361328125,3385.0361328125,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:55,0.5.4 kto_loss,liger,full,memory,MB,B,Batch Size (B),32,3433.0986328125,3433.0986328125,3433.0986328125,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:23:55,0.5.4 kto_loss,huggingface,full,memory,MB,B,Batch Size (B),2,4341.74951171875,4341.74951171875,4341.74951171875,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:24:11,0.5.4 kto_loss,huggingface,full,memory,MB,B,Batch Size (B),4,6099.26513671875,6099.26513671875,6099.26513671875,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:24:11,0.5.4 kto_loss,huggingface,full,memory,MB,B,Batch Size (B),8,9613.298828125,9613.298828125,9613.298828125,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:24:11,0.5.4 kto_loss,huggingface,full,memory,MB,B,Batch Size (B),16,16643.365234375,16643.365234375,16643.365234375,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:24:11,0.5.4 kto_loss,huggingface,full,memory,MB,B,Batch Size (B),32,30703.498046875,30703.498046875,30703.498046875,"{""T"": 512, ""H"": 1024, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": true, ""beta"": 0.1, ""ignore_index"": 42}",NVIDIA H100 80GB HBM3,2025-03-03 08:24:11,0.5.4 sparsemax,liger,forward,speed,ms,V,feature size,1024,0.41471999883651733,0.4126720130443573,0.42393600940704346,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:08,0.5.8 sparsemax,liger,forward,speed,ms,V,feature size,2048,0.7608320116996765,0.7598080039024353,0.7628800272941589,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:08,0.5.8 sparsemax,liger,forward,speed,ms,V,feature size,4096,1.4561280012130737,1.4540799856185913,1.4581760168075562,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:08,0.5.8 sparsemax,liger,forward,speed,ms,V,feature size,8192,5.288959980010986,5.2848639488220215,5.29986572265625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:08,0.5.8 sparsemax,liger,forward,speed,ms,V,feature size,16384,10.734624862670898,10.729472160339355,11.096882820129395,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:08,0.5.8 sparsemax,liger,forward,speed,ms,V,feature size,32768,21.729312896728516,21.7128963470459,22.20728302001953,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:08,0.5.8 sparsemax,torch,forward,speed,ms,V,feature size,1024,0.42291200160980225,0.42188799381256104,0.42393600940704346,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:12,0.5.8 sparsemax,torch,forward,speed,ms,V,feature size,2048,0.7782400250434875,0.7772160172462463,0.779263973236084,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:12,0.5.8 sparsemax,torch,forward,speed,ms,V,feature size,4096,1.4940160512924194,1.491968035697937,1.4960639476776123,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:12,0.5.8 sparsemax,torch,forward,speed,ms,V,feature size,8192,5.359615802764893,5.356544017791748,5.366579055786133,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:12,0.5.8 sparsemax,torch,forward,speed,ms,V,feature size,16384,10.883584022521973,10.874879837036133,11.224268913269043,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:12,0.5.8 sparsemax,torch,forward,speed,ms,V,feature size,32768,22.19878387451172,22.018457412719727,22.48888397216797,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:12,0.5.8 sparsemax,liger,full,speed,ms,V,feature size,1024,0.4558719992637634,0.45558398962020874,0.45772799849510193,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:16,0.5.8 sparsemax,liger,full,speed,ms,V,feature size,2048,0.8488960266113281,0.8478720188140869,0.8509439826011658,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:16,0.5.8 sparsemax,liger,full,speed,ms,V,feature size,4096,1.6476160287857056,1.6465920209884644,1.6499264240264893,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:16,0.5.8 sparsemax,liger,full,speed,ms,V,feature size,8192,5.664768218994141,5.660672187805176,5.681356906890869,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:16,0.5.8 sparsemax,liger,full,speed,ms,V,feature size,16384,11.486207962036133,11.478015899658203,11.874713897705078,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:16,0.5.8 sparsemax,liger,full,speed,ms,V,feature size,32768,23.457279205322266,23.289682388305664,23.76642608642578,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:16,0.5.8 sparsemax,torch,full,speed,ms,V,feature size,1024,0.6021119952201843,0.6010879874229431,0.6041600108146667,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:20,0.5.8 sparsemax,torch,full,speed,ms,V,feature size,2048,1.1212799549102783,1.119264006614685,1.1223039627075195,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:20,0.5.8 sparsemax,torch,full,speed,ms,V,feature size,4096,2.1637120246887207,2.1616640090942383,2.165760040283203,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:20,0.5.8 sparsemax,torch,full,speed,ms,V,feature size,8192,6.693888187408447,6.68723201751709,6.705561637878418,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:20,0.5.8 sparsemax,torch,full,speed,ms,V,feature size,16384,13.523456573486328,13.518848419189453,13.878681182861328,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:20,0.5.8 sparsemax,torch,full,speed,ms,V,feature size,32768,27.604991912841797,27.295129776000977,27.77518081665039,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:20,0.5.8 sparsemax,liger,backward,speed,ms,V,feature size,1024,0.04403200000524521,0.043007999658584595,0.05222399905323982,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:22,0.5.8 sparsemax,liger,backward,speed,ms,V,feature size,2048,0.08806400001049042,0.08713600039482117,0.08806400001049042,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:22,0.5.8 sparsemax,liger,backward,speed,ms,V,feature size,4096,0.1884160041809082,0.1884160041809082,0.18943999707698822,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:22,0.5.8 sparsemax,liger,backward,speed,ms,V,feature size,8192,0.374783992767334,0.37376001477241516,0.37486720085144043,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:22,0.5.8 sparsemax,liger,backward,speed,ms,V,feature size,16384,0.7516160011291504,0.7505919933319092,0.7516160011291504,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:22,0.5.8 sparsemax,liger,backward,speed,ms,V,feature size,32768,1.5738879442214966,1.572864055633545,1.575935959815979,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:22,0.5.8 sparsemax,torch,backward,speed,ms,V,feature size,1024,0.1812479943037033,0.1802240014076233,0.18227200210094452,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,torch,backward,speed,ms,V,feature size,2048,0.34406399726867676,0.34406399726867676,0.34508800506591797,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,torch,backward,speed,ms,V,feature size,4096,0.6717439889907837,0.6707199811935425,0.6727679967880249,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,torch,backward,speed,ms,V,feature size,8192,1.3250559568405151,1.3241215944290161,1.3260799646377563,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,torch,backward,speed,ms,V,feature size,16384,2.629631996154785,2.628607988357544,2.6306560039520264,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,torch,backward,speed,ms,V,feature size,32768,5.236735820770264,5.235712051391602,5.239808082580566,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,liger,full,memory,MB,V,feature size,1024,82.03515625,82.03515625,82.03515625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,liger,full,memory,MB,V,feature size,2048,164.0390625,164.0390625,164.0390625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,liger,full,memory,MB,V,feature size,4096,328.046875,328.046875,328.046875,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,liger,full,memory,MB,V,feature size,8192,704.00048828125,704.00048828125,704.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,liger,full,memory,MB,V,feature size,16384,1408.00048828125,1408.00048828125,1408.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,liger,full,memory,MB,V,feature size,32768,2816.00048828125,2816.00048828125,2816.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:25,0.5.8 sparsemax,torch,full,memory,MB,V,feature size,1024,82.03515625,82.03515625,82.03515625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8 sparsemax,torch,full,memory,MB,V,feature size,2048,164.0390625,164.0390625,164.0390625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8 sparsemax,torch,full,memory,MB,V,feature size,4096,328.046875,328.046875,328.046875,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8 sparsemax,torch,full,memory,MB,V,feature size,8192,704.00048828125,704.00048828125,704.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8 sparsemax,torch,full,memory,MB,V,feature size,16384,1408.00048828125,1408.00048828125,1408.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8 sparsemax,torch,full,memory,MB,V,feature size,32768,2816.00048828125,2816.00048828125,2816.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8 multi_token_attention,liger,forward,speed,ms,L,sequence length,32,0.01740800030529499,0.01740800030529499,0.018432000651955605,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1 multi_token_attention,liger,forward,speed,ms,L,sequence length,64,0.018432000651955605,0.01740800030529499,0.01945599913597107,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1 multi_token_attention,liger,forward,speed,ms,L,sequence length,128,0.023552000522613525,0.02252800017595291,0.02364799939095974,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1 multi_token_attention,liger,forward,speed,ms,L,sequence length,256,0.043007999658584595,0.04198399931192398,0.043007999658584595,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1 multi_token_attention,liger,forward,speed,ms,L,sequence length,512,0.12595200538635254,0.12492799758911133,0.12595200538635254,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1 multi_token_attention,liger,forward,speed,ms,L,sequence length,1024,0.5283839702606201,0.5253120064735413,0.5294079780578613,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1 multi_token_attention,torch,forward,speed,ms,L,sequence length,32,0.2467840015888214,0.24063999950885773,0.2529279887676239,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1 multi_token_attention,torch,forward,speed,ms,L,sequence length,64,0.24166400730609894,0.23756800591945648,0.24883200228214264,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1 multi_token_attention,torch,forward,speed,ms,L,sequence length,128,0.24268800020217896,0.2385600060224533,0.24985599517822266,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1 multi_token_attention,torch,forward,speed,ms,L,sequence length,256,0.24166400730609894,0.23873919248580933,0.24782079458236694,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1 multi_token_attention,torch,forward,speed,ms,L,sequence length,512,0.31334400177001953,0.3102720081806183,0.3213888108730316,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1 multi_token_attention,torch,forward,speed,ms,L,sequence length,1024,0.719871997833252,0.7167999744415283,0.7260159850120544,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1 multi_token_attention,liger,full,speed,ms,L,sequence length,32,0.9349120259284973,0.6543359756469727,0.9494400024414062,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1 multi_token_attention,liger,full,speed,ms,L,sequence length,64,0.6215680241584778,0.5631999969482422,0.8916991949081421,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1 multi_token_attention,liger,full,speed,ms,L,sequence length,128,0.5406720042228699,0.5335040092468262,0.550003170967102,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1 multi_token_attention,liger,full,speed,ms,L,sequence length,256,0.5631999969482422,0.5560320019721985,0.5674688220024109,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1 multi_token_attention,liger,full,speed,ms,L,sequence length,512,0.6430720090866089,0.6420480012893677,0.6430720090866089,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1 multi_token_attention,liger,full,speed,ms,L,sequence length,1024,2.4780800342559814,2.4770560264587402,2.479987144470215,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1 multi_token_attention,torch,full,speed,ms,L,sequence length,32,0.795199990272522,0.78438401222229,0.8038399815559387,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1 multi_token_attention,torch,full,speed,ms,L,sequence length,64,0.7362560033798218,0.6504960060119629,0.7464960217475891,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1 multi_token_attention,torch,full,speed,ms,L,sequence length,128,0.7680000066757202,0.6437439918518066,0.8105729818344116,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1 multi_token_attention,torch,full,speed,ms,L,sequence length,256,0.7685279846191406,0.7586879730224609,0.783519983291626,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1 multi_token_attention,torch,full,speed,ms,L,sequence length,512,0.9676799774169922,0.9625599980354309,0.9751039743423462,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1 multi_token_attention,torch,full,speed,ms,L,sequence length,1024,2.772480010986328,2.7688961029052734,2.7842559814453125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1 multi_token_attention,liger,backward,speed,ms,L,sequence length,32,0.334879994392395,0.3222528100013733,0.6912000179290771,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1 multi_token_attention,liger,backward,speed,ms,L,sequence length,64,0.23756800591945648,0.228166401386261,0.2629631757736206,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1 multi_token_attention,liger,backward,speed,ms,L,sequence length,128,0.29785600304603577,0.2519040107727051,0.3081727921962738,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1 multi_token_attention,liger,backward,speed,ms,L,sequence length,256,0.2590720057487488,0.24391679465770721,0.30832639336586,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1 multi_token_attention,liger,backward,speed,ms,L,sequence length,512,0.5171200037002563,0.5169600248336792,0.5181440114974976,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1 multi_token_attention,liger,backward,speed,ms,L,sequence length,1024,1.9578880071640015,1.9568639993667603,1.9615744352340698,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1 multi_token_attention,torch,backward,speed,ms,L,sequence length,32,0.09830400347709656,0.08908800035715103,0.20353920757770538,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,backward,speed,ms,L,sequence length,64,0.06348799914121628,0.062463998794555664,0.06348799914121628,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,backward,speed,ms,L,sequence length,128,0.09011200070381165,0.08908800035715103,0.09011200070381165,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,backward,speed,ms,L,sequence length,256,0.16383999586105347,0.16383999586105347,0.16486400365829468,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,backward,speed,ms,L,sequence length,512,0.52019202709198,0.5191680192947388,0.52019202709198,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,backward,speed,ms,L,sequence length,1024,1.9763200283050537,1.9752960205078125,1.9763200283050537,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,liger,full,memory,MB,L,sequence length,32,0.97412109375,0.97412109375,0.97412109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,liger,full,memory,MB,L,sequence length,64,1.53662109375,1.53662109375,1.53662109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,liger,full,memory,MB,L,sequence length,128,3.69287109375,3.69287109375,3.69287109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,liger,full,memory,MB,L,sequence length,256,13.068359375,13.068359375,13.068359375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,liger,full,memory,MB,L,sequence length,512,48.974609375,48.974609375,48.974609375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,liger,full,memory,MB,L,sequence length,1024,192.974609375,192.974609375,192.974609375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,full,memory,MB,L,sequence length,32,0.9599609375,0.9599609375,0.9599609375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,full,memory,MB,L,sequence length,64,1.4814453125,1.4814453125,1.4814453125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,full,memory,MB,L,sequence length,128,3.4736328125,3.4736328125,3.4736328125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,full,memory,MB,L,sequence length,256,12.19287109375,12.19287109375,12.19287109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,full,memory,MB,L,sequence length,512,45.47412109375,45.47412109375,45.47412109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 multi_token_attention,torch,full,memory,MB,L,sequence length,1024,178.97412109375,178.97412109375,178.97412109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1 softmax,liger,forward,speed,ms,N,hidden size,128,0.0071680000983178616,0.0071680000983178616,0.007942399941384792,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,256,0.008448000065982342,0.008191999979317188,0.009216000325977802,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,512,0.013311999849975109,0.01228800043463707,0.013311999849975109,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,1024,0.021503999829292297,0.021503999829292297,0.02252800017595291,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,2048,0.04095999896526337,0.04095999896526337,0.04198399931192398,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,4096,0.0798719972372055,0.0798719972372055,0.08089599758386612,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,128,0.006144000217318535,0.006144000217318535,0.0071680000983178616,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,256,0.008191999979317188,0.008191999979317188,0.009216000325977802,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,512,0.01228800043463707,0.01228800043463707,0.013311999849975109,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,1024,0.02252800017595291,0.02252800017595291,0.023552000522613525,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,2048,0.057583998888731,0.05734400078654289,0.058368001133203506,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,4096,0.08323200047016144,0.08294399827718735,0.08396799862384796,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8 softmax,liger,full,speed,ms,N,hidden size,128,0.053247999399900436,0.04505600035190582,0.06172160431742668,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8 softmax,liger,full,speed,ms,N,hidden size,256,0.05939200147986412,0.04198399931192398,0.11169920116662979,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8 softmax,liger,full,speed,ms,N,hidden size,512,0.11577600240707397,0.07720960676670074,0.16793599724769592,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8 softmax,liger,full,speed,ms,N,hidden size,1024,0.12492799758911133,0.10273279249668121,0.2982015907764435,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8 softmax,liger,full,speed,ms,N,hidden size,2048,0.1013759970664978,0.10035199671983719,0.12902399897575378,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8 softmax,liger,full,speed,ms,N,hidden size,4096,0.19660800695419312,0.19660800695419312,0.19763199985027313,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8 softmax,torch,full,speed,ms,N,hidden size,128,0.013311999849975109,0.013311999849975109,0.013504000380635262,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8 softmax,torch,full,speed,ms,N,hidden size,256,0.019152000546455383,0.018432000651955605,0.01945599913597107,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8 softmax,torch,full,speed,ms,N,hidden size,512,0.03891199827194214,0.03788800165057182,0.03891199827194214,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8 softmax,torch,full,speed,ms,N,hidden size,1024,0.08396799862384796,0.08396799862384796,0.08499199897050858,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8 softmax,torch,full,speed,ms,N,hidden size,2048,0.18329599499702454,0.18329599499702454,0.18432000279426575,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8 softmax,torch,full,speed,ms,N,hidden size,4096,0.3307519853115082,0.32972800731658936,0.33169281482696533,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,128,0.006335999816656113,0.006144000217318535,0.0071680000983178616,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,256,0.0071680000983178616,0.006144000217318535,0.0071680000983178616,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,512,0.008191999979317188,0.008191999979317188,0.009216000325977802,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,1024,0.013311999849975109,0.01228800043463707,0.013311999849975109,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,2048,0.02252800017595291,0.02252800017595291,0.023552000522613525,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8 softmax,liger,forward,speed,ms,N,hidden size,4096,0.04095999896526337,0.04095999896526337,0.04198399931192398,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,128,0.006144000217318535,0.005119999870657921,0.006144000217318535,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,256,0.006207999773323536,0.006144000217318535,0.0071680000983178616,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,512,0.008383999578654766,0.008191999979317188,0.009216000325977802,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,1024,0.014336000196635723,0.014336000196635723,0.014336000196635723,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,2048,0.05939200147986412,0.058368001133203506,0.05939200147986412,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8 softmax,torch,forward,speed,ms,N,hidden size,4096,0.06758400052785873,0.06675200164318085,0.06758400052785873,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8 softmax,liger,full,speed,ms,N,hidden size,128,0.11472000181674957,0.09744639694690704,0.20684799551963806,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8 softmax,liger,full,speed,ms,N,hidden size,256,0.15787199139595032,0.10769280046224594,0.20897281169891357,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8 softmax,liger,full,speed,ms,N,hidden size,512,0.14028799533843994,0.0832064226269722,0.2879999876022339,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8 softmax,liger,full,speed,ms,N,hidden size,1024,0.2088959962129593,0.11446399986743927,0.2972480058670044,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8 softmax,liger,full,speed,ms,N,hidden size,2048,0.1443839967250824,0.09318400174379349,0.28278398513793945,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8 softmax,liger,full,speed,ms,N,hidden size,4096,0.11673600226640701,0.10035199671983719,0.28074881434440613,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8 softmax,torch,full,speed,ms,N,hidden size,128,0.011264000087976456,0.010239999741315842,0.011264000087976456,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,speed,ms,N,hidden size,256,0.013311999849975109,0.013311999849975109,0.013632000423967838,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,speed,ms,N,hidden size,512,0.01945599913597107,0.01945599913597107,0.01945599913597107,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,speed,ms,N,hidden size,1024,0.04198399931192398,0.04198399931192398,0.04224000126123428,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,speed,ms,N,hidden size,2048,0.12595200538635254,0.12595200538635254,0.12697599828243256,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,speed,ms,N,hidden size,4096,0.19763199985027313,0.19660800695419312,0.19809921085834503,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,128,0.00244140625,0.00244140625,0.00244140625,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,256,0.0048828125,0.0048828125,0.0048828125,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,512,0.009765625,0.009765625,0.009765625,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,1024,0.01953125,0.01953125,0.01953125,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,2048,0.0390625,0.0390625,0.0390625,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,4096,0.078125,0.078125,0.078125,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,memory,MB,N,hidden size,128,0.0029296875,0.0029296875,0.0029296875,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,memory,MB,N,hidden size,256,0.005859375,0.005859375,0.005859375,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,memory,MB,N,hidden size,512,0.01171875,0.01171875,0.01171875,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,memory,MB,N,hidden size,1024,0.0234375,0.0234375,0.0234375,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,memory,MB,N,hidden size,2048,0.046875,0.046875,0.046875,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,memory,MB,N,hidden size,4096,0.09375,0.09375,0.09375,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,128,0.00244140625,0.00244140625,0.00244140625,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,256,0.00244140625,0.00244140625,0.00244140625,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,512,0.0048828125,0.0048828125,0.0048828125,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,1024,0.009765625,0.009765625,0.009765625,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,2048,0.01953125,0.01953125,0.01953125,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,liger,full,memory,MB,N,hidden size,4096,0.0390625,0.0390625,0.0390625,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8 softmax,torch,full,memory,MB,N,hidden size,128,0.0029296875,0.0029296875,0.0029296875,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8 softmax,torch,full,memory,MB,N,hidden size,256,0.0029296875,0.0029296875,0.0029296875,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8 softmax,torch,full,memory,MB,N,hidden size,512,0.005859375,0.005859375,0.005859375,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8 softmax,torch,full,memory,MB,N,hidden size,1024,0.01171875,0.01171875,0.01171875,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8 softmax,torch,full,memory,MB,N,hidden size,2048,0.0234375,0.0234375,0.0234375,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8 softmax,torch,full,memory,MB,N,hidden size,4096,0.046875,0.046875,0.046875,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8 sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,32,0.31436800956726074,0.30646398663520813,0.319487988948822,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8 sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,64,0.3779039978981018,0.3678207993507385,0.38410240411758423,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8 sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,128,0.35020801424980164,0.3428351879119873,0.35839998722076416,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8 sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,256,0.5294079780578613,0.5283839702606201,0.5304319858551025,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8 sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,512,1.7315839529037476,1.7304960489273071,1.815551996231079,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8 sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,1024,6.465375900268555,6.462463855743408,6.718054294586182,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8 sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,32,0.5888000130653381,0.5826560258865356,0.5960000157356262,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8 sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,64,0.6010879874229431,0.5947520136833191,0.608128011226654,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8 sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,128,0.5816320180892944,0.5745791792869568,0.5908480286598206,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8 sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,256,0.8591359853744507,0.8529919981956482,0.8627520203590393,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8 sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,512,1.931391954421997,1.925772786140442,1.935705542564392,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8 sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,1024,6.76915168762207,6.761676788330078,7.009791851043701,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8 sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,32,2.111056089401245,2.0716030597686768,2.137094497680664,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8 sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,64,2.174975872039795,2.1364736557006836,2.297856092453003,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8 sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,128,2.0894718170166016,2.073791980743408,2.1352319717407227,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8 sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,256,2.137216091156006,1.8400319814682007,2.194175958633423,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8 sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,512,2.2814719676971436,2.1872639656066895,2.2833151817321777,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8 sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,1024,8.308735847473145,8.299519538879395,8.551424026489258,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8 sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,32,1.5749119520187378,1.498412847518921,2.170527935028076,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8 sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,64,1.494047999382019,1.482604742050171,1.5207936763763428,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8 sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,128,1.4581760168075562,1.4419968128204346,2.1133759021759033,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8 sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,256,1.7448960542678833,1.7180671691894531,1.7537024021148682,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8 sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,512,2.796544075012207,2.7762560844421387,2.8190720081329346,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8 sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,1024,9.511823654174805,9.501286506652832,9.787391662597656,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8 sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,32,0.3544960021972656,0.33546239137649536,0.8041215538978577,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,64,0.32897597551345825,0.32051199674606323,0.3438591957092285,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,128,0.30931198596954346,0.3002240061759949,0.3197120130062103,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,256,0.31334400177001953,0.2956160008907318,0.3251904249191284,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,512,0.447488009929657,0.44646400213241577,0.4485119879245758,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,1024,1.8585599660873413,1.8574656248092651,1.861631989479065,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,32,0.25804799795150757,0.24883200228214264,0.30926719307899475,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,64,0.25804799795150757,0.2514623999595642,0.26668161153793335,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,128,0.24075199663639069,0.2303999960422516,0.25194239616394043,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,256,0.24686399102210999,0.23756800591945648,0.2550272047519684,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,512,0.7045120000839233,0.704479992389679,0.7063615918159485,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,1024,2.698431968688965,2.697216033935547,2.7013120651245117,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8 sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,32,0.3603515625,0.3603515625,0.3603515625,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,64,1.4189453125,1.4189453125,1.4189453125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,128,5.6455078125,5.6455078125,5.6455078125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,256,22.53662109375,22.53662109375,22.53662109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,512,90.06884765625,90.06884765625,90.06884765625,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,1024,360.13330078125,360.13330078125,360.13330078125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,32,0.45263671875,0.45263671875,0.45263671875,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,64,1.7685546875,1.7685546875,1.7685546875,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,128,7.04833984375,7.04833984375,7.04833984375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,256,28.15478515625,28.15478515625,28.15478515625,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,512,112.55517578125,112.55517578125,112.55517578125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,1024,450.10595703125,450.10595703125,450.10595703125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,64,0.236735999584198,0.16073599457740784,0.24985599517822266,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:08:54,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,128,0.22323200106620789,0.21503999829292297,0.2323904037475586,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:08:54,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,256,0.24268800020217896,0.2295808047056198,0.25088000297546387,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:08:54,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,512,0.3307519853115082,0.32805120944976807,0.3317759931087494,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:08:54,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,1024,0.8540160059928894,0.851967990398407,0.8595455884933472,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:08:54,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,2048,2.3658719062805176,2.3617537021636963,2.368511915206909,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:08:54,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,4096,8.466431617736816,8.447999954223633,8.480768203735352,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:08:54,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,64,5.16915225982666,5.143871784210205,5.297952175140381,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:01,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,128,10.244048118591309,10.094131469726562,10.48145866394043,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:01,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,256,20.196895599365234,20.145601272583008,21.581132888793945,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:01,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,512,42.183536529541016,41.2415771484375,43.12549591064453,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:01,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,1024,77.73798370361328,77.73798370361328,77.73798370361328,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:01,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,2048,172.90853881835938,172.90853881835938,172.90853881835938,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:01,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,4096,346.5686950683594,346.5686950683594,346.5686950683594,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:01,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,64,2.723423957824707,2.68287992477417,2.7842559814453125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:14,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,128,2.6542398929595947,2.6169726848602295,2.68984317779541,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:14,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,256,2.595871925354004,2.1286911964416504,2.6818559169769287,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:14,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,512,2.738736152648926,2.7115519046783447,2.8180480003356934,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:14,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,1024,2.83457612991333,2.805759906768799,2.88972806930542,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:14,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,2048,6.529168128967285,6.525951862335205,6.66664981842041,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:14,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,4096,23.742895126342773,23.660747528076172,23.825515747070312,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:14,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,64,6.841343879699707,6.725196838378906,6.972832202911377,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:21,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,128,11.825152397155762,11.683839797973633,12.080537796020508,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:21,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,256,21.856351852416992,21.36012077331543,21.95940589904785,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:21,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,512,42.70033264160156,42.545169830322266,42.855499267578125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:21,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,1024,87.9656982421875,87.9656982421875,87.9656982421875,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:21,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,2048,181.77536010742188,181.77536010742188,181.77536010742188,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:21,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,4096,368.0634765625,368.0634765625,368.0634765625,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:21,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,64,0.5920320153236389,0.5674688220024109,1.3856768608093262,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:22,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,128,0.6430720090866089,0.6318399906158447,0.6610943675041199,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:22,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,256,0.6456320285797119,0.6359040141105652,0.6676480174064636,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:22,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,512,0.7014399766921997,0.6911231875419617,0.7275007963180542,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:22,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,1024,1.4684159755706787,1.4663679599761963,1.4704639911651611,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:22,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,2048,4.150223731994629,4.14717435836792,4.234445095062256,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:22,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,4096,15.17465591430664,14.853119850158691,15.310848236083984,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:22,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,64,0.6000639796257019,0.5832703709602356,1.2799999713897705,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:25,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,128,0.5550079941749573,0.5488640069961548,0.5914624333381653,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:25,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,256,0.5470079779624939,0.5406720042228699,0.562175989151001,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:25,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,512,0.8714240193367004,0.8617984056472778,1.2751424312591553,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:25,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,1024,2.3746559619903564,2.3727169036865234,2.3797760009765625,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:25,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,2048,8.019968032836914,8.00870418548584,8.2227201461792,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:25,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,4096,28.92291259765625,28.684505462646484,28.97941780090332,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:25,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,64,0.23756800591945648,0.22630399465560913,0.24985599517822266,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:32,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,128,0.25088000297546387,0.24187520146369934,0.25964802503585815,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:32,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,256,0.43110400438308716,0.42920318245887756,0.43212801218032837,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:32,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,512,1.0199040174484253,1.0147839784622192,1.0281280279159546,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:32,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,1024,2.584575891494751,2.578432083129883,2.593791961669922,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:32,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,2048,7.8611040115356445,7.851212978363037,8.14100456237793,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:32,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,4096,27.072511672973633,27.043020248413086,27.129650115966797,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:32,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,64,5.303808212280273,5.205196857452393,5.414611339569092,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:38,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,128,10.352640151977539,10.268671989440918,10.546982765197754,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:38,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,256,20.696575164794922,20.600217819213867,22.168373107910156,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:38,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,512,40.9251823425293,39.459224700927734,42.39113998413086,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:38,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,1024,84.20972442626953,84.20972442626953,84.20972442626953,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:38,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,2048,165.5727996826172,165.5727996826172,165.5727996826172,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:38,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,4096,365.4942626953125,365.4942626953125,365.4942626953125,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:38,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,64,2.5410561561584473,2.5221376419067383,2.574540853500366,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:52,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,128,2.6214399337768555,2.5966720581054688,2.66780161857605,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:52,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,256,2.6818559169769287,2.660710334777832,2.7396223545074463,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:52,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,512,2.9624319076538086,2.959359884262085,2.973695993423462,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:52,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,1024,7.516160011291504,7.5141119956970215,7.782809734344482,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:52,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,2048,22.99033546447754,22.859058380126953,23.101655960083008,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:52,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,4096,79.14390563964844,79.14390563964844,79.14390563964844,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:09:52,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,64,6.206463813781738,6.177548885345459,6.346368312835693,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:00,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,128,11.45395278930664,11.369497299194336,11.57201862335205,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:00,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,256,21.295616149902344,20.8918514251709,22.428876876831055,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:00,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,512,46.485904693603516,44.799137115478516,48.172672271728516,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:00,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,1024,87.60115051269531,87.60115051269531,87.60115051269531,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:00,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,2048,210.36146545410156,210.36146545410156,210.36146545410156,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:00,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,4096,456.848388671875,456.848388671875,456.848388671875,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:00,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,64,0.5756800174713135,0.45319682359695435,0.7064127922058105,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:02,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,128,0.5908480286598206,0.48742398619651794,0.6028479933738708,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:02,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,256,0.915615975856781,0.8775680065155029,0.9175040125846863,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:02,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,512,1.9450880289077759,1.9351999759674072,1.9651199579238892,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:02,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,1024,4.930560111999512,4.915200233459473,5.046477317810059,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:02,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,2048,15.102832794189453,14.952447891235352,15.31494426727295,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:02,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,4096,52.104190826416016,52.104190826416016,52.104190826416016,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:02,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,64,0.4843519926071167,0.4761984050273895,0.6077119708061218,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:05,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,128,0.5319839715957642,0.5222399830818176,0.5335040092468262,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:05,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,256,1.1182080507278442,1.1151360273361206,1.120255947113037,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:05,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,512,2.5815041065216064,2.5763840675354004,2.5960447788238525,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:05,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,1024,7.123968124389648,7.087513446807861,7.359897613525391,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:05,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,2048,24.104448318481445,24.077312469482422,24.161880493164062,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:05,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,4096,86.40716552734375,86.40716552734375,86.40716552734375,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:05,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,64,0.2467840015888214,0.17902079224586487,0.25702399015426636,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:12,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,128,0.23756800591945648,0.23654399812221527,0.24885760247707367,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:12,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,256,0.4567039906978607,0.45158401131629944,0.4638719856739044,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:12,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,512,0.8017920255661011,0.7946239709854126,0.8048639893531799,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:12,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,1024,1.9527679681777954,1.9476544857025146,1.9595264196395874,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:12,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,2048,5.405695915222168,5.392384052276611,5.651423931121826,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:12,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,4096,18.608959197998047,18.311372756958008,18.646629333496094,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:12,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,64,6.554111957550049,6.130688190460205,6.872096061706543,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:20,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,128,13.195263862609863,13.134265899658203,13.464166641235352,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:20,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,256,24.001535415649414,23.594995498657227,25.934438705444336,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:20,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,512,50.334720611572266,50.334720611572266,50.334720611572266,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:20,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,1024,107.2701416015625,107.2701416015625,107.2701416015625,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:20,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,2048,218.13658142089844,218.13658142089844,218.13658142089844,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:20,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,4096,457.2313537597656,457.2313537597656,457.2313537597656,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:20,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,64,2.623487949371338,2.605638265609741,2.6442177295684814,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:34,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,128,2.6389598846435547,2.6225087642669678,2.6781694889068604,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:34,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,256,2.613312005996704,2.589139223098755,2.6998207569122314,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:34,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,512,2.7299840450286865,2.7037951946258545,2.783027172088623,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:34,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,1024,5.588992118835449,5.584896087646484,5.632409572601318,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:34,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,2048,15.91859245300293,15.853568077087402,16.029695510864258,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:34,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,4096,54.28019332885742,54.28019332885742,54.28019332885742,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:34,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,64,8.281087875366211,8.076288223266602,8.5731840133667,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:43,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,128,14.909952163696289,14.721952438354492,15.562975883483887,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:43,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,256,25.10848045349121,25.013248443603516,25.180980682373047,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:43,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,512,53.98118209838867,53.98118209838867,53.98118209838867,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:43,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,1024,115.51538848876953,115.51538848876953,115.51538848876953,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:43,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,2048,234.2144012451172,234.2144012451172,234.2144012451172,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:43,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,4096,493.1143798828125,493.1143798828125,493.1143798828125,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:43,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,64,0.6873279809951782,0.6780927777290344,0.8112127780914307,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:45,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,128,0.6923519968986511,0.6756608486175537,0.8371520042419434,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:45,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,256,0.7854080200195312,0.7739391922950745,0.7946239709854126,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:45,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,512,1.5523840188980103,1.5431679487228394,1.5880192518234253,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:45,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,1024,3.635200023651123,3.634176015853882,3.637446403503418,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:45,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,2048,10.225664138793945,10.196991920471191,10.515456199645996,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:45,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,4096,35.736061096191406,35.612876892089844,35.859249114990234,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:45,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,64,0.4935680031776428,0.4843519926071167,1.2861696481704712,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:48,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,128,0.5950400233268738,0.4885439872741699,0.7454720735549927,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:48,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,256,0.9082880020141602,0.8939520120620728,1.2302591800689697,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:48,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,512,1.994752049446106,1.9916800260543823,2.002943992614746,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:48,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,1024,5.427199840545654,5.400953769683838,5.5943169593811035,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:48,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,2048,16.917503356933594,16.85626792907715,17.202789306640625,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:48,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,4096,58.775550842285156,58.775550842285156,58.775550842285156,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:48,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,64,0.16998399794101715,0.159743994474411,0.24968959391117096,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:52,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,128,0.15515199303627014,0.14643199741840363,0.16281600296497345,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:52,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,256,0.16998399794101715,0.159743994474411,0.25088000297546387,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:52,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,512,0.3307519853115082,0.32767999172210693,0.3317759931087494,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:52,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,1024,0.8550400137901306,0.8529919981956482,0.8581119775772095,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:52,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,2048,2.3664638996124268,2.36456298828125,2.371583938598633,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:52,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,4096,8.253439903259277,8.21452808380127,8.534015655517578,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:52,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,64,5.056511878967285,4.674380779266357,5.254271984100342,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:58,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,128,10.41360092163086,10.147839546203613,10.88619613647461,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:58,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,256,21.108095169067383,19.98341178894043,22.000703811645508,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:58,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,512,39.93907165527344,39.49793243408203,40.380210876464844,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:58,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,1024,87.47724914550781,87.47724914550781,87.47724914550781,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:58,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,2048,162.8107147216797,162.8107147216797,162.8107147216797,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:58,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,4096,318.89202880859375,318.89202880859375,318.89202880859375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:58,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,64,2.756608009338379,2.50598406791687,2.862694263458252,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:59,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,128,2.683903932571411,2.656268835067749,2.720358371734619,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:59,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,256,2.6729280948638916,2.649907112121582,2.703104019165039,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:59,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,512,2.8049919605255127,2.7712254524230957,2.848358392715454,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:59,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,1024,2.8816640377044678,2.8426239490509033,2.966118335723877,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:59,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,2048,6.523903846740723,6.52185583114624,6.534143924713135,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:59,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,4096,23.48236846923828,23.36788558959961,23.587430953979492,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:10:59,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,64,6.210592269897461,6.149964809417725,6.439935684204102,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:06,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,128,11.412479400634766,11.000422477722168,12.122776985168457,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:06,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,256,21.02124786376953,20.722354888916016,21.280357360839844,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:06,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,512,44.49420928955078,43.21909713745117,45.769317626953125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:06,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,1024,77.97862243652344,77.97862243652344,77.97862243652344,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:06,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,2048,169.87033081054688,169.87033081054688,169.87033081054688,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:06,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,4096,360.7623596191406,360.7623596191406,360.7623596191406,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:06,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,64,0.6484479904174805,0.5443072319030762,1.446675181388855,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:07,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,128,0.5460799932479858,0.536575973033905,0.6473984122276306,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:07,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,256,0.5612640380859375,0.5377407670021057,0.6634495854377747,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:07,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,512,0.6347839832305908,0.6327999830245972,0.7219520211219788,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:07,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,1024,1.4684159755706787,1.4624768495559692,1.4744960069656372,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:07,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,2048,4.150784015655518,4.148223876953125,4.164403438568115,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:07,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,4096,15.233535766601562,14.96678352355957,15.318016052246094,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:07,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,64,0.596992015838623,0.5801728367805481,1.2581120729446411,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,128,0.5565760135650635,0.456928014755249,0.5724160075187683,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,256,0.5560640096664429,0.4616512060165405,0.5724160075187683,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,512,0.8714240193367004,0.8622080087661743,1.2775424718856812,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,1024,2.3746559619903564,2.371583938598633,2.3776895999908447,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,2048,8.032719612121582,8.015257835388184,8.314061164855957,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,4096,29.113344192504883,28.672204971313477,29.20366096496582,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,64,32.525390625,32.525390625,32.525390625,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,128,37.7734375,37.7734375,37.7734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,256,53.2734375,53.2734375,53.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,512,102.2734375,102.2734375,102.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,1024,272.2734375,272.2734375,272.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,2048,900.2734375,900.2734375,900.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,4096,3308.2734375,3308.2734375,3308.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:10,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,64,32.53125,32.53125,32.53125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:17,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,128,36.8046875,36.8046875,36.8046875,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:17,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,256,53.3359375,53.3359375,53.3359375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:17,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,110.5234375,110.5234375,110.5234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:17,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,321.2734375,321.2734375,321.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:17,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,1128.2734375,1128.2734375,1128.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:17,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,4284.2734375,4284.2734375,4284.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:17,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,64,55.2880859375,55.2880859375,55.2880859375,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:18,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,128,72.28515625,72.28515625,72.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:18,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,256,119.03515625,119.03515625,119.03515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:18,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,512,265.28515625,265.28515625,265.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:18,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,1024,775.28515625,775.28515625,775.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:18,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,2048,2659.28515625,2659.28515625,2659.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:18,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,4096,9883.28515625,9883.28515625,9883.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:18,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,64,55.2919921875,55.2919921875,55.2919921875,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:27,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,128,70.05078125,70.05078125,70.05078125,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:27,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,256,118.34765625,118.34765625,118.34765625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:27,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,289.53515625,289.53515625,289.53515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:27,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,920.28515625,920.28515625,920.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:27,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,3335.28515625,3335.28515625,3335.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:27,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,12779.28515625,12779.28515625,12779.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:27,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,64,74.80078125,74.80078125,74.80078125,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:28,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,128,83.296875,83.296875,83.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:28,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,256,114.296875,114.296875,114.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:28,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,512,212.296875,212.296875,212.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:28,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,1024,552.296875,552.296875,552.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:28,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,2048,1808.296875,1808.296875,1808.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:28,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,4096,6624.296875,6624.296875,6624.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:28,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,64,74.8046875,74.8046875,74.8046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:38,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,128,82.31640625,82.31640625,82.31640625,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:38,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,256,114.359375,114.359375,114.359375,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:38,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,228.546875,228.546875,228.546875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:38,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,649.296875,649.296875,649.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:38,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,2260.296875,2260.296875,2260.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:38,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,8560.296875,8560.296875,8560.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:38,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,64,32.525390625,32.525390625,32.525390625,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:39,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,128,37.7734375,37.7734375,37.7734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:39,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,256,53.2734375,53.2734375,53.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:39,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,512,102.2734375,102.2734375,102.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:39,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,1024,272.2734375,272.2734375,272.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:39,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,2048,900.2734375,900.2734375,900.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:39,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,4096,3308.2734375,3308.2734375,3308.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:39,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,64,32.53125,32.53125,32.53125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:46,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,128,36.8046875,36.8046875,36.8046875,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:46,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,256,53.3359375,53.3359375,53.3359375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:46,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,110.5234375,110.5234375,110.5234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:46,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,321.2734375,321.2734375,321.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:46,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,1128.2734375,1128.2734375,1128.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:46,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,4284.2734375,4284.2734375,4284.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-27 15:11:46,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,64,0.25600001215934753,0.25436800718307495,0.2605184018611908,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:08,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,128,0.2569279968738556,0.25494399666786194,0.26105600595474243,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:08,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,256,0.25676798820495605,0.2550591826438904,0.2598848044872284,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:08,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,512,0.25841599702835083,0.25681281089782715,0.2625727951526642,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:08,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,1024,0.3150399923324585,0.31407999992370605,0.31611520051956177,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:08,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,2048,0.8260959982872009,0.8238016366958618,0.828614354133606,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:08,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,4096,2.5686399936676025,2.557523012161255,2.5757951736450195,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:08,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,64,5.276463985443115,5.270419120788574,5.286643028259277,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:14,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,128,10.498432159423828,10.476134300231934,10.51439380645752,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:14,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,256,20.82036781311035,20.771360397338867,20.881420135498047,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:14,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,512,42.07323455810547,41.776065826416016,42.370399475097656,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:14,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,1024,81.8509750366211,81.8509750366211,81.8509750366211,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:14,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,2048,165.88720703125,165.88720703125,165.88720703125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:14,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,4096,331.2662658691406,331.2662658691406,331.2662658691406,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:14,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,64,0.8993600010871887,0.8924031853675842,0.9097279906272888,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:25,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,128,0.8939200043678284,0.8890752196311951,0.9034687876701355,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:25,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,256,0.9244480133056641,0.9180480241775513,0.940447986125946,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:25,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,512,0.9229600429534912,0.915289580821991,0.9307839870452881,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:25,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,1024,0.9950560331344604,0.9915199875831604,0.9971520304679871,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:25,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,2048,2.5537919998168945,2.548985481262207,2.5564353466033936,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:25,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,4096,7.698319911956787,7.67669153213501,7.713951587677002,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:25,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,64,5.840767860412598,5.819551944732666,5.864096164703369,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:31,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,128,11.064079284667969,11.050003051757812,11.102252960205078,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:31,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,256,21.443504333496094,21.364646911621094,21.61541748046875,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:31,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,512,42.16088104248047,42.137290954589844,42.18446731567383,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:31,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,1024,84.43017578125,84.43017578125,84.43017578125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:31,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,2048,169.27821350097656,169.27821350097656,169.27821350097656,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:31,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,4096,342.5223388671875,342.5223388671875,342.5223388671875,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:31,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,64,0.49110400676727295,0.4891200065612793,0.49513599276542664,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:32,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,128,0.4911839962005615,0.4894847869873047,0.4949440062046051,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:32,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,256,0.5103520154953003,0.5084800124168396,0.5146496295928955,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:32,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,512,0.5199040174484253,0.5182399749755859,0.5254335999488831,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:32,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,1024,0.6806079745292664,0.6792960166931152,0.681990385055542,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:32,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,2048,1.7373919486999512,1.7352639436721802,1.7395071983337402,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:32,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,4096,5.2151360511779785,5.205132484436035,5.221510410308838,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:32,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,64,0.4123840034008026,0.41091200709342957,0.4163135886192322,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:35,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,128,0.4136800169944763,0.41203200817108154,0.4168703854084015,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:35,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,256,0.4320639967918396,0.4301888048648834,0.4355071783065796,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:35,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,512,0.44307199120521545,0.44010239839553833,0.4480448067188263,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:35,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,1024,0.9624000191688538,0.9609023928642273,0.9633920192718506,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:35,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,2048,2.6429600715637207,2.641439914703369,2.644223928451538,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:35,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,4096,8.974464416503906,8.973376274108887,8.97913646697998,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:35,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,64,0.2598559856414795,0.2580096125602722,0.2628991901874542,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:40,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,128,0.2602880001068115,0.25900799036026,0.26241281628608704,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:40,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,256,0.2643519937992096,0.2627519965171814,0.26796799898147583,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:40,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,512,0.41286399960517883,0.4122239947319031,0.4134399890899658,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:40,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,1024,0.9781439900398254,0.9763264060020447,0.9801728129386902,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:40,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,2048,2.659600019454956,2.655103921890259,2.6648640632629395,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:40,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,4096,8.184944152832031,8.175705909729004,8.197542190551758,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:40,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,64,5.3048319816589355,5.287481784820557,5.315853118896484,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:47,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,128,10.493408203125,10.434623718261719,10.539365768432617,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:47,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,256,20.872079849243164,20.860185623168945,21.320632934570312,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:47,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,512,41.84241485595703,41.80018615722656,41.884647369384766,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:47,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,1024,84.96883392333984,84.96883392333984,84.96883392333984,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:47,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,2048,169.7915802001953,169.7915802001953,169.7915802001953,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:47,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,4096,345.4809265136719,345.4809265136719,345.4809265136719,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:47,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,64,0.9144960045814514,0.9068800210952759,0.9251199960708618,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,128,0.9177280068397522,0.9107391834259033,0.9262208342552185,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,256,0.9360480308532715,0.9290496110916138,0.949785590171814,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,512,1.2921760082244873,1.289574384689331,1.2943040132522583,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,1024,2.9243199825286865,2.919097423553467,2.9282751083374023,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,2048,7.83568000793457,7.829171180725098,7.843168258666992,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,4096,24.4779052734375,24.40936279296875,24.545881271362305,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:25:56,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,64,5.912464141845703,5.879615783691406,5.923999786376953,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,128,11.05232048034668,11.035250663757324,11.079456329345703,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,256,21.471296310424805,21.445714950561523,21.49998664855957,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,512,42.718048095703125,42.69863510131836,42.73746109008789,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,1024,86.00204467773438,86.00204467773438,86.00204467773438,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,2048,177.3928985595703,177.3928985595703,177.3928985595703,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,4096,373.61773681640625,373.61773681640625,373.61773681640625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:03,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,64,0.5130239725112915,0.5107200145721436,0.5175104141235352,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:05,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,128,0.5187360048294067,0.5168319940567017,0.522816002368927,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:05,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,256,0.5284639596939087,0.5261759757995605,0.5319616198539734,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:05,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,512,0.8799999952316284,0.8791552186012268,0.8812223672866821,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:05,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,1024,1.9606720209121704,1.9588288068771362,1.9625920057296753,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:05,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,2048,5.239616394042969,5.233331203460693,5.246374607086182,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:05,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,4096,16.295886993408203,16.174047470092773,16.315935134887695,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:05,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,64,0.4262079894542694,0.42505601048469543,0.42970240116119385,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:07,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,128,0.43747198581695557,0.43620482087135315,0.4399871826171875,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:07,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,256,0.5542719960212708,0.5531839728355408,0.555072009563446,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:07,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,512,1.0854079723358154,1.0841728448867798,1.0862784385681152,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:07,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,1024,2.6914560794830322,2.6902334690093994,2.6927361488342285,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:07,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,2048,8.072175979614258,8.052319526672363,8.081612586975098,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:07,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,4096,27.25152015686035,27.248275756835938,27.25334358215332,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:07,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,64,0.26579201221466064,0.26371198892593384,0.2690303921699524,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:14,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,128,0.26337599754333496,0.26162558794021606,0.2659648060798645,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:14,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,256,0.264384001493454,0.2627967894077301,0.267276793718338,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:14,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,512,0.3535360097885132,0.3527039885520935,0.3543359935283661,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:14,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,1024,0.7347840070724487,0.7331455945968628,0.7361727952957153,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:14,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,2048,1.8545279502868652,1.850592017173767,1.8574399948120117,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:14,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,4096,5.953392028808594,5.927840232849121,5.962080001831055,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:14,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,64,6.691328048706055,6.674118518829346,6.712192058563232,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:22,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,128,13.332127571105957,13.322579383850098,13.362988471984863,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:22,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,256,26.70470428466797,26.678035736083984,27.087322235107422,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:22,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,512,52.936126708984375,52.936126708984375,52.936126708984375,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:22,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,1024,107.26537322998047,107.26537322998047,107.26537322998047,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:22,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,2048,213.9727020263672,213.9727020263672,213.9727020263672,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:22,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,4096,430.3240966796875,430.3240966796875,430.3240966796875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:22,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,64,0.912992000579834,0.8976320028305054,0.9327296018600464,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:32,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,128,0.9216639995574951,0.9107776284217834,0.9301823973655701,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:32,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,256,0.915615975856781,0.9078848361968994,0.9261952042579651,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:32,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,512,1.1379199028015137,1.1355520486831665,1.1407424211502075,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:32,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,1024,2.277343988418579,2.268371343612671,2.2814719676971436,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:32,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,2048,5.6143999099731445,5.608166217803955,5.673030376434326,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:32,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,4096,17.534591674804688,17.516069412231445,17.57676124572754,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:32,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,64,7.29852819442749,7.287238597869873,7.318784236907959,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:40,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,128,13.901632308959961,13.893203735351562,13.942361831665039,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:40,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,256,27.261056900024414,27.254297256469727,27.288244247436523,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:40,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,512,54.26707077026367,54.26707077026367,54.26707077026367,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:40,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,1024,108.40013122558594,108.40013122558594,108.40013122558594,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:40,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,2048,220.19622802734375,220.19622802734375,220.19622802734375,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:40,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,4096,453.9944763183594,453.9944763183594,453.9944763183594,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:40,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,64,0.49564799666404724,0.4941760003566742,0.49819520115852356,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:42,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,128,0.5055680274963379,0.5036479830741882,0.5097920298576355,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:42,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,256,0.5073280334472656,0.5049920082092285,0.5109120011329651,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:42,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,512,0.7868000268936157,0.7859584093093872,0.7878463864326477,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:42,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,1024,1.5349119901657104,1.5336960554122925,1.5368640422821045,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:42,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,2048,3.791167974472046,3.787168025970459,3.802060842514038,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:42,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,4096,11.613519668579102,11.596006393432617,11.618464469909668,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:42,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,64,0.41388800740242004,0.412447988986969,0.417279988527298,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:45,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,128,0.42691200971603394,0.42473599314689636,0.4324415922164917,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:45,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,256,0.4886400103569031,0.48771199584007263,0.48993921279907227,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:45,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,512,0.9216960072517395,0.9203839898109436,0.9231168031692505,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:45,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,1024,1.9877119064331055,1.9866175651550293,1.9888639450073242,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:45,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,2048,5.659264087677002,5.653772830963135,5.6628031730651855,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:45,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,4096,18.87718391418457,18.870214462280273,18.878368377685547,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:45,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,64,0.26070401072502136,0.258950412273407,0.26361599564552307,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:49,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,128,0.2584800124168396,0.256985604763031,0.26101118326187134,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:49,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,256,0.25942400097846985,0.25811201333999634,0.2618303894996643,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:49,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,512,0.26097601652145386,0.25948798656463623,0.2640959918498993,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:49,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,1024,0.3149600028991699,0.3140160143375397,0.31593599915504456,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:49,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,2048,0.8244799971580505,0.8216319680213928,0.8271167874336243,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:49,0.5.10 fused_neighborhood_attention,liger,forward,speed,ms,seq_len,sequence length,4096,2.5662078857421875,2.5587263107299805,2.5770816802978516,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:49,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,64,5.195775985717773,5.172947406768799,5.230342388153076,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:55,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,128,10.488927841186523,10.467231750488281,10.511955261230469,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:55,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,256,21.20012664794922,21.1026554107666,21.275672912597656,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:55,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,512,43.42755126953125,42.99705123901367,43.858055114746094,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:55,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,1024,84.55020904541016,84.55020904541016,84.55020904541016,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:55,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,2048,169.3335418701172,169.3335418701172,169.3335418701172,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:55,0.5.10 fused_neighborhood_attention,torch,forward,speed,ms,seq_len,sequence length,4096,340.14495849609375,340.14495849609375,340.14495849609375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:55,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,64,0.8945279717445374,0.886732816696167,0.9055423736572266,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,128,0.8908159732818604,0.8847360014915466,0.8983359932899475,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,256,0.9086400270462036,0.9012479782104492,0.9151040315628052,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,512,0.9225280284881592,0.9153919816017151,0.9314560294151306,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,1024,0.9986559748649597,0.9929599761962891,1.0019199848175049,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,2048,2.5703680515289307,2.56607985496521,2.574105739593506,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:56,0.5.10 fused_neighborhood_attention,liger,full,speed,ms,seq_len,sequence length,4096,7.78985595703125,7.7626495361328125,7.792575836181641,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:26:56,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,64,5.764095783233643,5.736550331115723,5.7790656089782715,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,128,11.027040481567383,11.009875297546387,11.10332202911377,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,256,21.499038696289062,21.467283248901367,21.521759033203125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,512,42.39520263671875,42.34148025512695,42.44892120361328,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,1024,85.2570571899414,85.2570571899414,85.2570571899414,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,2048,172.73379516601562,172.73379516601562,172.73379516601562,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:03,0.5.10 fused_neighborhood_attention,torch,full,speed,ms,seq_len,sequence length,4096,347.4947509765625,347.4947509765625,347.4947509765625,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:03,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,64,0.4941760003566742,0.49265921115875244,0.4977791905403137,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:04,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,128,0.49348801374435425,0.49185919761657715,0.4974527955055237,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:04,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,256,0.5101760029792786,0.5087360143661499,0.5148288011550903,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:04,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,512,0.5200639963150024,0.5186240077018738,0.5237439870834351,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:04,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,1024,0.6887840032577515,0.6859776377677917,0.6903167963027954,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:04,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,2048,1.7373759746551514,1.7341376543045044,1.7395455837249756,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:04,0.5.10 fused_neighborhood_attention,liger,backward,speed,ms,seq_len,sequence length,4096,5.201104164123535,5.196633815765381,5.208876609802246,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:04,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,64,0.4107840061187744,0.40908798575401306,0.41468799114227295,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:06,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,128,0.4121600091457367,0.4106624126434326,0.4156480133533478,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:06,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,256,0.4296959936618805,0.42847999930381775,0.4339391887187958,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:06,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,512,0.43406400084495544,0.4329279959201813,0.43656960129737854,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:06,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,1024,0.9568639993667603,0.9556096196174622,0.9582463502883911,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:06,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,2048,2.6357598304748535,2.634399890899658,2.6394240856170654,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:06,0.5.10 fused_neighborhood_attention,torch,backward,speed,ms,seq_len,sequence length,4096,8.944831848144531,8.943455696105957,8.947711944580078,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:06,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,64,80.275390625,80.275390625,80.275390625,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:07,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,128,85.5234375,85.5234375,85.5234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:07,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,256,101.0234375,101.0234375,101.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:07,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,512,150.0234375,150.0234375,150.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:07,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,1024,320.0234375,320.0234375,320.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:07,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,2048,948.0234375,948.0234375,948.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:07,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,4096,3356.0234375,3356.0234375,3356.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:07,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,64,80.28125,80.28125,80.28125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,128,84.5546875,84.5546875,84.5546875,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,256,101.0859375,101.0859375,101.0859375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,158.2734375,158.2734375,158.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,369.0234375,369.0234375,369.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,1176.0234375,1176.0234375,1176.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,4332.0234375,4332.0234375,4332.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,64,103.0380859375,103.0380859375,103.0380859375,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,128,120.78515625,120.78515625,120.78515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,256,166.78515625,166.78515625,166.78515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,512,313.03515625,313.03515625,313.03515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,1024,823.03515625,823.03515625,823.03515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,2048,2707.03515625,2707.03515625,2707.03515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,4096,9931.03515625,9931.03515625,9931.03515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:14,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,64,103.0419921875,103.0419921875,103.0419921875,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,128,117.05078125,117.05078125,117.05078125,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,256,167.34765625,167.34765625,167.34765625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,337.28515625,337.28515625,337.28515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,968.03515625,968.03515625,968.03515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,3383.03515625,3383.03515625,3383.03515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,12827.03515625,12827.03515625,12827.03515625,"{""batch_size"": 4, ""hidden_size"": 768, ""num_heads"": 12, ""kernel_size"": 7, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,64,122.55078125,122.55078125,122.55078125,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,128,131.046875,131.046875,131.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,256,162.046875,162.046875,162.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,512,260.046875,260.046875,260.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,1024,600.046875,600.046875,600.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,2048,1856.046875,1856.046875,1856.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,4096,6672.046875,6672.046875,6672.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:22,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,64,122.5546875,122.5546875,122.5546875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,128,130.06640625,130.06640625,130.06640625,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,256,162.109375,162.109375,162.109375,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,276.296875,276.296875,276.296875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,697.046875,697.046875,697.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,2308.046875,2308.046875,2308.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,8608.046875,8608.046875,8608.046875,"{""batch_size"": 2, ""hidden_size"": 1024, ""num_heads"": 16, ""kernel_size"": 9, ""dilation"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,64,80.275390625,80.275390625,80.275390625,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,128,85.5234375,85.5234375,85.5234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,256,101.0234375,101.0234375,101.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,512,150.0234375,150.0234375,150.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,1024,320.0234375,320.0234375,320.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,2048,948.0234375,948.0234375,948.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,liger,full,memory,MB,seq_len,sequence length,4096,3356.0234375,3356.0234375,3356.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:32,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,64,80.28125,80.28125,80.28125,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,128,84.5546875,84.5546875,84.5546875,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,256,101.0859375,101.0859375,101.0859375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,158.2734375,158.2734375,158.2734375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,369.0234375,369.0234375,369.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,1176.0234375,1176.0234375,1176.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10 fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,4332.0234375,4332.0234375,4332.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10 distill_cosine_loss,liger,forward,speed,ms,BT,B x T,1024,13.828096389770508,13.821133041381836,13.885849952697754,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:19:52,0.5.10 distill_cosine_loss,liger,forward,speed,ms,BT,B x T,2048,27.57427215576172,27.52573432922363,27.579801940917967,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:19:52,0.5.10 distill_cosine_loss,liger,forward,speed,ms,BT,B x T,4096,54.79423904418945,54.79423904418945,54.79423904418945,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:19:52,0.5.10 distill_cosine_loss,liger,forward,speed,ms,BT,B x T,8192,109.73490905761719,109.73490905761719,109.73490905761719,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:19:52,0.5.10 distill_cosine_loss,torch,forward,speed,ms,BT,B x T,1024,16.456703186035156,15.045836448669434,16.761650466918944,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:20:34,0.5.10 distill_cosine_loss,torch,forward,speed,ms,BT,B x T,2048,29.703168869018555,29.69333839416504,29.71177024841309,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:20:34,0.5.10 distill_cosine_loss,torch,forward,speed,ms,BT,B x T,4096,59.177982330322266,59.177982330322266,59.177982330322266,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:20:34,0.5.10 distill_cosine_loss,torch,forward,speed,ms,BT,B x T,8192,118.3815689086914,118.3815689086914,118.3815689086914,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:20:34,0.5.10 distill_cosine_loss,liger,full,speed,ms,BT,B x T,1024,14.654463768005371,14.63398380279541,14.68006420135498,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:21:16,0.5.10 distill_cosine_loss,liger,full,speed,ms,BT,B x T,2048,28.274688720703125,28.27284507751465,28.279603958129883,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:21:16,0.5.10 distill_cosine_loss,liger,full,speed,ms,BT,B x T,4096,55.96672058105469,55.96672058105469,55.96672058105469,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:21:16,0.5.10 distill_cosine_loss,liger,full,speed,ms,BT,B x T,8192,111.38764953613281,111.38764953613281,111.38764953613281,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:21:16,0.5.10 distill_cosine_loss,torch,full,speed,ms,BT,B x T,1024,37.45382308959961,37.42556076049805,37.482085418701175,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:01,0.5.10 distill_cosine_loss,torch,full,speed,ms,BT,B x T,2048,73.56620788574219,73.56620788574219,73.56620788574219,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:01,0.5.10 distill_cosine_loss,torch,full,speed,ms,BT,B x T,4096,145.73056030273438,145.73056030273438,145.73056030273438,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:01,0.5.10 distill_cosine_loss,torch,full,speed,ms,BT,B x T,8192,291.5000305175781,291.5000305175781,291.5000305175781,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:01,0.5.10 distill_cosine_loss,liger,full,memory,MB,BT,B x T,1024,5059.26806640625,5059.26806640625,5059.26806640625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:43,0.5.10 distill_cosine_loss,liger,full,memory,MB,BT,B x T,2048,5087.27587890625,5087.27587890625,5087.27587890625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:43,0.5.10 distill_cosine_loss,liger,full,memory,MB,BT,B x T,4096,5143.29150390625,5143.29150390625,5143.29150390625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:43,0.5.10 distill_cosine_loss,liger,full,memory,MB,BT,B x T,8192,5255.32275390625,5255.32275390625,5255.32275390625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:43,0.5.10 distill_cosine_loss,torch,full,memory,MB,BT,B x T,1024,7566.2822265625,7566.2822265625,7566.2822265625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10 distill_cosine_loss,torch,full,memory,MB,BT,B x T,2048,11590.3134765625,11590.3134765625,11590.3134765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10 distill_cosine_loss,torch,full,memory,MB,BT,B x T,4096,19654.375,19654.375,19654.375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10 distill_cosine_loss,torch,full,memory,MB,BT,B x T,8192,35782.5,35782.5,35782.5,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10 layer_norm,liger,forward,speed,ms,N,hidden size,1024,0.018848000094294548,0.018400000408291817,0.020102400332689285,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:11,0.6.0 layer_norm,liger,forward,speed,ms,N,hidden size,2048,0.029152000322937965,0.02876799926161766,0.029823999851942062,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:11,0.6.0 layer_norm,liger,forward,speed,ms,N,hidden size,4096,0.05104000121355057,0.05036799982190132,0.05177599936723709,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:11,0.6.0 layer_norm,liger,forward,speed,ms,N,hidden size,8192,0.0947519987821579,0.09436800330877304,0.09507200121879578,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:11,0.6.0 layer_norm,liger,forward,speed,ms,N,hidden size,16384,0.18476800620555878,0.18396799266338348,0.1852159947156906,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:11,0.6.0 layer_norm,huggingface,forward,speed,ms,N,hidden size,1024,0.023584000766277313,0.023423999547958374,0.023840000852942467,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:14,0.6.0 layer_norm,huggingface,forward,speed,ms,N,hidden size,2048,0.03734400123357773,0.03702399879693985,0.037811201065778746,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:14,0.6.0 layer_norm,huggingface,forward,speed,ms,N,hidden size,4096,0.06617599725723267,0.06560000032186508,0.06678400188684464,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:14,0.6.0 layer_norm,huggingface,forward,speed,ms,N,hidden size,8192,0.15267199277877808,0.15190400183200836,0.15347200632095337,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:14,0.6.0 layer_norm,huggingface,forward,speed,ms,N,hidden size,16384,0.3067840039730072,0.3046143889427185,0.3081152021884918,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:14,0.6.0 layer_norm,liger,backward,speed,ms,N,hidden size,1024,0.12006399780511856,0.11653760075569153,0.12467200309038162,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:16,0.6.0 layer_norm,liger,backward,speed,ms,N,hidden size,2048,0.1207360029220581,0.1176128014922142,0.1256511986255646,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:16,0.6.0 layer_norm,liger,backward,speed,ms,N,hidden size,4096,0.16630400717258453,0.16412800550460815,0.16838400065898895,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:16,0.6.0 layer_norm,liger,backward,speed,ms,N,hidden size,8192,0.31279999017715454,0.31116798520088196,0.3145279884338379,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:16,0.6.0 layer_norm,liger,backward,speed,ms,N,hidden size,16384,0.5776320099830627,0.5753471970558167,0.5798912048339844,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:16,0.6.0 layer_norm,huggingface,backward,speed,ms,N,hidden size,1024,0.0605119988322258,0.059647999703884125,0.061344001442193985,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:18,0.6.0 layer_norm,huggingface,backward,speed,ms,N,hidden size,2048,0.09967999905347824,0.09849599748849869,0.10099200159311295,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:18,0.6.0 layer_norm,huggingface,backward,speed,ms,N,hidden size,4096,0.17881600558757782,0.17795200645923615,0.17971199750900269,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:18,0.6.0 layer_norm,huggingface,backward,speed,ms,N,hidden size,8192,0.33369600772857666,0.3328000009059906,0.33478400111198425,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:18,0.6.0 layer_norm,huggingface,backward,speed,ms,N,hidden size,16384,0.6424000263214111,0.6412223815917969,0.643455982208252,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:18,0.6.0 layer_norm,liger,full,speed,ms,N,hidden size,1024,0.26576000452041626,0.2629248082637787,0.2701759934425354,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:21,0.6.0 layer_norm,liger,full,speed,ms,N,hidden size,2048,0.27427199482917786,0.26999040842056277,0.28091518878936766,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:21,0.6.0 layer_norm,liger,full,speed,ms,N,hidden size,4096,0.27454400062561035,0.27004799246788025,0.2807359993457794,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:21,0.6.0 layer_norm,liger,full,speed,ms,N,hidden size,8192,0.40556800365448,0.40403199195861816,0.40723198652267456,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:21,0.6.0 layer_norm,liger,full,speed,ms,N,hidden size,16384,0.7608960270881653,0.7589311957359314,0.7631679773330688,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:21,0.6.0 layer_norm,huggingface,full,speed,ms,N,hidden size,1024,0.08025600016117096,0.07942400127649307,0.08111999928951263,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,speed,ms,N,hidden size,2048,0.13315199315547943,0.13180799782276154,0.13468800485134125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,speed,ms,N,hidden size,4096,0.2417600005865097,0.24089600145816803,0.24262399971485138,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,speed,ms,N,hidden size,8192,0.4832639992237091,0.48214399814605713,0.4843647956848145,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,speed,ms,N,hidden size,16384,0.950575977563858,0.9484800100326538,0.9528064012527466,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,liger,full,memory,MB,N,hidden size,1024,80.0625,80.0625,80.0625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,liger,full,memory,MB,N,hidden size,2048,160.09375,160.09375,160.09375,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,liger,full,memory,MB,N,hidden size,4096,320.15625,320.15625,320.15625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,liger,full,memory,MB,N,hidden size,8192,640.28125,640.28125,640.28125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,liger,full,memory,MB,N,hidden size,16384,1280.53125,1280.53125,1280.53125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,memory,MB,N,hidden size,1024,80.0625,80.0625,80.0625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,memory,MB,N,hidden size,2048,160.09375,160.09375,160.09375,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,memory,MB,N,hidden size,4096,320.15625,320.15625,320.15625,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,memory,MB,N,hidden size,8192,640.28125,640.28125,640.28125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 layer_norm,huggingface,full,memory,MB,N,hidden size,16384,1280.53125,1280.53125,1280.53125,"{""M"": 4096, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 80GB HBM3,2025-07-17 18:18:23,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,1024,0.01759999990463257,0.017311999574303627,0.017920000478625298,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,2048,0.02924799919128418,0.028863999992609024,0.029983999207615852,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,4096,0.05129599943757057,0.050624001771211624,0.05209600180387497,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,8192,0.09344000369310379,0.09296000003814697,0.09382399916648865,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,16384,0.1791680008172989,0.17814399302005768,0.1796800047159195,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,32768,0.43830400705337524,0.43744000792503357,0.43929600715637207,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0 fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,1024,0.060095999389886856,0.059808000922203064,0.06054399907588959,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0 fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,2048,0.09084799885749817,0.09027200192213058,0.09161599725484848,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0 fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,4096,0.17820799350738525,0.17744000256061554,0.17897599935531616,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0 fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,8192,0.312608003616333,0.3118720054626465,0.31324800848960876,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0 fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,16384,0.574944019317627,0.5740479826927185,0.5756288051605225,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0 fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,32768,1.0943039655685425,1.0934272289276123,1.0951999425888062,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0 fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,1024,0.0352960005402565,0.03481600061058998,0.03811199963092804,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0 fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,2048,0.05430399999022484,0.05392000079154968,0.05503999814391136,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0 fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,4096,0.10592000186443329,0.1054655984044075,0.10630399733781815,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0 fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,8192,0.19679999351501465,0.19631999731063843,0.19724799692630768,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0 fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,16384,0.37436801195144653,0.3733760118484497,0.3752320110797882,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0 fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,32768,0.7376000285148621,0.7361343741416931,0.7391359806060791,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,1024,0.3147200047969818,0.30796160697937014,0.32764801383018494,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,2048,0.3089919984340668,0.30374398827552795,0.3226880133152008,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,4096,0.30691200494766235,0.3023296058177948,0.3205504059791565,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,8192,0.3246079981327057,0.3185984075069428,0.33656961321830753,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,16384,0.6010559797286987,0.5996800065040588,0.6026239991188049,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,32768,1.8402559757232666,1.8322880268096924,1.8461120128631592,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0 fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,1024,0.23878400027751923,0.23545600473880768,0.2507520020008087,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0 fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,2048,0.34513600170612335,0.34377598762512207,0.34678399562835693,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0 fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,4096,0.6330879926681519,0.631712019443512,0.6345599889755249,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0 fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,8192,1.1185599565505981,1.1172800064086914,1.1196800470352173,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0 fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,16384,2.0697600841522217,2.0678528785705566,2.0713536739349365,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0 fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,32768,3.9561920166015625,3.953824043273926,3.9581120014190674,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0 fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,1024,0.38916800916194916,0.3824320137500763,0.4037184059619903,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0 fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,2048,0.3890720009803772,0.38193280100822447,0.4032831907272339,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0 fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,4096,0.39715200662612915,0.3928639888763428,0.41097599267959595,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0 fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,8192,0.6275200247764587,0.6259520053863525,0.6287999749183655,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0 fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,16384,1.202239990234375,1.199679970741272,1.2048959732055664,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0 fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,32768,2.7738559246063232,2.7705343723297116,2.777868890762329,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,1024,0.15619200468063354,0.15376000106334686,0.1661248028278351,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,2048,0.15825600177049637,0.15600000321865082,0.16911999881267548,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,4096,0.16700799763202667,0.16502399742603302,0.1709440052509308,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,8192,0.1712000072002411,0.1700800061225891,0.17215999960899353,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,16384,0.42505601048469543,0.4233280122280121,0.42691200971603394,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,32768,1.4057759642601013,1.3944000005722046,1.4099839925765991,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0 fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,1024,0.1520960032939911,0.15136000514030457,0.1528960019350052,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0 fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,2048,0.2533760070800781,0.2524160146713257,0.25436800718307495,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0 fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,4096,0.4551039934158325,0.4540799856185913,0.45612800121307373,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0 fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,8192,0.8053439855575562,0.8038079738616943,0.806656002998352,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0 fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,16384,1.4933120012283325,1.492095947265625,1.49452805519104,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0 fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,32768,2.8600640296936035,2.8583295822143557,2.8612607955932616,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0 fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,1024,0.20175999402999878,0.199072003364563,0.2154303938150406,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,2048,0.20263999700546265,0.20000000298023224,0.21675519943237304,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,4096,0.25276800990104675,0.2515519857406616,0.2539199888706207,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,8192,0.4322720021009445,0.43088001012802124,0.4336000084877014,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,16384,0.8288000226020813,0.8266303777694701,0.8311295866966247,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,32768,2.03987193107605,2.0360767364501955,2.0436416149139403,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,1024,72.546875,72.546875,72.546875,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,2048,145.0859375,145.0859375,145.0859375,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,4096,290.1640625,290.1640625,290.1640625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,8192,580.3203125,580.3203125,580.3203125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,16384,1160.6328125,1160.6328125,1160.6328125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,32768,2321.2578125,2321.2578125,2321.2578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,1024,104.03173828125,104.03173828125,104.03173828125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,2048,208.05517578125,208.05517578125,208.05517578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,4096,416.10205078125,416.10205078125,416.10205078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,8192,832.19580078125,832.19580078125,832.19580078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,16384,1664.3125,1664.3125,1664.3125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,32768,3328.625,3328.625,3328.625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,1024,104.03564453125,104.03564453125,104.03564453125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,2048,208.06298828125,208.06298828125,208.06298828125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,4096,416.11767578125,416.11767578125,416.11767578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,8192,832.22705078125,832.22705078125,832.22705078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,16384,1544.44580078125,1544.44580078125,1544.44580078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,32768,2960.8837890625,2960.8837890625,2960.8837890625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0 fused_linear_grpo_loss_token,liger,forward,speed,ms,B,B,2,40.75366401672363,40.749671173095706,40.75765686035156,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:58:45,0.6.1 fused_linear_grpo_loss_token,liger,forward,speed,ms,B,B,4,80.95231628417969,80.95231628417969,80.95231628417969,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:58:45,0.6.1 fused_linear_grpo_loss_token,liger,forward,speed,ms,B,B,8,163.58604431152344,163.58604431152344,163.58604431152344,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:58:45,0.6.1 fused_linear_grpo_loss_token,liger,forward,speed,ms,B,B,16,323.6761474609375,323.6761474609375,323.6761474609375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:58:45,0.6.1 fused_linear_grpo_loss_token,torch,forward,speed,ms,B,B,2,23.71225643157959,23.612825775146483,23.8354434967041,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:59:51,0.6.1 fused_linear_grpo_loss_token,torch,forward,speed,ms,B,B,4,46.86131286621094,46.80355911254883,46.91906661987304,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:59:51,0.6.1 fused_linear_grpo_loss_token,torch,forward,speed,ms,B,B,8,94.54898834228516,94.54898834228516,94.54898834228516,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:59:51,0.6.1 fused_linear_grpo_loss_token,torch,forward,speed,ms,B,B,16,189.99501037597656,189.99501037597656,189.99501037597656,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-04 23:59:51,0.6.1 fused_linear_grpo_loss_token,liger,full,speed,ms,B,B,2,42.67263984680176,42.54085083007813,42.80442886352539,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:00:58,0.6.1 fused_linear_grpo_loss_token,liger,full,speed,ms,B,B,4,82.2446060180664,82.2446060180664,82.2446060180664,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:00:58,0.6.1 fused_linear_grpo_loss_token,liger,full,speed,ms,B,B,8,167.00416564941406,167.00416564941406,167.00416564941406,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:00:58,0.6.1 fused_linear_grpo_loss_token,liger,full,speed,ms,B,B,16,327.0911865234375,327.0911865234375,327.0911865234375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:00:58,0.6.1 fused_linear_grpo_loss_token,torch,full,speed,ms,B,B,2,45.36115264892578,45.241344451904304,45.480960845947266,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:02:07,0.6.1 fused_linear_grpo_loss_token,torch,full,speed,ms,B,B,4,90.00038146972656,90.00038146972656,90.00038146972656,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:02:07,0.6.1 fused_linear_grpo_loss_token,torch,full,speed,ms,B,B,8,177.22674560546875,177.22674560546875,177.22674560546875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:02:07,0.6.1 fused_linear_grpo_loss_token,torch,full,speed,ms,B,B,16,356.5383605957031,356.5383605957031,356.5383605957031,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:02:07,0.6.1 fused_linear_grpo_loss_token,liger,backward,speed,ms,B,B,2,1.814527988433838,1.8124799728393555,1.8167808055877686,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:03:11,0.6.1 fused_linear_grpo_loss_token,liger,backward,speed,ms,B,B,4,1.84934401512146,1.8472959995269775,1.8524160385131836,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:03:11,0.6.1 fused_linear_grpo_loss_token,liger,backward,speed,ms,B,B,8,1.891327977180481,1.8872319459915161,1.893990397453308,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:03:11,0.6.1 fused_linear_grpo_loss_token,liger,backward,speed,ms,B,B,16,1.9722239971160889,1.9660799503326416,1.9763200283050537,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:03:11,0.6.1 fused_linear_grpo_loss_token,torch,backward,speed,ms,B,B,2,22.014975547790527,21.710438537597657,22.19417533874512,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:04:16,0.6.1 fused_linear_grpo_loss_token,torch,backward,speed,ms,B,B,4,41.83603096008301,41.752165222167974,41.91989669799805,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:04:16,0.6.1 fused_linear_grpo_loss_token,torch,backward,speed,ms,B,B,8,81.66400146484375,81.66400146484375,81.66400146484375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:04:16,0.6.1 fused_linear_grpo_loss_token,torch,backward,speed,ms,B,B,16,162.6429443359375,162.6429443359375,162.6429443359375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:04:16,0.6.1 fused_linear_grpo_loss_token,liger,full,memory,MB,B,B,2,7344.77685546875,7344.77685546875,7344.77685546875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:05:31,0.6.1 fused_linear_grpo_loss_token,liger,full,memory,MB,B,B,4,7408.80029296875,7408.80029296875,7408.80029296875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:05:31,0.6.1 fused_linear_grpo_loss_token,liger,full,memory,MB,B,B,8,7536.84716796875,7536.84716796875,7536.84716796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:05:31,0.6.1 fused_linear_grpo_loss_token,liger,full,memory,MB,B,B,16,7792.94091796875,7792.94091796875,7792.94091796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:05:31,0.6.1 fused_linear_grpo_loss_token,torch,full,memory,MB,B,B,2,9083.28125,9083.28125,9083.28125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:06:37,0.6.1 fused_linear_grpo_loss_token,torch,full,memory,MB,B,B,4,13138.3125,13138.3125,13138.3125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:06:37,0.6.1 fused_linear_grpo_loss_token,torch,full,memory,MB,B,B,8,21250.375,21250.375,21250.375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:06:37,0.6.1 fused_linear_grpo_loss_token,torch,full,memory,MB,B,B,16,37474.5,37474.5,37474.5,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""token"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:06:37,0.6.1 fused_linear_grpo_loss_sequence,liger,forward,speed,ms,B,B,2,40.72038269042969,40.71178131103516,40.728984069824214,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:07:48,0.6.1 fused_linear_grpo_loss_sequence,liger,forward,speed,ms,B,B,4,81.69369506835938,81.69369506835938,81.69369506835938,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:07:48,0.6.1 fused_linear_grpo_loss_sequence,liger,forward,speed,ms,B,B,8,162.79653930664062,162.79653930664062,162.79653930664062,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:07:48,0.6.1 fused_linear_grpo_loss_sequence,liger,forward,speed,ms,B,B,16,323.6546630859375,323.6546630859375,323.6546630859375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:07:48,0.6.1 fused_linear_grpo_loss_sequence,torch,forward,speed,ms,B,B,2,23.70047950744629,23.628594589233398,23.732429122924806,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:08:54,0.6.1 fused_linear_grpo_loss_sequence,torch,forward,speed,ms,B,B,4,47.36921691894531,47.085364532470706,47.65306930541992,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:08:54,0.6.1 fused_linear_grpo_loss_sequence,torch,forward,speed,ms,B,B,8,94.83366394042969,94.83366394042969,94.83366394042969,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:08:54,0.6.1 fused_linear_grpo_loss_sequence,torch,forward,speed,ms,B,B,16,190.0963897705078,190.0963897705078,190.0963897705078,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:08:54,0.6.1 fused_linear_grpo_loss_sequence,liger,full,speed,ms,B,B,2,42.318336486816406,42.15214080810547,42.48453216552734,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:10:02,0.6.1 fused_linear_grpo_loss_sequence,liger,full,speed,ms,B,B,4,82.4616928100586,82.4616928100586,82.4616928100586,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:10:02,0.6.1 fused_linear_grpo_loss_sequence,liger,full,speed,ms,B,B,8,163.43756103515625,163.43756103515625,163.43756103515625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:10:02,0.6.1 fused_linear_grpo_loss_sequence,liger,full,speed,ms,B,B,16,325.4384765625,325.4384765625,325.4384765625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:10:02,0.6.1 fused_linear_grpo_loss_sequence,torch,full,speed,ms,B,B,2,45.99193572998047,45.80761489868165,46.176256561279295,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:11:10,0.6.1 fused_linear_grpo_loss_sequence,torch,full,speed,ms,B,B,4,88.57190704345703,88.57190704345703,88.57190704345703,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:11:10,0.6.1 fused_linear_grpo_loss_sequence,torch,full,speed,ms,B,B,8,176.94105529785156,176.94105529785156,176.94105529785156,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:11:10,0.6.1 fused_linear_grpo_loss_sequence,torch,full,speed,ms,B,B,16,356.0478820800781,356.0478820800781,356.0478820800781,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:11:10,0.6.1 fused_linear_grpo_loss_sequence,liger,backward,speed,ms,B,B,2,1.8242560029029846,1.8102271556854248,1.8309119939804077,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:12:14,0.6.1 fused_linear_grpo_loss_sequence,liger,backward,speed,ms,B,B,4,1.84934401512146,1.846886396408081,1.8534400463104248,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:12:14,0.6.1 fused_linear_grpo_loss_sequence,liger,backward,speed,ms,B,B,8,1.891327977180481,1.8892799615859985,1.8933759927749634,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:12:14,0.6.1 fused_linear_grpo_loss_sequence,liger,backward,speed,ms,B,B,16,1.9752960205078125,1.9722239971160889,1.977344036102295,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:12:14,0.6.1 fused_linear_grpo_loss_sequence,torch,backward,speed,ms,B,B,2,22.0262393951416,21.80997085571289,22.20482559204102,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:13:20,0.6.1 fused_linear_grpo_loss_sequence,torch,backward,speed,ms,B,B,4,41.54521560668945,41.224806213378905,41.865625,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:13:20,0.6.1 fused_linear_grpo_loss_sequence,torch,backward,speed,ms,B,B,8,81.21753692626953,81.21753692626953,81.21753692626953,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:13:20,0.6.1 fused_linear_grpo_loss_sequence,torch,backward,speed,ms,B,B,16,160.82022094726562,160.82022094726562,160.82022094726562,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:13:20,0.6.1 fused_linear_grpo_loss_sequence,liger,full,memory,MB,B,B,2,7344.77685546875,7344.77685546875,7344.77685546875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:14:28,0.6.1 fused_linear_grpo_loss_sequence,liger,full,memory,MB,B,B,4,7408.80029296875,7408.80029296875,7408.80029296875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:14:28,0.6.1 fused_linear_grpo_loss_sequence,liger,full,memory,MB,B,B,8,7536.84716796875,7536.84716796875,7536.84716796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:14:28,0.6.1 fused_linear_grpo_loss_sequence,liger,full,memory,MB,B,B,16,7792.94091796875,7792.94091796875,7792.94091796875,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:14:28,0.6.1 fused_linear_grpo_loss_sequence,torch,full,memory,MB,B,B,2,9083.28125,9083.28125,9083.28125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:15:31,0.6.1 fused_linear_grpo_loss_sequence,torch,full,memory,MB,B,B,4,13138.3125,13138.3125,13138.3125,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:15:31,0.6.1 fused_linear_grpo_loss_sequence,torch,full,memory,MB,B,B,8,21250.375,21250.375,21250.375,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:15:31,0.6.1 fused_linear_grpo_loss_sequence,torch,full,memory,MB,B,B,16,37474.5,37474.5,37474.5,"{""T"": 1024, ""H"": 4096, ""V"": 128256, ""importance_sampling_level"": ""sequence"", ""dtype"": ""torch.bfloat16""}",NVIDIA A100-SXM4-80GB,2025-08-05 00:15:31,0.6.1 llama4_rope,liger,forward,speed,ms,H,hidden size,512,0.08249600231647491,0.08102399855852127,0.08432000130414963,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:01,0.6.1 llama4_rope,liger,forward,speed,ms,H,hidden size,2048,0.08169600367546082,0.08037760108709335,0.08329600095748901,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:01,0.6.1 llama4_rope,liger,forward,speed,ms,H,hidden size,8192,0.08128000050783157,0.07980799674987793,0.08329600095748901,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:01,0.6.1 llama4_rope,huggingface,forward,speed,ms,H,hidden size,512,0.03759999945759773,0.03612799942493439,0.03907199949026108,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:03,0.6.1 llama4_rope,huggingface,forward,speed,ms,H,hidden size,2048,0.06185600161552429,0.061267200857400894,0.06252799928188324,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:03,0.6.1 llama4_rope,huggingface,forward,speed,ms,H,hidden size,8192,0.206496000289917,0.20582400262355804,0.20716799795627594,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:03,0.6.1 llama4_rope,liger,backward,speed,ms,H,hidden size,512,0.15404799580574036,0.15241600573062897,0.15615999698638916,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:04,0.6.1 llama4_rope,liger,backward,speed,ms,H,hidden size,2048,0.1536320000886917,0.15190400183200836,0.1558080017566681,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:04,0.6.1 llama4_rope,liger,backward,speed,ms,H,hidden size,8192,0.15263999998569489,0.15094399452209473,0.15491199493408203,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:04,0.6.1 llama4_rope,huggingface,backward,speed,ms,H,hidden size,512,0.13760000467300415,0.13574400544166565,0.14009599387645721,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:05,0.6.1 llama4_rope,huggingface,backward,speed,ms,H,hidden size,2048,0.13600000739097595,0.13449600338935852,0.1382720023393631,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:05,0.6.1 llama4_rope,huggingface,backward,speed,ms,H,hidden size,8192,0.21011200547218323,0.20924800634384155,0.21110400557518005,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:05,0.6.1 llama4_rope,liger,full,speed,ms,H,hidden size,512,0.3652159869670868,0.3619840145111084,0.3699840009212494,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:07,0.6.1 llama4_rope,liger,full,speed,ms,H,hidden size,2048,0.3599040061235428,0.2881920039653778,0.36559998989105225,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:07,0.6.1 llama4_rope,liger,full,speed,ms,H,hidden size,8192,0.2874239981174469,0.2852480113506317,0.29029120206832887,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:07,0.6.1 llama4_rope,huggingface,full,speed,ms,H,hidden size,512,0.24691200256347656,0.24489599466323853,0.24961919784545897,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,huggingface,full,speed,ms,H,hidden size,2048,0.24774399399757385,0.24582399427890778,0.2505407989025116,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,huggingface,full,speed,ms,H,hidden size,8192,0.41414400935173035,0.41337600350379944,0.41491198539733887,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,liger,full,memory,MB,H,hidden size,512,37.23486328125,37.23486328125,37.23486328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,liger,full,memory,MB,H,hidden size,2048,52.89111328125,52.89111328125,52.89111328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,liger,full,memory,MB,H,hidden size,8192,115.51611328125,115.51611328125,115.51611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,huggingface,full,memory,MB,H,hidden size,512,49.64111328125,49.64111328125,49.64111328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,huggingface,full,memory,MB,H,hidden size,2048,102.51611328125,102.51611328125,102.51611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,huggingface,full,memory,MB,H,hidden size,8192,314.01611328125,314.01611328125,314.01611328125,"{""dtype"": ""torch.bfloat16"", ""seq_len"": 2048, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:08,0.6.1 llama4_rope,liger,forward,speed,ms,T,sequence length,1024,0.07417599856853485,0.07248000055551529,0.07596799731254578,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:10,0.6.1 llama4_rope,liger,forward,speed,ms,T,sequence length,2048,0.08182399719953537,0.08006399869918823,0.08380799740552902,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:10,0.6.1 llama4_rope,liger,forward,speed,ms,T,sequence length,4096,0.11708799749612808,0.1167680025100708,0.11744000017642975,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:10,0.6.1 llama4_rope,liger,forward,speed,ms,T,sequence length,8192,0.2165440022945404,0.21596799790859222,0.21715199947357178,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:10,0.6.1 llama4_rope,liger,forward,speed,ms,T,sequence length,16384,0.41756799817085266,0.41705599427223206,0.41811200976371765,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:10,0.6.1 llama4_rope,huggingface,forward,speed,ms,T,sequence length,1024,0.11644800007343292,0.11590400338172913,0.11708799749612808,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:12,0.6.1 llama4_rope,huggingface,forward,speed,ms,T,sequence length,2048,0.20659199357032776,0.20608000457286835,0.2072640061378479,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:12,0.6.1 llama4_rope,huggingface,forward,speed,ms,T,sequence length,4096,0.38553598523139954,0.3846847891807556,0.38624000549316406,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:12,0.6.1 llama4_rope,huggingface,forward,speed,ms,T,sequence length,8192,0.7411519885063171,0.7403839826583862,0.7420480251312256,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:12,0.6.1 llama4_rope,huggingface,forward,speed,ms,T,sequence length,16384,1.4553920030593872,1.4543871641159059,1.4562879800796509,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:12,0.6.1 llama4_rope,liger,backward,speed,ms,T,sequence length,1024,0.11840000003576279,0.11711999773979187,0.12031999975442886,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:15,0.6.1 llama4_rope,liger,backward,speed,ms,T,sequence length,2048,0.12336000055074692,0.12198399752378464,0.12489599734544754,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:15,0.6.1 llama4_rope,liger,backward,speed,ms,T,sequence length,4096,0.12380799651145935,0.12240000069141388,0.12559999525547028,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:15,0.6.1 llama4_rope,liger,backward,speed,ms,T,sequence length,8192,0.2170879989862442,0.2165759950876236,0.21753600239753723,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:15,0.6.1 llama4_rope,liger,backward,speed,ms,T,sequence length,16384,0.4175359904766083,0.41705599427223206,0.4181375920772552,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:15,0.6.1 llama4_rope,huggingface,backward,speed,ms,T,sequence length,1024,0.1189119964838028,0.11769600212574005,0.12003199756145477,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:17,0.6.1 llama4_rope,huggingface,backward,speed,ms,T,sequence length,2048,0.21011200547218323,0.20927999913692474,0.21119999885559082,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:17,0.6.1 llama4_rope,huggingface,backward,speed,ms,T,sequence length,4096,0.39740800857543945,0.3963199853897095,0.39824000000953674,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:17,0.6.1 llama4_rope,huggingface,backward,speed,ms,T,sequence length,8192,0.7540159821510315,0.7528960108757019,0.7550719976425171,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:17,0.6.1 llama4_rope,huggingface,backward,speed,ms,T,sequence length,16384,1.4822720289230347,1.4810559749603271,1.4833600521087646,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:17,0.6.1 llama4_rope,liger,full,speed,ms,T,sequence length,1024,0.2874400019645691,0.2853440046310425,0.29052799940109253,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:19,0.6.1 llama4_rope,liger,full,speed,ms,T,sequence length,2048,0.28646400570869446,0.2845759987831116,0.28963199257850647,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:19,0.6.1 llama4_rope,liger,full,speed,ms,T,sequence length,4096,0.29897600412368774,0.29660800099372864,0.302131199836731,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:19,0.6.1 llama4_rope,liger,full,speed,ms,T,sequence length,8192,0.4315840005874634,0.4304639995098114,0.43270400166511536,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:19,0.6.1 llama4_rope,liger,full,speed,ms,T,sequence length,16384,0.833184003829956,0.8322240114212036,0.8345024228096007,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:19,0.6.1 llama4_rope,huggingface,full,speed,ms,T,sequence length,1024,0.24592000246047974,0.24396799504756927,0.24876800179481506,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,speed,ms,T,sequence length,2048,0.4138239920139313,0.41308799386024475,0.4145599901676178,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,speed,ms,T,sequence length,4096,0.7800959944725037,0.7790719866752625,0.7810239791870117,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,speed,ms,T,sequence length,8192,1.4911680221557617,1.4902976036071778,1.4922879934310913,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,speed,ms,T,sequence length,16384,2.9344160556793213,2.9333438873291016,2.9353599548339844,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,liger,full,memory,MB,T,sequence length,1024,73.75830078125,73.75830078125,73.75830078125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,liger,full,memory,MB,T,sequence length,2048,115.51611328125,115.51611328125,115.51611328125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,liger,full,memory,MB,T,sequence length,4096,199.03173828125,199.03173828125,199.03173828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,liger,full,memory,MB,T,sequence length,8192,366.06298828125,366.06298828125,366.06298828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,liger,full,memory,MB,T,sequence length,16384,700.12548828125,700.12548828125,700.12548828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,memory,MB,T,sequence length,1024,173.00830078125,173.00830078125,173.00830078125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,memory,MB,T,sequence length,2048,314.01611328125,314.01611328125,314.01611328125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,memory,MB,T,sequence length,4096,596.03173828125,596.03173828125,596.03173828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,memory,MB,T,sequence length,8192,1160.06298828125,1160.06298828125,1160.06298828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 llama4_rope,huggingface,full,memory,MB,T,sequence length,16384,2288.12548828125,2288.12548828125,2288.12548828125,"{""dtype"": ""torch.bfloat16"", ""hidden_size"": 8192, ""num_q_heads"": 32, ""num_kv_heads"": 8}",NVIDIA H100 80GB HBM3,2025-08-07 21:42:21,0.6.1 tiled_geglu,liger,full,speed,ms,T,sequence length,1024,2.1678080558776855,2.166579246520996,2.1682305335998535,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:48,0.6.3 tiled_geglu,liger,full,speed,ms,T,sequence length,2048,4.344256401062012,4.343987464904785,4.34452486038208,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:48,0.6.3 tiled_geglu,liger,full,speed,ms,T,sequence length,4096,8.653023719787598,8.653023719787598,8.653023719787598,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:48,0.6.3 tiled_geglu,liger,full,speed,ms,T,sequence length,8192,16.909311294555664,16.909311294555664,16.909311294555664,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:48,0.6.3 tiled_geglu,liger,full,speed,ms,T,sequence length,16384,33.63123321533203,33.63123321533203,33.63123321533203,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:48,0.6.3 tiled_geglu,liger_tiled,full,speed,ms,T,sequence length,1024,3.353935956954956,3.353523015975952,3.35434889793396,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:49,0.6.3 tiled_geglu,liger_tiled,full,speed,ms,T,sequence length,2048,6.023168087005615,6.023168087005615,6.023168087005615,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:49,0.6.3 tiled_geglu,liger_tiled,full,speed,ms,T,sequence length,4096,11.495424270629883,11.495424270629883,11.495424270629883,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:49,0.6.3 tiled_geglu,liger_tiled,full,speed,ms,T,sequence length,8192,23.68614387512207,23.68614387512207,23.68614387512207,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:49,0.6.3 tiled_geglu,liger_tiled,full,speed,ms,T,sequence length,16384,47.478782653808594,47.478782653808594,47.478782653808594,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:49,0.6.3 tiled_geglu,liger,forward,speed,ms,T,sequence length,1024,0.6614400148391724,0.6594560146331787,0.6635519862174988,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,liger,forward,speed,ms,T,sequence length,2048,1.3471999168395996,1.346560001373291,1.3475840091705322,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,liger,forward,speed,ms,T,sequence length,4096,2.752511978149414,2.7261502742767334,2.7844607830047607,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,liger,forward,speed,ms,T,sequence length,8192,5.433343887329102,5.433343887329102,5.433343887329102,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,liger,forward,speed,ms,T,sequence length,16384,10.712063789367676,10.712063789367676,10.712063789367676,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,liger_tiled,forward,speed,ms,T,sequence length,1024,0.7403519749641418,0.7402047514915466,0.7413759827613831,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:53,0.6.3 tiled_geglu,liger_tiled,forward,speed,ms,T,sequence length,2048,1.3941760063171387,1.3895679712295532,1.398144006729126,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:53,0.6.3 tiled_geglu,liger_tiled,forward,speed,ms,T,sequence length,4096,2.7586560249328613,2.7585408687591553,2.759884834289551,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:53,0.6.3 tiled_geglu,liger_tiled,forward,speed,ms,T,sequence length,8192,5.789696216583252,5.789696216583252,5.789696216583252,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:53,0.6.3 tiled_geglu,liger_tiled,forward,speed,ms,T,sequence length,16384,11.810815811157227,11.810815811157227,11.810815811157227,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:53,0.6.3 tiled_geglu,liger,backward,speed,ms,T,sequence length,1024,1.491968035697937,1.4916608333587646,1.4940160512924194,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:56,0.6.3 tiled_geglu,liger,backward,speed,ms,T,sequence length,2048,3.0185279846191406,3.0131328105926514,3.0555264949798584,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:56,0.6.3 tiled_geglu,liger,backward,speed,ms,T,sequence length,4096,6.021120071411133,6.021120071411133,6.021120071411133,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:56,0.6.3 tiled_geglu,liger,backward,speed,ms,T,sequence length,8192,11.512767791748047,11.512767791748047,11.512767791748047,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:56,0.6.3 tiled_geglu,liger,backward,speed,ms,T,sequence length,16384,22.806528091430664,22.806528091430664,22.806528091430664,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:56,0.6.3 tiled_geglu,liger_tiled,backward,speed,ms,T,sequence length,1024,2.6060800552368164,2.6053311824798584,2.607308864593506,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:57,0.6.3 tiled_geglu,liger_tiled,backward,speed,ms,T,sequence length,2048,4.665375709533691,4.664742469787598,4.666009426116943,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:57,0.6.3 tiled_geglu,liger_tiled,backward,speed,ms,T,sequence length,4096,8.71731185913086,8.71731185913086,8.71731185913086,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:57,0.6.3 tiled_geglu,liger_tiled,backward,speed,ms,T,sequence length,8192,17.99782371520996,17.99782371520996,17.99782371520996,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:57,0.6.3 tiled_geglu,liger_tiled,backward,speed,ms,T,sequence length,16384,35.64400100708008,35.64400100708008,35.64400100708008,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:57,0.6.3 tiled_geglu,liger,full,memory,MB,T,sequence length,1024,232.25,232.25,232.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger,full,memory,MB,T,sequence length,2048,336.25,336.25,336.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger,full,memory,MB,T,sequence length,4096,544.25,544.25,544.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger,full,memory,MB,T,sequence length,8192,960.25,960.25,960.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger,full,memory,MB,T,sequence length,16384,1792.25,1792.25,1792.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger_tiled,full,memory,MB,T,sequence length,1024,186.25,186.25,186.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger_tiled,full,memory,MB,T,sequence length,2048,244.25,244.25,244.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger_tiled,full,memory,MB,T,sequence length,4096,360.25,360.25,360.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger_tiled,full,memory,MB,T,sequence length,8192,592.25,592.25,592.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger_tiled,full,memory,MB,T,sequence length,16384,1056.25,1056.25,1056.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:00,0.6.3 tiled_geglu,liger,forward,memory,MB,T,sequence length,1024,128.25,128.25,128.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:03,0.6.3 tiled_geglu,liger,forward,memory,MB,T,sequence length,2048,192.25,192.25,192.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:03,0.6.3 tiled_geglu,liger,forward,memory,MB,T,sequence length,4096,320.25,320.25,320.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:03,0.6.3 tiled_geglu,liger,forward,memory,MB,T,sequence length,8192,576.25,576.25,576.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:03,0.6.3 tiled_geglu,liger,forward,memory,MB,T,sequence length,16384,1088.25,1088.25,1088.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:03,0.6.3 tiled_geglu,liger_tiled,forward,memory,MB,T,sequence length,1024,92.25,92.25,92.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,liger_tiled,forward,memory,MB,T,sequence length,2048,120.25,120.25,120.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,liger_tiled,forward,memory,MB,T,sequence length,4096,176.25,176.25,176.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,liger_tiled,forward,memory,MB,T,sequence length,8192,288.25,288.25,288.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,liger_tiled,forward,memory,MB,T,sequence length,16384,512.25,512.25,512.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,liger,backward,memory,MB,T,sequence length,1024,232.25,232.25,232.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,liger,backward,memory,MB,T,sequence length,2048,336.25,336.25,336.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,liger,backward,memory,MB,T,sequence length,4096,544.25,544.25,544.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,liger,backward,memory,MB,T,sequence length,8192,960.25,960.25,960.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,liger,backward,memory,MB,T,sequence length,16384,1792.25,1792.25,1792.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,liger_tiled,backward,memory,MB,T,sequence length,1024,186.25,186.25,186.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:06,0.6.3 tiled_geglu,liger_tiled,backward,memory,MB,T,sequence length,2048,244.25,244.25,244.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:06,0.6.3 tiled_geglu,liger_tiled,backward,memory,MB,T,sequence length,4096,360.25,360.25,360.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:06,0.6.3 tiled_geglu,liger_tiled,backward,memory,MB,T,sequence length,8192,592.25,592.25,592.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:06,0.6.3 tiled_geglu,liger_tiled,backward,memory,MB,T,sequence length,16384,1056.25,1056.25,1056.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:06,0.6.3 tiled_swiglu,liger,full,speed,ms,T,sequence length,1024,2.165760040283203,2.164659261703491,2.167193651199341,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:10,0.6.3 tiled_swiglu,liger,full,speed,ms,T,sequence length,2048,4.371456146240234,4.368383884429932,4.374527931213379,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:10,0.6.3 tiled_swiglu,liger,full,speed,ms,T,sequence length,4096,8.935423851013184,8.935423851013184,8.935423851013184,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:10,0.6.3 tiled_swiglu,liger,full,speed,ms,T,sequence length,8192,17.078943252563477,17.078943252563477,17.078943252563477,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:10,0.6.3 tiled_swiglu,liger,full,speed,ms,T,sequence length,16384,33.74857711791992,33.74857711791992,33.74857711791992,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:10,0.6.3 tiled_swiglu,liger_tiled,full,speed,ms,T,sequence length,1024,3.3510398864746094,3.3507328033447266,3.3513472080230713,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:11,0.6.3 tiled_swiglu,liger_tiled,full,speed,ms,T,sequence length,2048,6.023168087005615,6.023168087005615,6.023168087005615,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:11,0.6.3 tiled_swiglu,liger_tiled,full,speed,ms,T,sequence length,4096,11.609087944030762,11.609087944030762,11.609087944030762,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:11,0.6.3 tiled_swiglu,liger_tiled,full,speed,ms,T,sequence length,8192,23.8591365814209,23.8591365814209,23.8591365814209,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:11,0.6.3 tiled_swiglu,liger_tiled,full,speed,ms,T,sequence length,16384,47.721473693847656,47.721473693847656,47.721473693847656,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:11,0.6.3 tiled_swiglu,liger,forward,speed,ms,T,sequence length,1024,0.6594560146331787,0.6594560146331787,0.6604800224304199,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:14,0.6.3 tiled_swiglu,liger,forward,speed,ms,T,sequence length,2048,1.3537280559539795,1.3527040481567383,1.3547519445419312,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:14,0.6.3 tiled_swiglu,liger,forward,speed,ms,T,sequence length,4096,2.7152960300445557,2.715123176574707,2.7155072689056396,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:14,0.6.3 tiled_swiglu,liger,forward,speed,ms,T,sequence length,8192,5.3361921310424805,5.3361921310424805,5.3361921310424805,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:14,0.6.3 tiled_swiglu,liger,forward,speed,ms,T,sequence length,16384,10.870783805847168,10.870783805847168,10.870783805847168,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:14,0.6.3 tiled_swiglu,liger_tiled,forward,speed,ms,T,sequence length,1024,0.7395360469818115,0.7383040189743042,0.7413759827613831,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,liger_tiled,forward,speed,ms,T,sequence length,2048,1.3965599536895752,1.387935996055603,1.4024640321731567,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,liger_tiled,forward,speed,ms,T,sequence length,4096,2.7778561115264893,2.777395248413086,2.7780096530914307,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,liger_tiled,forward,speed,ms,T,sequence length,8192,5.829631805419922,5.829631805419922,5.829631805419922,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,liger_tiled,forward,speed,ms,T,sequence length,16384,11.841535568237305,11.841535568237305,11.841535568237305,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,liger,backward,speed,ms,T,sequence length,1024,1.4970879554748535,1.4961408376693726,1.4970879554748535,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:17,0.6.3 tiled_swiglu,liger,backward,speed,ms,T,sequence length,2048,3.052351951599121,3.0518529415130615,3.0550782680511475,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:17,0.6.3 tiled_swiglu,liger,backward,speed,ms,T,sequence length,4096,6.074687957763672,6.074687957763672,6.074687957763672,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:17,0.6.3 tiled_swiglu,liger,backward,speed,ms,T,sequence length,8192,11.630592346191406,11.630592346191406,11.630592346191406,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:17,0.6.3 tiled_swiglu,liger,backward,speed,ms,T,sequence length,16384,22.76793670654297,22.76793670654297,22.76793670654297,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:17,0.6.3 tiled_swiglu,liger_tiled,backward,speed,ms,T,sequence length,1024,2.6021440029144287,2.6000702381134033,2.6032767295837402,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:18,0.6.3 tiled_swiglu,liger_tiled,backward,speed,ms,T,sequence length,2048,4.641791820526123,4.641791820526123,4.641791820526123,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:18,0.6.3 tiled_swiglu,liger_tiled,backward,speed,ms,T,sequence length,4096,8.761343955993652,8.761343955993652,8.761343955993652,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:18,0.6.3 tiled_swiglu,liger_tiled,backward,speed,ms,T,sequence length,8192,17.966079711914062,17.966079711914062,17.966079711914062,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:18,0.6.3 tiled_swiglu,liger_tiled,backward,speed,ms,T,sequence length,16384,35.657344818115234,35.657344818115234,35.657344818115234,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:18,0.6.3 tiled_swiglu,liger,full,memory,MB,T,sequence length,1024,232.25,232.25,232.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:21,0.6.3 tiled_swiglu,liger,full,memory,MB,T,sequence length,2048,336.25,336.25,336.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:21,0.6.3 tiled_swiglu,liger,full,memory,MB,T,sequence length,4096,544.25,544.25,544.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:21,0.6.3 tiled_swiglu,liger,full,memory,MB,T,sequence length,8192,960.25,960.25,960.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:21,0.6.3 tiled_swiglu,liger,full,memory,MB,T,sequence length,16384,1792.25,1792.25,1792.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:21,0.6.3 tiled_swiglu,liger_tiled,full,memory,MB,T,sequence length,1024,186.25,186.25,186.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:22,0.6.3 tiled_swiglu,liger_tiled,full,memory,MB,T,sequence length,2048,244.25,244.25,244.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:22,0.6.3 tiled_swiglu,liger_tiled,full,memory,MB,T,sequence length,4096,360.25,360.25,360.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:22,0.6.3 tiled_swiglu,liger_tiled,full,memory,MB,T,sequence length,8192,592.25,592.25,592.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:22,0.6.3 tiled_swiglu,liger_tiled,full,memory,MB,T,sequence length,16384,1056.25,1056.25,1056.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:22,0.6.3 tiled_swiglu,liger,forward,memory,MB,T,sequence length,1024,128.25,128.25,128.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger,forward,memory,MB,T,sequence length,2048,192.25,192.25,192.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger,forward,memory,MB,T,sequence length,4096,320.25,320.25,320.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger,forward,memory,MB,T,sequence length,8192,576.25,576.25,576.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger,forward,memory,MB,T,sequence length,16384,1088.25,1088.25,1088.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger_tiled,forward,memory,MB,T,sequence length,1024,92.25,92.25,92.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger_tiled,forward,memory,MB,T,sequence length,2048,120.25,120.25,120.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger_tiled,forward,memory,MB,T,sequence length,4096,176.25,176.25,176.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger_tiled,forward,memory,MB,T,sequence length,8192,288.25,288.25,288.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger_tiled,forward,memory,MB,T,sequence length,16384,512.25,512.25,512.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:25,0.6.3 tiled_swiglu,liger,backward,memory,MB,T,sequence length,1024,232.25,232.25,232.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:27,0.6.3 tiled_swiglu,liger,backward,memory,MB,T,sequence length,2048,336.25,336.25,336.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:27,0.6.3 tiled_swiglu,liger,backward,memory,MB,T,sequence length,4096,544.25,544.25,544.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:27,0.6.3 tiled_swiglu,liger,backward,memory,MB,T,sequence length,8192,960.25,960.25,960.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:27,0.6.3 tiled_swiglu,liger,backward,memory,MB,T,sequence length,16384,1792.25,1792.25,1792.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:27,0.6.3 tiled_swiglu,liger_tiled,backward,memory,MB,T,sequence length,1024,186.25,186.25,186.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:28,0.6.3 tiled_swiglu,liger_tiled,backward,memory,MB,T,sequence length,2048,244.25,244.25,244.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:28,0.6.3 tiled_swiglu,liger_tiled,backward,memory,MB,T,sequence length,4096,360.25,360.25,360.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:28,0.6.3 tiled_swiglu,liger_tiled,backward,memory,MB,T,sequence length,8192,592.25,592.25,592.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:28,0.6.3 tiled_swiglu,liger_tiled,backward,memory,MB,T,sequence length,16384,1056.25,1056.25,1056.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:28,0.6.3 tiled_geglu,huggingface,full,speed,ms,T,sequence length,1024,2.3357439041137695,2.3357439041137695,2.3375871181488037,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:47,0.6.3 tiled_geglu,huggingface,full,speed,ms,T,sequence length,2048,4.764671802520752,4.764671802520752,4.764671802520752,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:47,0.6.3 tiled_geglu,huggingface,full,speed,ms,T,sequence length,4096,9.4236478805542,9.4236478805542,9.4236478805542,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:47,0.6.3 tiled_geglu,huggingface,full,speed,ms,T,sequence length,8192,17.628543853759766,17.628543853759766,17.628543853759766,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:47,0.6.3 tiled_geglu,huggingface,full,speed,ms,T,sequence length,16384,35.06790542602539,35.06790542602539,35.06790542602539,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:47,0.6.3 tiled_geglu,deepspeed_tiled,full,speed,ms,T,sequence length,1024,3.418976068496704,3.4176511764526367,3.4203009605407715,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:51,0.6.3 tiled_geglu,deepspeed_tiled,full,speed,ms,T,sequence length,2048,6.158143997192383,6.158143997192383,6.158143997192383,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:51,0.6.3 tiled_geglu,deepspeed_tiled,full,speed,ms,T,sequence length,4096,11.934720039367676,11.934720039367676,11.934720039367676,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:51,0.6.3 tiled_geglu,deepspeed_tiled,full,speed,ms,T,sequence length,8192,24.731647491455078,24.731647491455078,24.731647491455078,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:51,0.6.3 tiled_geglu,deepspeed_tiled,full,speed,ms,T,sequence length,16384,49.46227264404297,49.46227264404297,49.46227264404297,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:51,0.6.3 tiled_geglu,huggingface,forward,speed,ms,T,sequence length,1024,0.6743040084838867,0.6736640334129333,0.677068829536438,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,huggingface,forward,speed,ms,T,sequence length,2048,1.418239951133728,1.418239951133728,1.421120047569275,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,huggingface,forward,speed,ms,T,sequence length,4096,2.88972806930542,2.889113664627075,2.8909568786621094,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,huggingface,forward,speed,ms,T,sequence length,8192,5.701375961303711,5.701375961303711,5.701375961303711,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,huggingface,forward,speed,ms,T,sequence length,16384,11.276288032531738,11.276288032531738,11.276288032531738,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:52,0.6.3 tiled_geglu,deepspeed_tiled,forward,speed,ms,T,sequence length,1024,0.7433919906616211,0.7423999905586243,0.7444480061531067,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:54,0.6.3 tiled_geglu,deepspeed_tiled,forward,speed,ms,T,sequence length,2048,1.4137760400772095,1.4131200313568115,1.4152319431304932,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:54,0.6.3 tiled_geglu,deepspeed_tiled,forward,speed,ms,T,sequence length,4096,2.8241920471191406,2.823500871658325,2.8266496658325195,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:54,0.6.3 tiled_geglu,deepspeed_tiled,forward,speed,ms,T,sequence length,8192,6.087679862976074,6.087679862976074,6.087679862976074,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:54,0.6.3 tiled_geglu,deepspeed_tiled,forward,speed,ms,T,sequence length,16384,12.353535652160645,12.353535652160645,12.353535652160645,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:54,0.6.3 tiled_geglu,huggingface,backward,speed,ms,T,sequence length,1024,1.5499199628829956,1.5489535331726074,1.5523840188980103,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:55,0.6.3 tiled_geglu,huggingface,backward,speed,ms,T,sequence length,2048,3.171328067779541,3.169484853744507,3.173171281814575,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:55,0.6.3 tiled_geglu,huggingface,backward,speed,ms,T,sequence length,4096,6.263807773590088,6.263807773590088,6.263807773590088,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:55,0.6.3 tiled_geglu,huggingface,backward,speed,ms,T,sequence length,8192,12.046143531799316,12.046143531799316,12.046143531799316,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:55,0.6.3 tiled_geglu,huggingface,backward,speed,ms,T,sequence length,16384,23.839744567871094,23.839744567871094,23.839744567871094,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:55,0.6.3 tiled_geglu,deepspeed_tiled,backward,speed,ms,T,sequence length,1024,2.6757121086120605,2.6755776405334473,2.676710367202759,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:58,0.6.3 tiled_geglu,deepspeed_tiled,backward,speed,ms,T,sequence length,2048,4.7329277992248535,4.7329277992248535,4.7329277992248535,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:58,0.6.3 tiled_geglu,deepspeed_tiled,backward,speed,ms,T,sequence length,4096,9.078783988952637,9.078783988952637,9.078783988952637,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:58,0.6.3 tiled_geglu,deepspeed_tiled,backward,speed,ms,T,sequence length,8192,18.63680076599121,18.63680076599121,18.63680076599121,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:58,0.6.3 tiled_geglu,deepspeed_tiled,backward,speed,ms,T,sequence length,16384,37.06163024902344,37.06163024902344,37.06163024902344,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:58,0.6.3 tiled_geglu,huggingface,full,memory,MB,T,sequence length,1024,264.25,264.25,264.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:59,0.6.3 tiled_geglu,huggingface,full,memory,MB,T,sequence length,2048,400.25,400.25,400.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:59,0.6.3 tiled_geglu,huggingface,full,memory,MB,T,sequence length,4096,688.25,688.25,688.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:59,0.6.3 tiled_geglu,huggingface,full,memory,MB,T,sequence length,8192,1264.25,1264.25,1264.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:59,0.6.3 tiled_geglu,huggingface,full,memory,MB,T,sequence length,16384,2416.25,2416.25,2416.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:22:59,0.6.3 tiled_geglu,deepspeed_tiled,full,memory,MB,T,sequence length,1024,190.25,190.25,190.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,deepspeed_tiled,full,memory,MB,T,sequence length,2048,252.25,252.25,252.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,deepspeed_tiled,full,memory,MB,T,sequence length,4096,376.25,376.25,376.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,deepspeed_tiled,full,memory,MB,T,sequence length,8192,640.25,640.25,640.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,deepspeed_tiled,full,memory,MB,T,sequence length,16384,1168.25,1168.25,1168.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,huggingface,forward,memory,MB,T,sequence length,1024,144.25,144.25,144.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,huggingface,forward,memory,MB,T,sequence length,2048,224.25,224.25,224.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,huggingface,forward,memory,MB,T,sequence length,4096,384.25,384.25,384.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,huggingface,forward,memory,MB,T,sequence length,8192,704.25,704.25,704.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,huggingface,forward,memory,MB,T,sequence length,16384,1344.25,1344.25,1344.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:02,0.6.3 tiled_geglu,deepspeed_tiled,forward,memory,MB,T,sequence length,1024,90.25,90.25,90.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,deepspeed_tiled,forward,memory,MB,T,sequence length,2048,116.25,116.25,116.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,deepspeed_tiled,forward,memory,MB,T,sequence length,4096,168.25,168.25,168.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,deepspeed_tiled,forward,memory,MB,T,sequence length,8192,272.25,272.25,272.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,deepspeed_tiled,forward,memory,MB,T,sequence length,16384,480.25,480.25,480.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:04,0.6.3 tiled_geglu,huggingface,backward,memory,MB,T,sequence length,1024,264.25,264.25,264.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,huggingface,backward,memory,MB,T,sequence length,2048,400.25,400.25,400.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,huggingface,backward,memory,MB,T,sequence length,4096,688.25,688.25,688.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,huggingface,backward,memory,MB,T,sequence length,8192,1264.25,1264.25,1264.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,huggingface,backward,memory,MB,T,sequence length,16384,2416.25,2416.25,2416.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:05,0.6.3 tiled_geglu,deepspeed_tiled,backward,memory,MB,T,sequence length,1024,190.25,190.25,190.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:07,0.6.3 tiled_geglu,deepspeed_tiled,backward,memory,MB,T,sequence length,2048,252.25,252.25,252.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:07,0.6.3 tiled_geglu,deepspeed_tiled,backward,memory,MB,T,sequence length,4096,376.25,376.25,376.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:07,0.6.3 tiled_geglu,deepspeed_tiled,backward,memory,MB,T,sequence length,8192,640.25,640.25,640.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:07,0.6.3 tiled_geglu,deepspeed_tiled,backward,memory,MB,T,sequence length,16384,1168.25,1168.25,1168.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""gelu_pytorch_tanh"", ""activation_type"": ""geglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:07,0.6.3 tiled_swiglu,huggingface,full,speed,ms,T,sequence length,1024,2.2517759799957275,2.2517759799957275,2.254848003387451,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:08,0.6.3 tiled_swiglu,huggingface,full,speed,ms,T,sequence length,2048,4.588511943817139,4.587302207946777,4.5897216796875,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:08,0.6.3 tiled_swiglu,huggingface,full,speed,ms,T,sequence length,4096,9.233407974243164,9.233407974243164,9.233407974243164,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:08,0.6.3 tiled_swiglu,huggingface,full,speed,ms,T,sequence length,8192,17.869823455810547,17.869823455810547,17.869823455810547,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:08,0.6.3 tiled_swiglu,huggingface,full,speed,ms,T,sequence length,16384,35.34422302246094,35.34422302246094,35.34422302246094,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:08,0.6.3 tiled_swiglu,deepspeed_tiled,full,speed,ms,T,sequence length,1024,3.4257922172546387,3.424870491027832,3.426713705062866,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:12,0.6.3 tiled_swiglu,deepspeed_tiled,full,speed,ms,T,sequence length,2048,6.155263900756836,6.155263900756836,6.155263900756836,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:12,0.6.3 tiled_swiglu,deepspeed_tiled,full,speed,ms,T,sequence length,4096,11.92959976196289,11.92959976196289,11.92959976196289,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:12,0.6.3 tiled_swiglu,deepspeed_tiled,full,speed,ms,T,sequence length,8192,24.815616607666016,24.815616607666016,24.815616607666016,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:12,0.6.3 tiled_swiglu,deepspeed_tiled,full,speed,ms,T,sequence length,16384,49.62918472290039,49.62918472290039,49.62918472290039,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:12,0.6.3 tiled_swiglu,huggingface,forward,speed,ms,T,sequence length,1024,0.6748160123825073,0.6737920045852661,0.6758400201797485,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:13,0.6.3 tiled_swiglu,huggingface,forward,speed,ms,T,sequence length,2048,1.4332799911499023,1.4325759410858154,1.4335999488830566,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:13,0.6.3 tiled_swiglu,huggingface,forward,speed,ms,T,sequence length,4096,2.91212797164917,2.904217481613159,2.9146623611450195,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:13,0.6.3 tiled_swiglu,huggingface,forward,speed,ms,T,sequence length,8192,5.658976078033447,5.658976078033447,5.658976078033447,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:13,0.6.3 tiled_swiglu,huggingface,forward,speed,ms,T,sequence length,16384,11.341952323913574,11.341952323913574,11.341952323913574,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:13,0.6.3 tiled_swiglu,deepspeed_tiled,forward,speed,ms,T,sequence length,1024,0.7454720139503479,0.7429631948471069,0.7456768155097961,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,deepspeed_tiled,forward,speed,ms,T,sequence length,2048,1.4120960235595703,1.410048007965088,1.4120960235595703,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,deepspeed_tiled,forward,speed,ms,T,sequence length,4096,2.825216054916382,2.825216054916382,2.8264448642730713,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,deepspeed_tiled,forward,speed,ms,T,sequence length,8192,6.077439785003662,6.077439785003662,6.077439785003662,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,deepspeed_tiled,forward,speed,ms,T,sequence length,16384,12.356608390808105,12.356608390808105,12.356608390808105,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:15,0.6.3 tiled_swiglu,huggingface,backward,speed,ms,T,sequence length,1024,1.551360011100769,1.5511807203292847,1.5532032251358032,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:16,0.6.3 tiled_swiglu,huggingface,backward,speed,ms,T,sequence length,2048,3.1928319931030273,3.1885311603546143,3.1971328258514404,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:16,0.6.3 tiled_swiglu,huggingface,backward,speed,ms,T,sequence length,4096,6.273248195648193,6.273248195648193,6.273248195648193,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:16,0.6.3 tiled_swiglu,huggingface,backward,speed,ms,T,sequence length,8192,12.058752059936523,12.058752059936523,12.058752059936523,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:16,0.6.3 tiled_swiglu,huggingface,backward,speed,ms,T,sequence length,16384,23.853055953979492,23.853055953979492,23.853055953979492,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:16,0.6.3 tiled_swiglu,deepspeed_tiled,backward,speed,ms,T,sequence length,1024,2.6746881008148193,2.6728639602661133,2.6789886951446533,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,deepspeed_tiled,backward,speed,ms,T,sequence length,2048,4.739071846008301,4.739071846008301,4.739071846008301,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,deepspeed_tiled,backward,speed,ms,T,sequence length,4096,9.084927558898926,9.084927558898926,9.084927558898926,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,deepspeed_tiled,backward,speed,ms,T,sequence length,8192,18.729759216308594,18.729759216308594,18.729759216308594,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,deepspeed_tiled,backward,speed,ms,T,sequence length,16384,37.13724899291992,37.13724899291992,37.13724899291992,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,huggingface,full,memory,MB,T,sequence length,1024,264.25,264.25,264.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,huggingface,full,memory,MB,T,sequence length,2048,400.25,400.25,400.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,huggingface,full,memory,MB,T,sequence length,4096,688.25,688.25,688.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,huggingface,full,memory,MB,T,sequence length,8192,1264.25,1264.25,1264.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,huggingface,full,memory,MB,T,sequence length,16384,2416.25,2416.25,2416.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:20,0.6.3 tiled_swiglu,deepspeed_tiled,full,memory,MB,T,sequence length,1024,190.25,190.25,190.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:23,0.6.3 tiled_swiglu,deepspeed_tiled,full,memory,MB,T,sequence length,2048,252.25,252.25,252.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:23,0.6.3 tiled_swiglu,deepspeed_tiled,full,memory,MB,T,sequence length,4096,376.25,376.25,376.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:23,0.6.3 tiled_swiglu,deepspeed_tiled,full,memory,MB,T,sequence length,8192,640.25,640.25,640.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:23,0.6.3 tiled_swiglu,deepspeed_tiled,full,memory,MB,T,sequence length,16384,1168.25,1168.25,1168.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:23,0.6.3 tiled_swiglu,huggingface,forward,memory,MB,T,sequence length,1024,144.25,144.25,144.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:24,0.6.3 tiled_swiglu,huggingface,forward,memory,MB,T,sequence length,2048,224.25,224.25,224.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:24,0.6.3 tiled_swiglu,huggingface,forward,memory,MB,T,sequence length,4096,384.25,384.25,384.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:24,0.6.3 tiled_swiglu,huggingface,forward,memory,MB,T,sequence length,8192,704.25,704.25,704.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:24,0.6.3 tiled_swiglu,huggingface,forward,memory,MB,T,sequence length,16384,1344.25,1344.25,1344.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:24,0.6.3 tiled_swiglu,deepspeed_tiled,forward,memory,MB,T,sequence length,1024,90.25,90.25,90.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,deepspeed_tiled,forward,memory,MB,T,sequence length,2048,116.25,116.25,116.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,deepspeed_tiled,forward,memory,MB,T,sequence length,4096,168.25,168.25,168.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,deepspeed_tiled,forward,memory,MB,T,sequence length,8192,272.25,272.25,272.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,deepspeed_tiled,forward,memory,MB,T,sequence length,16384,480.25,480.25,480.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,huggingface,backward,memory,MB,T,sequence length,1024,264.25,264.25,264.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,huggingface,backward,memory,MB,T,sequence length,2048,400.25,400.25,400.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,huggingface,backward,memory,MB,T,sequence length,4096,688.25,688.25,688.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,huggingface,backward,memory,MB,T,sequence length,8192,1264.25,1264.25,1264.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,huggingface,backward,memory,MB,T,sequence length,16384,2416.25,2416.25,2416.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:26,0.6.3 tiled_swiglu,deepspeed_tiled,backward,memory,MB,T,sequence length,1024,190.25,190.25,190.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:29,0.6.3 tiled_swiglu,deepspeed_tiled,backward,memory,MB,T,sequence length,2048,252.25,252.25,252.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:29,0.6.3 tiled_swiglu,deepspeed_tiled,backward,memory,MB,T,sequence length,4096,376.25,376.25,376.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:29,0.6.3 tiled_swiglu,deepspeed_tiled,backward,memory,MB,T,sequence length,8192,640.25,640.25,640.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:29,0.6.3 tiled_swiglu,deepspeed_tiled,backward,memory,MB,T,sequence length,16384,1168.25,1168.25,1168.25,"{""bsz"": 2, ""hidden_size"": 2048, ""intermediate_size"": 4096, ""hidden_act"": ""silu"", ""activation_type"": ""swiglu"", ""num_shards"": 4, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 4090,2025-11-11 06:23:29,0.6.3 tvd,liger,full,memory,MB,V,vocab size,4096,1792.0009765625,1792.0009765625,1792.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,liger,full,memory,MB,V,vocab size,8192,3584.0009765625,3584.0009765625,3584.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,liger,full,memory,MB,V,vocab size,16384,7168.0009765625,7168.0009765625,7168.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,liger,full,memory,MB,V,vocab size,32768,14336.0009765625,14336.0009765625,14336.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,liger,full,memory,MB,V,vocab size,65536,28672.0,28672.0,28672.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,liger,full,memory,MB,V,vocab size,131072,57344.0,57344.0,57344.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,torch,full,memory,MB,V,vocab size,4096,2048.0009765625,2048.0009765625,2048.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,torch,full,memory,MB,V,vocab size,8192,4096.0009765625,4096.0009765625,4096.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,torch,full,memory,MB,V,vocab size,16384,8192.0009765625,8192.0009765625,8192.0009765625,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,torch,full,memory,MB,V,vocab size,32768,16384.0,16384.0,16384.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,torch,full,memory,MB,V,vocab size,65536,32768.0,32768.0,32768.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,torch,full,memory,MB,V,vocab size,131072,65536.0,65536.0,65536.0,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:48,0.7.0 tvd,liger,forward,speed,ms,V,vocab size,4096,0.2757120132446289,0.27487359642982484,0.27616640329360964,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:50,0.7.0 tvd,liger,forward,speed,ms,V,vocab size,8192,0.5338559746742249,0.5333759784698486,0.5346879959106445,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:50,0.7.0 tvd,liger,forward,speed,ms,V,vocab size,16384,1.0511679649353027,1.0505280494689941,1.0521472215652465,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:50,0.7.0 tvd,liger,forward,speed,ms,V,vocab size,32768,2.0986878871917725,2.09736967086792,2.0999168872833254,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:50,0.7.0 tvd,liger,forward,speed,ms,V,vocab size,65536,4.221951961517334,4.22039680480957,4.222847938537598,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:50,0.7.0 tvd,liger,forward,speed,ms,V,vocab size,131072,8.501215934753418,8.498592376708984,8.50380802154541,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:50,0.7.0 tvd,torch,forward,speed,ms,V,vocab size,4096,0.7288320064544678,0.727942419052124,0.7296640276908875,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:51,0.7.0 tvd,torch,forward,speed,ms,V,vocab size,8192,1.4264639616012573,1.42576003074646,1.4272960424423218,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:51,0.7.0 tvd,torch,forward,speed,ms,V,vocab size,16384,2.81440007686615,2.8132031917572022,2.815097618103027,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:51,0.7.0 tvd,torch,forward,speed,ms,V,vocab size,32768,5.5965118408203125,5.59548807144165,5.598131275177002,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:51,0.7.0 tvd,torch,forward,speed,ms,V,vocab size,65536,11.178752422332764,11.176428604125977,11.180454635620118,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:51,0.7.0 tvd,torch,forward,speed,ms,V,vocab size,131072,22.33670425415039,22.334880065917968,22.339027404785156,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:51,0.7.0 tvd,liger,full,speed,ms,V,vocab size,4096,1.123952031135559,1.1221888303756713,1.1291328191757202,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:53,0.7.0 tvd,liger,full,speed,ms,V,vocab size,8192,2.1660319566726685,2.162835216522217,2.169088077545166,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:53,0.7.0 tvd,liger,full,speed,ms,V,vocab size,16384,4.563424110412598,4.559807777404785,4.5669121742248535,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:53,0.7.0 tvd,liger,full,speed,ms,V,vocab size,32768,9.092079639434814,9.089529991149902,9.094182014465332,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:53,0.7.0 tvd,liger,full,speed,ms,V,vocab size,65536,18.217248916625977,18.20675277709961,18.219014739990236,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:53,0.7.0 tvd,liger,full,speed,ms,V,vocab size,131072,36.477935791015625,36.46965026855469,36.48622131347656,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:53,0.7.0 tvd,torch,full,speed,ms,V,vocab size,4096,2.1256959438323975,2.1249279975891113,2.1270463466644287,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:54,0.7.0 tvd,torch,full,speed,ms,V,vocab size,8192,4.191232204437256,4.189510250091553,4.192793464660644,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:54,0.7.0 tvd,torch,full,speed,ms,V,vocab size,16384,8.638431549072266,8.636992454528809,8.639007568359375,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:54,0.7.0 tvd,torch,full,speed,ms,V,vocab size,32768,17.25654411315918,17.25450286865234,17.25882225036621,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:54,0.7.0 tvd,torch,full,speed,ms,V,vocab size,65536,34.54822540283203,34.546746826171876,34.549703979492186,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:54,0.7.0 tvd,torch,full,speed,ms,V,vocab size,131072,69.17910766601562,69.17910766601562,69.17910766601562,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:54,0.7.0 tvd,liger,backward,speed,ms,V,vocab size,4096,0.8502079844474792,0.8484799861907959,0.8526080250740051,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:55,0.7.0 tvd,liger,backward,speed,ms,V,vocab size,8192,1.6321280002593994,1.629702377319336,1.6350399732589722,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:55,0.7.0 tvd,liger,backward,speed,ms,V,vocab size,16384,3.5109760761260986,3.5084415912628173,3.513107109069824,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:55,0.7.0 tvd,liger,backward,speed,ms,V,vocab size,32768,6.989071846008301,6.985472011566161,6.994240188598633,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:55,0.7.0 tvd,liger,backward,speed,ms,V,vocab size,65536,13.969247817993164,13.95904598236084,13.971328163146971,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:55,0.7.0 tvd,liger,backward,speed,ms,V,vocab size,131072,27.982528686523438,27.963673400878903,27.987577819824217,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:55,0.7.0 tvd,torch,backward,speed,ms,V,vocab size,4096,1.398911952972412,1.3979583740234376,1.4000320434570312,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:57,0.7.0 tvd,torch,backward,speed,ms,V,vocab size,8192,2.7701759338378906,2.7694976329803467,2.7718528747558593,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:57,0.7.0 tvd,torch,backward,speed,ms,V,vocab size,16384,5.828160047531128,5.8249921798706055,5.829792022705078,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:57,0.7.0 tvd,torch,backward,speed,ms,V,vocab size,32768,11.665760040283203,11.664883232116699,11.666317176818847,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:57,0.7.0 tvd,torch,backward,speed,ms,V,vocab size,65536,23.379840850830078,23.37938575744629,23.381267929077147,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:57,0.7.0 tvd,torch,backward,speed,ms,V,vocab size,131072,46.83844757080078,46.8328125,46.84408264160156,"{""B"": 8, ""T"": 2048}",NVIDIA H100 80GB HBM3,2026-03-03 23:02:57,0.7.0 group_norm,liger,forward,speed,ms,C,num_channels,32,0.017535999417304993,0.016863999888300896,0.01833599992096424,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:15,0.7.0 group_norm,liger,forward,speed,ms,C,num_channels,64,0.018848000094294548,0.018015999346971512,0.019487999379634857,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:15,0.7.0 group_norm,liger,forward,speed,ms,C,num_channels,128,0.026623999699950218,0.024607999250292778,0.026688000187277794,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:15,0.7.0 group_norm,liger,forward,speed,ms,C,num_channels,256,0.038943998515605927,0.03888000175356865,0.03903999924659729,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:15,0.7.0 group_norm,liger,forward,speed,ms,C,num_channels,512,0.06351999938488007,0.06345599889755249,0.06550399959087372,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:15,0.7.0 group_norm,liger,forward,speed,ms,C,num_channels,1024,0.11475200206041336,0.11468800157308578,0.11673600226640701,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:15,0.7.0 group_norm,liger,forward,speed,ms,C,num_channels,2048,0.21910400688648224,0.217056006193161,0.22115199267864227,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:15,0.7.0 group_norm,huggingface,forward,speed,ms,C,num_channels,32,0.030688000842928886,0.030592000111937523,0.030751999467611313,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:19,0.7.0 group_norm,huggingface,forward,speed,ms,C,num_channels,64,0.043007999658584595,0.04294399917125702,0.04303999990224838,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:19,0.7.0 group_norm,huggingface,forward,speed,ms,C,num_channels,128,0.07168000191450119,0.07161600142717361,0.07174400240182877,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:19,0.7.0 group_norm,huggingface,forward,speed,ms,C,num_channels,256,0.13516800105571747,0.1351040005683899,0.13523200154304504,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:19,0.7.0 group_norm,huggingface,forward,speed,ms,C,num_channels,512,0.25808000564575195,0.2580159902572632,0.25900799036026,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:19,0.7.0 group_norm,huggingface,forward,speed,ms,C,num_channels,1024,0.4986239969730377,0.4976640045642853,0.4997439980506897,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:19,0.7.0 group_norm,huggingface,forward,speed,ms,C,num_channels,2048,0.9819360077381134,0.9800639748573303,0.9830080270767212,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:19,0.7.0 group_norm,liger,full,speed,ms,C,num_channels,32,0.1658720001578331,0.16368000209331512,0.16958080232143402,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:22,0.7.0 group_norm,liger,full,speed,ms,C,num_channels,64,0.1730239987373352,0.17123199999332428,0.17520000040531158,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:22,0.7.0 group_norm,liger,full,speed,ms,C,num_channels,128,0.1695999950170517,0.16783360242843628,0.1717183977365494,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:22,0.7.0 group_norm,liger,full,speed,ms,C,num_channels,256,0.174112007021904,0.17206400632858276,0.17718400061130524,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:22,0.7.0 group_norm,liger,full,speed,ms,C,num_channels,512,0.18745599687099457,0.18636800348758698,0.18848000466823578,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:22,0.7.0 group_norm,liger,full,speed,ms,C,num_channels,1024,0.3388479948043823,0.33792001008987427,0.3400000035762787,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:22,0.7.0 group_norm,liger,full,speed,ms,C,num_channels,2048,0.6390079855918884,0.6371200084686279,0.6410560011863708,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:22,0.7.0 group_norm,huggingface,full,speed,ms,C,num_channels,32,0.08396799862384796,0.08390399813652039,0.08403199911117554,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:26,0.7.0 group_norm,huggingface,full,speed,ms,C,num_channels,64,0.11267200112342834,0.11260800063610077,0.1128000020980835,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:26,0.7.0 group_norm,huggingface,full,speed,ms,C,num_channels,128,0.20054399967193604,0.19868800044059753,0.20080000162124634,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:26,0.7.0 group_norm,huggingface,full,speed,ms,C,num_channels,256,0.35020801424980164,0.34828799962997437,0.3511039912700653,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:26,0.7.0 group_norm,huggingface,full,speed,ms,C,num_channels,512,0.6307839751243591,0.6297919750213623,0.6309120059013367,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:26,0.7.0 group_norm,huggingface,full,speed,ms,C,num_channels,1024,1.177664041519165,1.1766079664230347,1.1796480417251587,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:26,0.7.0 group_norm,huggingface,full,speed,ms,C,num_channels,2048,2.2947518825531006,2.292736053466797,2.296736001968384,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:26,0.7.0 group_norm,liger,backward,speed,ms,C,num_channels,32,0.06643199920654297,0.0655359998345375,0.06752000004053116,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:28,0.7.0 group_norm,liger,backward,speed,ms,C,num_channels,64,0.06732799857854843,0.0663679987192154,0.06838399916887283,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:28,0.7.0 group_norm,liger,backward,speed,ms,C,num_channels,128,0.07171200215816498,0.06969600170850754,0.07273600250482559,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:28,0.7.0 group_norm,liger,backward,speed,ms,C,num_channels,256,0.07580800354480743,0.07571200281381607,0.07683199644088745,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:28,0.7.0 group_norm,liger,backward,speed,ms,C,num_channels,512,0.12697599828243256,0.1249919980764389,0.12703999876976013,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:28,0.7.0 group_norm,liger,backward,speed,ms,C,num_channels,1024,0.2253440022468567,0.2252800017595291,0.22729599475860596,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:28,0.7.0 group_norm,liger,backward,speed,ms,C,num_channels,2048,0.42585599422454834,0.42396798729896545,0.4260160028934479,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:28,0.7.0 group_norm,huggingface,backward,speed,ms,C,num_channels,32,0.05532800033688545,0.05526399984955788,0.056352000683546066,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,huggingface,backward,speed,ms,C,num_channels,64,0.07372800260782242,0.07171200215816498,0.0739263966679573,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,huggingface,backward,speed,ms,C,num_channels,128,0.13315199315547943,0.13308799266815186,0.13331200182437897,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,huggingface,backward,speed,ms,C,num_channels,256,0.21916800737380981,0.21904000639915466,0.21926400065422058,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,huggingface,backward,speed,ms,C,num_channels,512,0.374783992767334,0.37379199266433716,0.37484800815582275,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,huggingface,backward,speed,ms,C,num_channels,1024,0.6820799708366394,0.6810240149497986,0.6839039921760559,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,huggingface,backward,speed,ms,C,num_channels,2048,1.3158719539642334,1.3157440423965454,1.3177599906921387,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,liger,full,memory,MB,C,num_channels,32,40.01171875,40.01171875,40.01171875,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,liger,full,memory,MB,C,num_channels,64,80.01953125,80.01953125,80.01953125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,liger,full,memory,MB,C,num_channels,128,160.03515625,160.03515625,160.03515625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,liger,full,memory,MB,C,num_channels,256,320.0703125,320.0703125,320.0703125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,liger,full,memory,MB,C,num_channels,512,640.140625,640.140625,640.140625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,liger,full,memory,MB,C,num_channels,1024,1280.28125,1280.28125,1280.28125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,liger,full,memory,MB,C,num_channels,2048,2560.5625,2560.5625,2560.5625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:31,0.7.0 group_norm,huggingface,full,memory,MB,C,num_channels,32,40.06640625,40.06640625,40.06640625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,full,memory,MB,C,num_channels,64,80.12890625,80.12890625,80.12890625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,full,memory,MB,C,num_channels,128,160.25390625,160.25390625,160.25390625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,full,memory,MB,C,num_channels,256,320.5078125,320.5078125,320.5078125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,full,memory,MB,C,num_channels,512,641.015625,641.015625,641.015625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,full,memory,MB,C,num_channels,1024,1282.03125,1282.03125,1282.03125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,full,memory,MB,C,num_channels,2048,2564.0625,2564.0625,2564.0625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,forward,memory,MB,C,num_channels,32,40.01171875,40.01171875,40.01171875,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,forward,memory,MB,C,num_channels,64,80.01953125,80.01953125,80.01953125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,forward,memory,MB,C,num_channels,128,160.03515625,160.03515625,160.03515625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,forward,memory,MB,C,num_channels,256,320.0703125,320.0703125,320.0703125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,forward,memory,MB,C,num_channels,512,640.140625,640.140625,640.140625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,forward,memory,MB,C,num_channels,1024,1280.28125,1280.28125,1280.28125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,forward,memory,MB,C,num_channels,2048,2560.5625,2560.5625,2560.5625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,forward,memory,MB,C,num_channels,32,40.06640625,40.06640625,40.06640625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,forward,memory,MB,C,num_channels,64,80.12890625,80.12890625,80.12890625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,forward,memory,MB,C,num_channels,128,160.25390625,160.25390625,160.25390625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,forward,memory,MB,C,num_channels,256,320.5078125,320.5078125,320.5078125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,forward,memory,MB,C,num_channels,512,641.015625,641.015625,641.015625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,forward,memory,MB,C,num_channels,1024,1282.03125,1282.03125,1282.03125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,forward,memory,MB,C,num_channels,2048,2564.0625,2564.0625,2564.0625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,backward,memory,MB,C,num_channels,32,40.01171875,40.01171875,40.01171875,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,backward,memory,MB,C,num_channels,64,80.01953125,80.01953125,80.01953125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,backward,memory,MB,C,num_channels,128,160.03515625,160.03515625,160.03515625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,backward,memory,MB,C,num_channels,256,320.0703125,320.0703125,320.0703125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,backward,memory,MB,C,num_channels,512,640.140625,640.140625,640.140625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,backward,memory,MB,C,num_channels,1024,1280.28125,1280.28125,1280.28125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,liger,backward,memory,MB,C,num_channels,2048,2560.5625,2560.5625,2560.5625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,backward,memory,MB,C,num_channels,32,40.06640625,40.06640625,40.06640625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,backward,memory,MB,C,num_channels,64,80.12890625,80.12890625,80.12890625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,backward,memory,MB,C,num_channels,128,160.25390625,160.25390625,160.25390625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,backward,memory,MB,C,num_channels,256,320.5078125,320.5078125,320.5078125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,backward,memory,MB,C,num_channels,512,641.015625,641.015625,641.015625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,backward,memory,MB,C,num_channels,1024,1282.03125,1282.03125,1282.03125,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0 group_norm,huggingface,backward,memory,MB,C,num_channels,2048,2564.0625,2564.0625,2564.0625,"{""M"": 128, ""H"": 512, ""channels_per_group"": 4, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA B200,2026-02-28 00:23:32,0.7.0