Unverified commit 241b4cfe, authored by Wentao Ye and committed by GitHub
Browse files

[Refactor] Refactor FP8 & INT8 Quant Folder inside `w8a8` (#25293)


Signed-off-by: default avatarnicole-lihui <nicole.li@daocloud.io>
Signed-off-by: default avataryewentao256 <zhyanwentao@126.com>
Signed-off-by: default avatarcourage17340 <courage17340@163.com>
Signed-off-by: default avatarDarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: default avatarJacob Kahn <jacobkahn1@gmail.com>
Signed-off-by: default avatarTyler Michael Smith <tlrmchlsmth@gmail.com>
Signed-off-by: default avatarFadi Arafeh <fadi.arafeh@arm.com>
Signed-off-by: default avatarRoger Wang <hey@rogerw.io>
Signed-off-by: default avatarAgata Dobrzyniewicz <adobrzyniewicz@habana.ai>
Signed-off-by: default avatarIsotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: default avatarzxw <1020938856@qq.com>
Signed-off-by: default avatarHarry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: default avatarwang.yuqi <noooop@126.com>
Signed-off-by: default avatarCyrus Leung <cyrus.tl.leung@gmail.com>
Signed-off-by: default avatarKunshang Ji <kunshang.ji@intel.com>
Signed-off-by: default avatarchenlang <chen.lang5@zte.com.cn>
Signed-off-by: default avataryoukaichao <youkaichao@gmail.com>
Signed-off-by: default avatarJonas Kuebler <kuebj@amazon.com>
Signed-off-by: default avatarjiang1.li <jiang1.li@intel.com>
Signed-off-by: default avatarRussell Bryant <rbryant@redhat.com>
Signed-off-by: default avatarNickLucche <nlucches@redhat.com>
Signed-off-by: default avatarTyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: default avatarAlonKejzman <alonkeizman@gmail.com>
Signed-off-by: default avatarLucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: default avatartaohui <taohui3@gmail.com>
Signed-off-by: default avatarTao Hui <taohui3@gmail.com>
Signed-off-by: default avatarMatthew Bonanni <mbonanni@redhat.com>
Signed-off-by: default avatarMatthew Bonanni <mbonanni001@gmail.com>
Signed-off-by: default avatarJee Jee Li <pandaleefree@gmail.com>
Signed-off-by: default avatarEkagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com>
Signed-off-by: default avatarZhuohan Li <zhuohan123@gmail.com>
Signed-off-by: default avatarTomer Asida <57313761+tomeras91@users.noreply.github.com>
Signed-off-by: default avatarShu Wang. <shuw@nvidia.com>
Signed-off-by: default avatarNick Hill <nhill@redhat.com>
Signed-off-by: default avatarAleksandr Malyshev <maleksan@amd.com>
Signed-off-by: default avatarEugene Khvedchenia <ekhvedchenia@nvidia.com>
Signed-off-by: default avatarEugene Khvedchenya <ekhvedchenya@gmail.com>
Signed-off-by: default avataryiting.jiang <yiting.jiang@daocloud.io>
Signed-off-by: default avatarAndrew Sansom <andrew@protopia.ai>
Signed-off-by: default avatarxaguilar <Xavier.AguilarFruto@amd.com>
Signed-off-by: default avatarIceber Gu <caiwei95@hotmail.com>
Signed-off-by: default avatarTao He <linzhu.ht@alibaba-inc.com>
Signed-off-by: default avatarIcey <1790571317@qq.com>
Signed-off-by: default avatarSage Moore <sage@neuralmagic.com>
Signed-off-by: default avatar许文卿 <xwq391974@alibaba-inc.com>
Signed-off-by: default avatarChih-Chieh-Yang <7364402+cyang49@users.noreply.github.com>
Signed-off-by: default avatarchaunceyjiang <chaunceyjiang@gmail.com>
Signed-off-by: default avatarSeiji Eicher <seiji@anyscale.com>
Signed-off-by: default avatarSeiji Eicher <58963096+eicherseiji@users.noreply.github.com>
Signed-off-by: default avatarzjy0516 <riverclouds.zhu@qq.com>
Signed-off-by: default avatarKosseila (CloudThrill) <klouddude@gmail.com>
Signed-off-by: default avatarfrankwang28 <frank.wbb@hotmail.com>
Signed-off-by: default avatarFrank Wang <41319051+frankwang28@users.noreply.github.com>
Signed-off-by: default avatarmgoin <mgoin64@gmail.com>
Signed-off-by: default avatarfhl2000 <63384265+fhl2000@users.noreply.github.com>
Signed-off-by: default avatarzixi-qi <qizixi@meta.com>
Signed-off-by: default avatarBram Wasti <bwasti@meta.com>
Signed-off-by: default avatarNaman Lalit <nl2688@nyu.edu>
Signed-off-by: default avatarChenheli Hua <huachenheli@outlook.com>
Signed-off-by: default avatarJunhong <liujunhong11@huawei.com>
Signed-off-by: default avatarJunhong Liu <98734602+LJH-LBJ@users.noreply.github.com>
Signed-off-by: default avatar22quinn <33176974+22quinn@users.noreply.github.com>
Signed-off-by: default avatarrentianyue-jk <rentianyue-jk@360shuke.com>
Signed-off-by: default avatarPeter Pan <Peter.Pan@daocloud.io>
Signed-off-by: default avatarPatrick Toulme <ptoulme@meta.com>
Signed-off-by: default avatarPatrick Toulme <pctoulme+1@gmail.com>
Signed-off-by: default avatarJiangyun Zhu <riverclouds.zhu@qq.com>
Signed-off-by: default avatarClayton Coleman <smarterclayton@gmail.com>
Signed-off-by: default avatarJialin Ouyang <jialino@meta.com>
Signed-off-by: default avatarJialin Ouyang <Jialin.Ouyang@gmail.com>
Signed-off-by: default avatarWeiliang Liu <weiliangl@nvidia.com>
Signed-off-by: default avatarzRzRzRzRzRzRzR <2448370773@qq.com>
Signed-off-by: default avatarliuye.hj <liuye.hj@alibaba-inc.com>
Signed-off-by: default avatarJuechen Liu <jueliu@meta.com>
Signed-off-by: default avatarsimon-mo <simon.mo@hey.com>
Signed-off-by: default avatarRobert Shaw <robshaw@redhat.com>
Signed-off-by: default avatarThomas Parnell <tpa@zurich.ibm.com>
Signed-off-by: default avatarisotr0py <2037008807@qq.com>
Signed-off-by: default avataryingjun-mou <renzomou@gmail.com>
Signed-off-by: zhoukz's avatarzhoukz <me@zhoukz.com>
Signed-off-by: default avatarChenxi Yang <cxyang@fb.com>
Signed-off-by: default avatarRahul Tuli <rtuli@redhat.com>
Signed-off-by: default avatarLee Nau <lnau@nvidia.com>
Signed-off-by: default avataradabeyta <aabeyta@redhat.com>
Signed-off-by: default avatarGregory Shtrasberg <Gregory.Shtrasberg@amd.com>
Signed-off-by: default avatarWentao Ye <44945378+yewentao256@users.noreply.github.com>
Signed-off-by: default avatarsimondanielsson <simon.danielsson99@hotmail.com>
Signed-off-by: default avatarChen Zhang <zhangch99@outlook.com>
Signed-off-by: default avatarYongye Zhu <zyy1102000@gmail.com>
Signed-off-by: default avatarBarry Kang <43644113+Barry-Delaney@users.noreply.github.com>
Signed-off-by: default avatarLucia Fang <fanglu@meta.com>
Signed-off-by: default avatara120092009 <zhaoty0121@gmail.com>
Signed-off-by: default avatarsergiopaniego <sergiopaniegoblanco@gmail.com>
Signed-off-by: default avatarSergio Paniego Blanco <sergiopaniegoblanco@gmail.com>
Signed-off-by: default avatarwangyafeng <wangyafeng@baidu.com>
Signed-off-by: default avatarLehua Ding <lehuading@tencent.com>
Signed-off-by: default avatarlyd1992 <liuyudong@iscas.ac.cn>
Signed-off-by: default avatarihb2032 <1355790728@qq.com>
Signed-off-by: default avatarasafg <39553475+Josephasafg@users.noreply.github.com>
Signed-off-by: default avataranion <1005128408@qq.com>
Signed-off-by: default avatarAnion <123177548+Anionex@users.noreply.github.com>
Signed-off-by: default avatarPavani Majety <pmajety@nvidia.com>
Signed-off-by: default avatarBill Nell <bnell@redhat.com>
Signed-off-by: default avatarbnellnm <49004751+bnellnm@users.noreply.github.com>
Signed-off-by: default avatarOr Ozeri <oro@il.ibm.com>
Signed-off-by: default avatarcjackal <44624812+cjackal@users.noreply.github.com>
Signed-off-by: default avatarDavid Ben-David <davidb@pliops.com>
Signed-off-by: default avatarAndrew Xia <axia@meta.com>
Signed-off-by: default avatarAndrew Xia <axia@fb.com>
Signed-off-by: default avatarLu Fang <fanglu@fb.com>
Signed-off-by: default avatarSalvatore Cena <cena@cenas.it>
Signed-off-by: default avatarpadg9912 <phone.and.desktop@gmail.com>
Signed-off-by: default avatarnadathurv <work.vnadathur@gmail.com>
Signed-off-by: default avatarWorldExplored <srreyansh.sethi@gmail.com>
Signed-off-by: default avatarwwl2755 <wangwenlong2755@gmail.com>
Signed-off-by: default avatarbillishyahao <bill.he@amd.com>
Signed-off-by: default avatarNathan Scott <nathans@redhat.com>
Signed-off-by: default avatarKenichi Maehashi <maehashi@preferred.jp>
Signed-off-by: default avatarJohnny <johnnynuca14@gmail.com>
Signed-off-by: default avatarjohnnynunez <johnnynuca14@gmail.com>
Signed-off-by: default avatarJohnny <johnnync13@gmail.com>
Signed-off-by: default avatarHuamin Li <3ericli@gmail.com>
Signed-off-by: default avatarHosang Yoon <hosang.yoon@amd.com>
Signed-off-by: default avatarJerry Zhang <jerryzh168@gmail.com>
Signed-off-by: default avatarPeter Schuurman <psch@google.com>
Signed-off-by: default avatarHuy Do <huydhn@gmail.com>
Signed-off-by: default avatarleo-pony <nengjunma@outlook.com>
Signed-off-by: default avatarvllmellm <vllm.ellm@embeddedllm.com>
Signed-off-by: default avatarLucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Signed-off-by: default avatarElizaWszola <ewszola@redhat.com>
Signed-off-by: default avatarElizaWszola <elizaw.9289@gmail.com>
Signed-off-by: default avatarLuka Govedič <lgovedic@redhat.com>
Signed-off-by: default avatarLuka Govedič <ProExpertProg@users.noreply.github.com>
Signed-off-by: default avatarMichael Goin <mgoin64@gmail.com>
Signed-off-by: default avatarBenjamin Chislett <bchislett@nvidia.com>
Signed-off-by: default avatartjtanaa <tunjian.tan@embeddedllm.com>
Signed-off-by: default avatarzhewenli <zhewenli@meta.com>
Signed-off-by: default avatarahao-anyscale <ahao@anyscale.com>
Signed-off-by: default avatarVarun Sundar Rabindranath <vsundarr@redhat.com>
Signed-off-by: default avatarhuijjj <huijong.jeong@squeezebits.com>
Signed-off-by: default avatarYannick Schnider <yannick.schnider1@ibm.com>
Signed-off-by: default avatarkyt <eluban4532@gmail.com>
Signed-off-by: default avatarEgor <e.a.krivov@gmail.com>
Signed-off-by: default avatarYang <lymailforjob@gmail.com>
Signed-off-by: default avatarPaul Pak <paulpak58@gmail.com>
Signed-off-by: default avatarwhx-sjtu <2952154980@qq.com>
Signed-off-by: default avatarXiang Si <sixiang@google.com>
Signed-off-by: default avatarAleksandr Samarin <astrlrd@nebius.com>
Signed-off-by: default avatarJun Jiang <jasl9187@hotmail.com>
Signed-off-by: default avatarChendi Xue <Chendi.Xue@intel.com>
Signed-off-by: default avatarChendi.Xue <chendi.xue@intel.com>
Signed-off-by: default avatarNikhil Ghosh <nikhil@anyscale.com>
Co-authored-by: Nicole LiHui 🥜 <nicolelihui@outlook.com>
Co-authored-by: default avatarcourage17340 <courage17340@users.noreply.github.com>
Co-authored-by: default avatarCyrus Leung <tlleungac@connect.ust.hk>
Co-authored-by: default avatarJacob Kahn <jacobkahn1@gmail.com>
Co-authored-by: default avatarRoger Wang <hey@rogerw.io>
Co-authored-by: Nicole LiHui 🥜 <nicole.li@daocloud.io>
Co-authored-by: default avatarTyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: default avatarFadi Arafeh <115173828+fadara01@users.noreply.github.com>
Co-authored-by: default avatarAgata Dobrzyniewicz <160237065+adobrzyn@users.noreply.github.com>
Co-authored-by: default avatarIsotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: default avataryyzxw <34639446+yyzxw@users.noreply.github.com>
Co-authored-by: default avatarHarry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: default avatarwang.yuqi <noooop@126.com>
Co-authored-by: default avatarCyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: default avatarKunshang Ji <kunshang.ji@intel.com>
Co-authored-by: default avatarchenlang <chen.lang5@zte.com.cn>
Co-authored-by: default avatarchenlang <10346245@zte.com.cn>
Co-authored-by: default avataryoukaichao <youkaichao@gmail.com>
Co-authored-by: default avatarJonas M. Kübler <44084297+jmkuebler@users.noreply.github.com>
Co-authored-by: default avatarLi, Jiang <jiang1.li@intel.com>
Co-authored-by: default avatarRussell Bryant <rbryant@redhat.com>
Co-authored-by: default avatarNicolò Lucchesi <nlucches@redhat.com>
Co-authored-by: default avatarAlonKejzman <alonkeizman@gmail.com>
Co-authored-by: default avatarMichael Goin <mgoin64@gmail.com>
Co-authored-by: default avatarLucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: default avatarTao Hui <taohui3@gmail.com>
Co-authored-by: default avatargemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: default avatarMatthew Bonanni <mbonanni@redhat.com>
Co-authored-by: default avatarJee Jee Li <pandaleefree@gmail.com>
Co-authored-by: default avatarEkagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com>
Co-authored-by: default avatarNick Hill <nhill@redhat.com>
Co-authored-by: default avatarZhuohan Li <zhuohan123@gmail.com>
Co-authored-by: default avatarYe (Charlotte) Qi <yeq@meta.com>
Co-authored-by: default avatartomeras91 <57313761+tomeras91@users.noreply.github.com>
Co-authored-by: default avatarShu Wang <shuw@nvidia.com>
Co-authored-by: default avatarAleksandr Malyshev <164964928+maleksan85@users.noreply.github.com>
Co-authored-by: default avatarAleksandr Malyshev <maleksan@amd.com>
Co-authored-by: default avatarDoug Lehr <douglehr@amd.com>
Co-authored-by: default avatarEugene Khvedchenya <ekhvedchenya@gmail.com>
Co-authored-by: default avataryitingdc <59356937+yitingdc@users.noreply.github.com>
Co-authored-by: default avatarAndrew Sansom <andrew@protopia.ai>
Co-authored-by: default avatarxaguilar-amd <xavier.aguilarfruto@amd.com>
Co-authored-by: default avatarIceber Gu <caiwei95@hotmail.com>
Co-authored-by: default avatarTao He <linzhu.ht@alibaba-inc.com>
Co-authored-by: default avatarIcey <1790571317@qq.com>
Co-authored-by: default avatarSage Moore <sage@neuralmagic.com>
Co-authored-by: default avatarRobert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: default avatarXu Wenqing <121550081+Xu-Wenqing@users.noreply.github.com>
Co-authored-by: default avatarChih-Chieh Yang <7364402+cyang49@users.noreply.github.com>
Co-authored-by: default avatarRishiAstra <40644327+RishiAstra@users.noreply.github.com>
Co-authored-by: default avatarChauncey <chaunceyjiang@gmail.com>
Co-authored-by: default avatarSeiji Eicher <58963096+eicherseiji@users.noreply.github.com>
Co-authored-by: default avatarRui Qiao <161574667+ruisearch42@users.noreply.github.com>
Co-authored-by: default avatarJiangyun Zhu <riverclouds.zhu@qq.com>
Co-authored-by: default avatarLuka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: default avatar阿丹(adan) <47373076+LDLINGLINGLING@users.noreply.github.com>
Co-authored-by: default avatarliudan <adan@minicpm.com>
Co-authored-by: default avatarliudan <liudan@qq.com>
Co-authored-by: default avatarLucia Fang <116399278+luccafong@users.noreply.github.com>
Co-authored-by: default avatarClouddude <kouss.hd@gmail.com>
Co-authored-by: default avatarFrank Wang <41319051+frankwang28@users.noreply.github.com>
Co-authored-by: default avatarfhl2000 <63384265+fhl2000@users.noreply.github.com>
Co-authored-by: default avatarqizixi <22851944+zixi-qi@users.noreply.github.com>
Co-authored-by: default avatarBram Wasti <bwasti@fb.com>
Co-authored-by: default avatarNaman Lalit <nl2688@nyu.edu>
Co-authored-by: default avatarChenheli Hua <huachenheli@outlook.com>
Co-authored-by: default avatarWeiQing Chen <40507679+david6666666@users.noreply.github.com>
Co-authored-by: default avatarJunhong <liujunhong11@huawei.com>
Co-authored-by: default avatarLJH-LBJ <98734602+LJH-LBJ@users.noreply.github.com>
Co-authored-by: default avatar22quinn <33176974+22quinn@users.noreply.github.com>
Co-authored-by: default avatarXiaohan Zou <renovamenzxh@gmail.com>
Co-authored-by: default avatarrentianyue-jk <rentianyue-jk@360shuke.com>
Co-authored-by: default avatarTyler Michael Smith <tlrmchlsmth@gmail.com>
Co-authored-by: default avatarPeter Pan <peter.pan@daocloud.io>
Co-authored-by: default avatarPatrick C. Toulme <135739773+patrick-toulme@users.noreply.github.com>
Co-authored-by: default avatarClayton Coleman <smarterclayton@gmail.com>
Co-authored-by: default avatarJialin Ouyang <Jialin.Ouyang@gmail.com>
Co-authored-by: default avatarJialin Ouyang <jialino@meta.com>
Co-authored-by: default avatarweiliang <weiliangl@nvidia.com>
Co-authored-by: default avatarYuxuan Zhang <2448370773@qq.com>
Co-authored-by: default avatarJJJYmmm <92386084+JJJYmmm@users.noreply.github.com>
Co-authored-by: default avatarliuye.hj <liuye.hj@alibaba-inc.com>
Co-authored-by: default avatarJuechen Liu <grinchcoder@gmail.com>
Co-authored-by: default avatarRobert Shaw <robshaw@redhat.com>
Co-authored-by: default avatarThomas Parnell <tpa@zurich.ibm.com>
Co-authored-by: default avatarYingjun Mou <renzomou@gmail.com>
Co-authored-by: zhoukz's avatarZhou Jiahao <me@zhoukz.com>
Co-authored-by: default avatarChenxi Yang <cxyang@cs.utexas.edu>
Co-authored-by: default avatarChenxi Yang <cxyang@fb.com>
Co-authored-by: default avatarRahul Tuli <rtuli@redhat.com>
Co-authored-by: default avatarLee Nau <lee.nau@gmail.com>
Co-authored-by: default avatarAdrian Abeyta <aabeyta@redhat.com>
Co-authored-by: default avatarGregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Co-authored-by: default avatarAaron Pham <contact@aarnphm.xyz>
Co-authored-by: default avataracisseJZhong <40467976+acisseJZhong@users.noreply.github.com>
Co-authored-by: default avatarSimon Danielsson <70206058+simondanielsson@users.noreply.github.com>
Co-authored-by: default avatarYongye Zhu <zyy1102000@gmail.com>
Co-authored-by: default avatarChen Zhang <zhangch99@outlook.com>
Co-authored-by: default avatarLucas Wilkinson <lwilkins@redhat.com>
Co-authored-by: default avatarLucia Fang <fanglu@meta.com>
Co-authored-by: default avatarSiyuan Fu <siyuanf@nvidia.com>
Co-authored-by: default avatarXiaozhu Meng <mxz297@gmail.com>
Co-authored-by: default avatarBarry Kang <43644113+Barry-Delaney@users.noreply.github.com>
Co-authored-by: default avatara120092009 <33205509+a120092009@users.noreply.github.com>
Co-authored-by: default avatarSergio Paniego Blanco <sergiopaniegoblanco@gmail.com>
Co-authored-by: default avatarCSWYF3634076 <wangyafeng@baidu.com>
Co-authored-by: default avatarLehua Ding <lehuading@tencent.com>
Co-authored-by: default avatarReza Barazesh <3146276+rzabarazesh@users.noreply.github.com>
Co-authored-by: default avatarihb2032 <40718643+ihb2032@users.noreply.github.com>
Co-authored-by: default avatarAsaf Joseph Gardin <39553475+Josephasafg@users.noreply.github.com>
Co-authored-by: default avatarAnion <123177548+Anionex@users.noreply.github.com>
Co-authored-by: default avatarPavani Majety <pmajety@nvidia.com>
Co-authored-by: default avatarbnellnm <49004751+bnellnm@users.noreply.github.com>
Co-authored-by: default avatarOr Ozeri <oro@il.ibm.com>
Co-authored-by: default avatarcjackal <44624812+cjackal@users.noreply.github.com>
Co-authored-by: default avatarDavid Ben-David <sdavidbd@gmail.com>
Co-authored-by: default avatarDavid Ben-David <davidb@pliops.com>
Co-authored-by: default avatarAndrew Xia <axia@mit.edu>
Co-authored-by: default avatarAndrew Xia <axia@fb.com>
Co-authored-by: default avatarSalvatore Cena <cena@cenas.it>
Co-authored-by: default avatarParam <psch@cs.unc.edu>
Co-authored-by: default avatarZhewen Li <zhewenli@meta.com>
Co-authored-by: default avatarnadathurv <work.vnadathur@gmail.com>
Co-authored-by: default avatarSrreyansh Sethi <107075589+WorldExplored@users.noreply.github.com>
Co-authored-by: default avatarWenlong Wang <wangwenlong2755@gmail.com>
Co-authored-by: default avatarbillishyahao <bill.he@amd.com>
Co-authored-by: default avatarNathan Scott <natoscott@users.noreply.github.com>
Co-authored-by: default avatarKenichi Maehashi <939877+kmaehashi@users.noreply.github.com>
Co-authored-by: default avatarJohnny <johnnync13@gmail.com>
Co-authored-by: default avatarAidyn-A <31858918+Aidyn-A@users.noreply.github.com>
Co-authored-by: default avatarHuamin Li <3ericli@gmail.com>
Co-authored-by: default avatarrshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: default avatarHosang <156028780+hyoon1@users.noreply.github.com>
Co-authored-by: default avatarJerry Zhang <jerryzh168@gmail.com>
Co-authored-by: default avatarpwschuurman <psch@google.com>
Co-authored-by: default avatarHuy Do <huydhn@gmail.com>
Co-authored-by: default avatarleo-pony <nengjunma@outlook.com>
Co-authored-by: default avatarvllmellm <vllm.ellm@embeddedllm.com>
Co-authored-by: default avatarElizaWszola <ewszola@redhat.com>
Co-authored-by: default avatarLuka Govedič <lgovedic@redhat.com>
Co-authored-by: default avatarBenjamin Chislett <bchislett@nvidia.com>
Co-authored-by: default avatarAndrew Xia <axia@meta.com>
Co-authored-by: default avatarSimon Mo <simon.mo@hey.com>
Co-authored-by: default avatarTJian <tunjian.tan@embeddedllm.com>
Co-authored-by: default avatarahao-anyscale <ahao@anyscale.com>
Co-authored-by: default avatarVarun Sundar Rabindranath <varunsundar08@gmail.com>
Co-authored-by: default avatarVarun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: default avatarLiu-congo <1502632128@qq.com>
Co-authored-by: default avatarHUIJONG JEONG <64083281+huijjj@users.noreply.github.com>
Co-authored-by: default avatarYannick Schnider <Yannick.Schnider1@ibm.com>
Co-authored-by: default avatarkyt <eluban4532@gmail.com>
Co-authored-by: default avatarEgor <e.a.krivov@gmail.com>
Co-authored-by: default avatarYang Liu <127183760+KKSK-DON@users.noreply.github.com>
Co-authored-by: default avatarPaul Pak <52512091+paulpak58@users.noreply.github.com>
Co-authored-by: default avatarwhx <56632993+whx-sjtu@users.noreply.github.com>
Co-authored-by: default avatarXiang Si <sixiang@google.com>
Co-authored-by: default avatarAleksandr Samarin <samarin_ad@mail.ru>
Co-authored-by: default avatarJun Jiang <jasl9187@hotmail.com>
Co-authored-by: default avatarChendi.Xue <chendi.xue@intel.com>
Co-authored-by: default avatarNikhil G <nrghosh@users.noreply.github.com>
parent 9fc983c7
......@@ -269,8 +269,8 @@ set(VLLM_EXT_SRC
"csrc/sampler.cu"
"csrc/cuda_view.cu"
"csrc/quantization/gptq/q_gemm.cu"
"csrc/quantization/compressed_tensors/int8_quant_kernels.cu"
"csrc/quantization/fp8/common.cu"
"csrc/quantization/w8a8/int8/scaled_quant.cu"
"csrc/quantization/w8a8/fp8/common.cu"
"csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
"csrc/quantization/gguf/gguf_kernel.cu"
"csrc/quantization/activation_kernels.cu"
......@@ -314,12 +314,13 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
list(APPEND VLLM_EXT_SRC
"csrc/quantization/awq/gemm_kernels.cu"
"csrc/permute_cols.cu"
"csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu"
"csrc/quantization/w8a8/cutlass/scaled_mm_entry.cu"
"csrc/quantization/fp4/nvfp4_quant_entry.cu"
"csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu"
"csrc/sparse/cutlass/sparse_scaled_mm_entry.cu"
"csrc/cutlass_extensions/common.cpp"
"csrc/quantization/fp8/per_token_group_quant.cu")
"csrc/quantization/w8a8/fp8/per_token_group_quant.cu"
"csrc/quantization/w8a8/int8/per_token_group_quant.cu")
set_gencode_flags_for_srcs(
SRCS "${VLLM_EXT_SRC}"
......@@ -423,11 +424,11 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
cuda_archs_loose_intersection(SCALED_MM_ARCHS "9.0a;" "${CUDA_ARCHS}")
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0 AND SCALED_MM_ARCHS)
set(SRCS
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x_sm90.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_fp8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_int8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_azp_sm90_int8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm90_fp8.cu")
"csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm90.cu"
"csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_fp8.cu"
"csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm90_int8.cu"
"csrc/quantization/w8a8/cutlass/c3x/scaled_mm_azp_sm90_int8.cu"
"csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm90_fp8.cu")
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
CUDA_ARCHS "${SCALED_MM_ARCHS}")
......@@ -458,9 +459,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
endif()
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
set(SRCS
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x_sm120.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm120_fp8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm120_fp8.cu"
"csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm120.cu"
"csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm120_fp8.cu"
"csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm120_fp8.cu"
)
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
......@@ -492,9 +493,9 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
endif()
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
set(SRCS
"csrc/quantization/cutlass_w8a8/scaled_mm_c3x_sm100.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm100_fp8.cu"
"csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm100_fp8.cu"
"csrc/quantization/w8a8/cutlass/scaled_mm_c3x_sm100.cu"
"csrc/quantization/w8a8/cutlass/c3x/scaled_mm_sm100_fp8.cu"
"csrc/quantization/w8a8/cutlass/c3x/scaled_mm_blockwise_sm100_fp8.cu"
)
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
......@@ -525,7 +526,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
# subtract out the archs that are already built for 3x
list(REMOVE_ITEM SCALED_MM_2X_ARCHS ${SCALED_MM_3X_ARCHS})
if (SCALED_MM_2X_ARCHS)
set(SRCS "csrc/quantization/cutlass_w8a8/scaled_mm_c2x.cu")
set(SRCS "csrc/quantization/w8a8/cutlass/scaled_mm_c2x.cu")
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
CUDA_ARCHS "${SCALED_MM_2X_ARCHS}")
......@@ -648,7 +649,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
# if it's possible to compile MoE kernels that use its output.
cuda_archs_loose_intersection(SCALED_MM_ARCHS "9.0a" "${CUDA_ARCHS}")
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.3 AND SCALED_MM_ARCHS)
set(SRCS "csrc/quantization/cutlass_w8a8/moe/grouped_mm_c3x_sm90.cu")
set(SRCS "csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm90.cu")
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
CUDA_ARCHS "${SCALED_MM_ARCHS}")
......@@ -672,7 +673,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a" "${CUDA_ARCHS}")
endif()
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
set(SRCS "csrc/quantization/cutlass_w8a8/moe/grouped_mm_c3x_sm100.cu")
set(SRCS "csrc/quantization/w8a8/cutlass/moe/grouped_mm_c3x_sm100.cu")
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
CUDA_ARCHS "${SCALED_MM_ARCHS}")
......@@ -697,7 +698,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
cuda_archs_loose_intersection(CUTLASS_MOE_DATA_ARCHS "9.0a;10.0a;10.1a;10.3a;12.0a;12.1a" "${CUDA_ARCHS}")
endif()
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.3 AND CUTLASS_MOE_DATA_ARCHS)
set(SRCS "csrc/quantization/cutlass_w8a8/moe/moe_data.cu")
set(SRCS "csrc/quantization/w8a8/cutlass/moe/moe_data.cu")
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
CUDA_ARCHS "${CUTLASS_MOE_DATA_ARCHS}")
......@@ -720,7 +721,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
cuda_archs_loose_intersection(SCALED_MM_ARCHS "10.0a;10.1a;10.3a;12.0a;12.1a" "${CUDA_ARCHS}")
endif()
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND SCALED_MM_ARCHS)
set(SRCS "csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu")
set(SRCS "csrc/quantization/w8a8/cutlass/moe/blockwise_scaled_group_mm_sm100.cu")
set_gencode_flags_for_srcs(
SRCS "${SRCS}"
CUDA_ARCHS "${SCALED_MM_ARCHS}")
......
......@@ -28,10 +28,10 @@
#ifdef USE_ROCM
#include <hip/hip_bf16.h>
#include "../quantization/fp8/amd/quant_utils.cuh"
#include "../quantization/w8a8/fp8/amd/quant_utils.cuh"
typedef __hip_bfloat16 __nv_bfloat16;
#else
#include "../quantization/fp8/nvidia/quant_utils.cuh"
#include "../quantization/w8a8/fp8/nvidia/quant_utils.cuh"
#endif
#define MAX(a, b) ((a) > (b) ? (a) : (b))
......
......@@ -9,9 +9,9 @@
#include "quantization/vectorization_utils.cuh"
#ifdef USE_ROCM
#include "quantization/fp8/amd/quant_utils.cuh"
#include "quantization/w8a8/fp8/amd/quant_utils.cuh"
#else
#include "quantization/fp8/nvidia/quant_utils.cuh"
#include "quantization/w8a8/fp8/nvidia/quant_utils.cuh"
#endif
#include <algorithm>
......
......@@ -12,6 +12,7 @@ using CubMaxOp = cub::Max;
#endif // CUB_VERSION
#else
#include <hipcub/hipcub.hpp>
using CubAddOp = cub::Sum;
using CubMaxOp = cub::Max;
namespace cub = hipcub;
using CubAddOp = hipcub::Sum;
using CubMaxOp = hipcub::Max;
#endif // USE_ROCM
......@@ -6,7 +6,7 @@
*/
#include "type_convert.cuh"
#include "quantization/fp8/common.cuh"
#include "quantization/w8a8/fp8/common.cuh"
#include "dispatch_utils.h"
#include "cub_helpers.h"
#include "core/batch_invariant.hpp"
......
......@@ -7,7 +7,7 @@
#include "../cuda_compat.h"
#include "dispatch_utils.h"
#include "quantization/fp8/common.cuh"
#include "quantization/w8a8/fp8/common.cuh"
#include <c10/util/Float8_e4m3fn.h>
......
......@@ -6,7 +6,7 @@
#include "quantization/vectorization.cuh"
// TODO(luka/varun):refactor common.cuh to use this file instead
#include "quantization/fp8/common.cuh"
#include "quantization/w8a8/fp8/common.cuh"
namespace vllm {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment