Commit 241b4cfe (unverified), authored by Wentao Ye, committed by GitHub
Browse files

[Refactor] Refactor FP8 & INT8 Quant Folder inside `w8a8` (#25293)


Signed-off-by: default avatarnicole-lihui <nicole.li@daocloud.io>
Signed-off-by: default avataryewentao256 <zhyanwentao@126.com>
Signed-off-by: default avatarcourage17340 <courage17340@163.com>
Signed-off-by: default avatarDarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: default avatarJacob Kahn <jacobkahn1@gmail.com>
Signed-off-by: default avatarTyler Michael Smith <tlrmchlsmth@gmail.com>
Signed-off-by: default avatarFadi Arafeh <fadi.arafeh@arm.com>
Signed-off-by: default avatarRoger Wang <hey@rogerw.io>
Signed-off-by: default avatarAgata Dobrzyniewicz <adobrzyniewicz@habana.ai>
Signed-off-by: default avatarIsotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: default avatarzxw <1020938856@qq.com>
Signed-off-by: default avatarHarry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: default avatarwang.yuqi <noooop@126.com>
Signed-off-by: default avatarCyrus Leung <cyrus.tl.leung@gmail.com>
Signed-off-by: default avatarKunshang Ji <kunshang.ji@intel.com>
Signed-off-by: default avatarchenlang <chen.lang5@zte.com.cn>
Signed-off-by: default avataryoukaichao <youkaichao@gmail.com>
Signed-off-by: default avatarJonas Kuebler <kuebj@amazon.com>
Signed-off-by: default avatarjiang1.li <jiang1.li@intel.com>
Signed-off-by: default avatarRussell Bryant <rbryant@redhat.com>
Signed-off-by: default avatarNickLucche <nlucches@redhat.com>
Signed-off-by: default avatarTyler Michael Smith <tyler@neuralmagic.com>
Signed-off-by: default avatarAlonKejzman <alonkeizman@gmail.com>
Signed-off-by: default avatarLucas Wilkinson <lwilkins@redhat.com>
Signed-off-by: default avatartaohui <taohui3@gmail.com>
Signed-off-by: default avatarTao Hui <taohui3@gmail.com>
Signed-off-by: default avatarMatthew Bonanni <mbonanni@redhat.com>
Signed-off-by: default avatarMatthew Bonanni <mbonanni001@gmail.com>
Signed-off-by: default avatarJee Jee Li <pandaleefree@gmail.com>
Signed-off-by: default avatarEkagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com>
Signed-off-by: default avatarZhuohan Li <zhuohan123@gmail.com>
Signed-off-by: default avatarTomer Asida <57313761+tomeras91@users.noreply.github.com>
Signed-off-by: default avatarShu Wang. <shuw@nvidia.com>
Signed-off-by: default avatarNick Hill <nhill@redhat.com>
Signed-off-by: default avatarAleksandr Malyshev <maleksan@amd.com>
Signed-off-by: default avatarEugene Khvedchenia <ekhvedchenia@nvidia.com>
Signed-off-by: default avatarEugene Khvedchenya <ekhvedchenya@gmail.com>
Signed-off-by: default avataryiting.jiang <yiting.jiang@daocloud.io>
Signed-off-by: default avatarAndrew Sansom <andrew@protopia.ai>
Signed-off-by: default avatarxaguilar <Xavier.AguilarFruto@amd.com>
Signed-off-by: default avatarIceber Gu <caiwei95@hotmail.com>
Signed-off-by: default avatarTao He <linzhu.ht@alibaba-inc.com>
Signed-off-by: default avatarIcey <1790571317@qq.com>
Signed-off-by: default avatarSage Moore <sage@neuralmagic.com>
Signed-off-by: default avatar许文卿 <xwq391974@alibaba-inc.com>
Signed-off-by: default avatarChih-Chieh-Yang <7364402+cyang49@users.noreply.github.com>
Signed-off-by: default avatarchaunceyjiang <chaunceyjiang@gmail.com>
Signed-off-by: default avatarSeiji Eicher <seiji@anyscale.com>
Signed-off-by: default avatarSeiji Eicher <58963096+eicherseiji@users.noreply.github.com>
Signed-off-by: default avatarzjy0516 <riverclouds.zhu@qq.com>
Signed-off-by: default avatarKosseila (CloudThrill) <klouddude@gmail.com>
Signed-off-by: default avatarfrankwang28 <frank.wbb@hotmail.com>
Signed-off-by: default avatarFrank Wang <41319051+frankwang28@users.noreply.github.com>
Signed-off-by: default avatarmgoin <mgoin64@gmail.com>
Signed-off-by: default avatarfhl2000 <63384265+fhl2000@users.noreply.github.com>
Signed-off-by: default avatarzixi-qi <qizixi@meta.com>
Signed-off-by: default avatarBram Wasti <bwasti@meta.com>
Signed-off-by: default avatarNaman Lalit <nl2688@nyu.edu>
Signed-off-by: default avatarChenheli Hua <huachenheli@outlook.com>
Signed-off-by: default avatarJunhong <liujunhong11@huawei.com>
Signed-off-by: default avatarJunhong Liu <98734602+LJH-LBJ@users.noreply.github.com>
Signed-off-by: default avatar22quinn <33176974+22quinn@users.noreply.github.com>
Signed-off-by: default avatarrentianyue-jk <rentianyue-jk@360shuke.com>
Signed-off-by: default avatarPeter Pan <Peter.Pan@daocloud.io>
Signed-off-by: default avatarPatrick Toulme <ptoulme@meta.com>
Signed-off-by: default avatarPatrick Toulme <pctoulme+1@gmail.com>
Signed-off-by: default avatarJiangyun Zhu <riverclouds.zhu@qq.com>
Signed-off-by: default avatarClayton Coleman <smarterclayton@gmail.com>
Signed-off-by: default avatarJialin Ouyang <jialino@meta.com>
Signed-off-by: default avatarJialin Ouyang <Jialin.Ouyang@gmail.com>
Signed-off-by: default avatarWeiliang Liu <weiliangl@nvidia.com>
Signed-off-by: default avatarzRzRzRzRzRzRzR <2448370773@qq.com>
Signed-off-by: default avatarliuye.hj <liuye.hj@alibaba-inc.com>
Signed-off-by: default avatarJuechen Liu <jueliu@meta.com>
Signed-off-by: default avatarsimon-mo <simon.mo@hey.com>
Signed-off-by: default avatarRobert Shaw <robshaw@redhat.com>
Signed-off-by: default avatarThomas Parnell <tpa@zurich.ibm.com>
Signed-off-by: default avatarisotr0py <2037008807@qq.com>
Signed-off-by: default avataryingjun-mou <renzomou@gmail.com>
Signed-off-by: zhoukz's avatarzhoukz <me@zhoukz.com>
Signed-off-by: default avatarChenxi Yang <cxyang@fb.com>
Signed-off-by: default avatarRahul Tuli <rtuli@redhat.com>
Signed-off-by: default avatarLee Nau <lnau@nvidia.com>
Signed-off-by: default avataradabeyta <aabeyta@redhat.com>
Signed-off-by: default avatarGregory Shtrasberg <Gregory.Shtrasberg@amd.com>
Signed-off-by: default avatarWentao Ye <44945378+yewentao256@users.noreply.github.com>
Signed-off-by: default avatarsimondanielsson <simon.danielsson99@hotmail.com>
Signed-off-by: default avatarChen Zhang <zhangch99@outlook.com>
Signed-off-by: default avatarYongye Zhu <zyy1102000@gmail.com>
Signed-off-by: default avatarBarry Kang <43644113+Barry-Delaney@users.noreply.github.com>
Signed-off-by: default avatarLucia Fang <fanglu@meta.com>
Signed-off-by: default avatara120092009 <zhaoty0121@gmail.com>
Signed-off-by: default avatarsergiopaniego <sergiopaniegoblanco@gmail.com>
Signed-off-by: default avatarSergio Paniego Blanco <sergiopaniegoblanco@gmail.com>
Signed-off-by: default avatarwangyafeng <wangyafeng@baidu.com>
Signed-off-by: default avatarLehua Ding <lehuading@tencent.com>
Signed-off-by: default avatarlyd1992 <liuyudong@iscas.ac.cn>
Signed-off-by: default avatarihb2032 <1355790728@qq.com>
Signed-off-by: default avatarasafg <39553475+Josephasafg@users.noreply.github.com>
Signed-off-by: default avataranion <1005128408@qq.com>
Signed-off-by: default avatarAnion <123177548+Anionex@users.noreply.github.com>
Signed-off-by: default avatarPavani Majety <pmajety@nvidia.com>
Signed-off-by: default avatarBill Nell <bnell@redhat.com>
Signed-off-by: default avatarbnellnm <49004751+bnellnm@users.noreply.github.com>
Signed-off-by: default avatarOr Ozeri <oro@il.ibm.com>
Signed-off-by: default avatarcjackal <44624812+cjackal@users.noreply.github.com>
Signed-off-by: default avatarDavid Ben-David <davidb@pliops.com>
Signed-off-by: default avatarAndrew Xia <axia@meta.com>
Signed-off-by: default avatarAndrew Xia <axia@fb.com>
Signed-off-by: default avatarLu Fang <fanglu@fb.com>
Signed-off-by: default avatarSalvatore Cena <cena@cenas.it>
Signed-off-by: default avatarpadg9912 <phone.and.desktop@gmail.com>
Signed-off-by: default avatarnadathurv <work.vnadathur@gmail.com>
Signed-off-by: default avatarWorldExplored <srreyansh.sethi@gmail.com>
Signed-off-by: default avatarwwl2755 <wangwenlong2755@gmail.com>
Signed-off-by: default avatarbillishyahao <bill.he@amd.com>
Signed-off-by: default avatarNathan Scott <nathans@redhat.com>
Signed-off-by: default avatarKenichi Maehashi <maehashi@preferred.jp>
Signed-off-by: default avatarJohnny <johnnynuca14@gmail.com>
Signed-off-by: default avatarjohnnynunez <johnnynuca14@gmail.com>
Signed-off-by: default avatarJohnny <johnnync13@gmail.com>
Signed-off-by: default avatarHuamin Li <3ericli@gmail.com>
Signed-off-by: default avatarHosang Yoon <hosang.yoon@amd.com>
Signed-off-by: default avatarJerry Zhang <jerryzh168@gmail.com>
Signed-off-by: default avatarPeter Schuurman <psch@google.com>
Signed-off-by: default avatarHuy Do <huydhn@gmail.com>
Signed-off-by: default avatarleo-pony <nengjunma@outlook.com>
Signed-off-by: default avatarvllmellm <vllm.ellm@embeddedllm.com>
Signed-off-by: default avatarLucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Signed-off-by: default avatarElizaWszola <ewszola@redhat.com>
Signed-off-by: default avatarElizaWszola <elizaw.9289@gmail.com>
Signed-off-by: default avatarLuka Govedič <lgovedic@redhat.com>
Signed-off-by: default avatarLuka Govedič <ProExpertProg@users.noreply.github.com>
Signed-off-by: default avatarMichael Goin <mgoin64@gmail.com>
Signed-off-by: default avatarBenjamin Chislett <bchislett@nvidia.com>
Signed-off-by: default avatartjtanaa <tunjian.tan@embeddedllm.com>
Signed-off-by: default avatarzhewenli <zhewenli@meta.com>
Signed-off-by: default avatarahao-anyscale <ahao@anyscale.com>
Signed-off-by: default avatarVarun Sundar Rabindranath <vsundarr@redhat.com>
Signed-off-by: default avatarhuijjj <huijong.jeong@squeezebits.com>
Signed-off-by: default avatarYannick Schnider <yannick.schnider1@ibm.com>
Signed-off-by: default avatarkyt <eluban4532@gmail.com>
Signed-off-by: default avatarEgor <e.a.krivov@gmail.com>
Signed-off-by: default avatarYang <lymailforjob@gmail.com>
Signed-off-by: default avatarPaul Pak <paulpak58@gmail.com>
Signed-off-by: default avatarwhx-sjtu <2952154980@qq.com>
Signed-off-by: default avatarXiang Si <sixiang@google.com>
Signed-off-by: default avatarAleksandr Samarin <astrlrd@nebius.com>
Signed-off-by: default avatarJun Jiang <jasl9187@hotmail.com>
Signed-off-by: default avatarChendi Xue <Chendi.Xue@intel.com>
Signed-off-by: default avatarChendi.Xue <chendi.xue@intel.com>
Signed-off-by: default avatarNikhil Ghosh <nikhil@anyscale.com>
Co-authored-by: Nicole LiHui 🥜 <nicolelihui@outlook.com>
Co-authored-by: default avatarcourage17340 <courage17340@users.noreply.github.com>
Co-authored-by: default avatarCyrus Leung <tlleungac@connect.ust.hk>
Co-authored-by: default avatarJacob Kahn <jacobkahn1@gmail.com>
Co-authored-by: default avatarRoger Wang <hey@rogerw.io>
Co-authored-by: Nicole LiHui 🥜 <nicole.li@daocloud.io>
Co-authored-by: default avatarTyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: default avatarFadi Arafeh <115173828+fadara01@users.noreply.github.com>
Co-authored-by: default avatarAgata Dobrzyniewicz <160237065+adobrzyn@users.noreply.github.com>
Co-authored-by: default avatarIsotr0py <mozf@mail2.sysu.edu.cn>
Co-authored-by: default avataryyzxw <34639446+yyzxw@users.noreply.github.com>
Co-authored-by: default avatarHarry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: default avatarwang.yuqi <noooop@126.com>
Co-authored-by: default avatarCyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: default avatarKunshang Ji <kunshang.ji@intel.com>
Co-authored-by: default avatarchenlang <chen.lang5@zte.com.cn>
Co-authored-by: default avatarchenlang <10346245@zte.com.cn>
Co-authored-by: default avataryoukaichao <youkaichao@gmail.com>
Co-authored-by: default avatarJonas M. Kübler <44084297+jmkuebler@users.noreply.github.com>
Co-authored-by: default avatarLi, Jiang <jiang1.li@intel.com>
Co-authored-by: default avatarRussell Bryant <rbryant@redhat.com>
Co-authored-by: default avatarNicolò Lucchesi <nlucches@redhat.com>
Co-authored-by: default avatarAlonKejzman <alonkeizman@gmail.com>
Co-authored-by: default avatarMichael Goin <mgoin64@gmail.com>
Co-authored-by: default avatarLucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Co-authored-by: default avatarTao Hui <taohui3@gmail.com>
Co-authored-by: default avatargemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: default avatarMatthew Bonanni <mbonanni@redhat.com>
Co-authored-by: default avatarJee Jee Li <pandaleefree@gmail.com>
Co-authored-by: default avatarEkagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com>
Co-authored-by: default avatarNick Hill <nhill@redhat.com>
Co-authored-by: default avatarZhuohan Li <zhuohan123@gmail.com>
Co-authored-by: default avatarYe (Charlotte) Qi <yeq@meta.com>
Co-authored-by: default avatartomeras91 <57313761+tomeras91@users.noreply.github.com>
Co-authored-by: default avatarShu Wang <shuw@nvidia.com>
Co-authored-by: default avatarAleksandr Malyshev <164964928+maleksan85@users.noreply.github.com>
Co-authored-by: default avatarAleksandr Malyshev <maleksan@amd.com>
Co-authored-by: default avatarDoug Lehr <douglehr@amd.com>
Co-authored-by: default avatarEugene Khvedchenya <ekhvedchenya@gmail.com>
Co-authored-by: default avataryitingdc <59356937+yitingdc@users.noreply.github.com>
Co-authored-by: default avatarAndrew Sansom <andrew@protopia.ai>
Co-authored-by: default avatarxaguilar-amd <xavier.aguilarfruto@amd.com>
Co-authored-by: default avatarIceber Gu <caiwei95@hotmail.com>
Co-authored-by: default avatarTao He <linzhu.ht@alibaba-inc.com>
Co-authored-by: default avatarIcey <1790571317@qq.com>
Co-authored-by: default avatarSage Moore <sage@neuralmagic.com>
Co-authored-by: default avatarRobert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: default avatarXu Wenqing <121550081+Xu-Wenqing@users.noreply.github.com>
Co-authored-by: default avatarChih-Chieh Yang <7364402+cyang49@users.noreply.github.com>
Co-authored-by: default avatarRishiAstra <40644327+RishiAstra@users.noreply.github.com>
Co-authored-by: default avatarChauncey <chaunceyjiang@gmail.com>
Co-authored-by: default avatarSeiji Eicher <58963096+eicherseiji@users.noreply.github.com>
Co-authored-by: default avatarRui Qiao <161574667+ruisearch42@users.noreply.github.com>
Co-authored-by: default avatarJiangyun Zhu <riverclouds.zhu@qq.com>
Co-authored-by: default avatarLuka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: default avatar阿丹(adan) <47373076+LDLINGLINGLING@users.noreply.github.com>
Co-authored-by: default avatarliudan <adan@minicpm.com>
Co-authored-by: default avatarliudan <liudan@qq.com>
Co-authored-by: default avatarLucia Fang <116399278+luccafong@users.noreply.github.com>
Co-authored-by: default avatarClouddude <kouss.hd@gmail.com>
Co-authored-by: default avatarFrank Wang <41319051+frankwang28@users.noreply.github.com>
Co-authored-by: default avatarfhl2000 <63384265+fhl2000@users.noreply.github.com>
Co-authored-by: default avatarqizixi <22851944+zixi-qi@users.noreply.github.com>
Co-authored-by: default avatarBram Wasti <bwasti@fb.com>
Co-authored-by: default avatarNaman Lalit <nl2688@nyu.edu>
Co-authored-by: default avatarChenheli Hua <huachenheli@outlook.com>
Co-authored-by: default avatarWeiQing Chen <40507679+david6666666@users.noreply.github.com>
Co-authored-by: default avatarJunhong <liujunhong11@huawei.com>
Co-authored-by: default avatarLJH-LBJ <98734602+LJH-LBJ@users.noreply.github.com>
Co-authored-by: default avatar22quinn <33176974+22quinn@users.noreply.github.com>
Co-authored-by: default avatarXiaohan Zou <renovamenzxh@gmail.com>
Co-authored-by: default avatarrentianyue-jk <rentianyue-jk@360shuke.com>
Co-authored-by: default avatarTyler Michael Smith <tlrmchlsmth@gmail.com>
Co-authored-by: default avatarPeter Pan <peter.pan@daocloud.io>
Co-authored-by: default avatarPatrick C. Toulme <135739773+patrick-toulme@users.noreply.github.com>
Co-authored-by: default avatarClayton Coleman <smarterclayton@gmail.com>
Co-authored-by: default avatarJialin Ouyang <Jialin.Ouyang@gmail.com>
Co-authored-by: default avatarJialin Ouyang <jialino@meta.com>
Co-authored-by: default avatarweiliang <weiliangl@nvidia.com>
Co-authored-by: default avatarYuxuan Zhang <2448370773@qq.com>
Co-authored-by: default avatarJJJYmmm <92386084+JJJYmmm@users.noreply.github.com>
Co-authored-by: default avatarliuye.hj <liuye.hj@alibaba-inc.com>
Co-authored-by: default avatarJuechen Liu <grinchcoder@gmail.com>
Co-authored-by: default avatarRobert Shaw <robshaw@redhat.com>
Co-authored-by: default avatarThomas Parnell <tpa@zurich.ibm.com>
Co-authored-by: default avatarYingjun Mou <renzomou@gmail.com>
Co-authored-by: zhoukz's avatarZhou Jiahao <me@zhoukz.com>
Co-authored-by: default avatarChenxi Yang <cxyang@cs.utexas.edu>
Co-authored-by: default avatarChenxi Yang <cxyang@fb.com>
Co-authored-by: default avatarRahul Tuli <rtuli@redhat.com>
Co-authored-by: default avatarLee Nau <lee.nau@gmail.com>
Co-authored-by: default avatarAdrian Abeyta <aabeyta@redhat.com>
Co-authored-by: default avatarGregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Co-authored-by: default avatarAaron Pham <contact@aarnphm.xyz>
Co-authored-by: default avataracisseJZhong <40467976+acisseJZhong@users.noreply.github.com>
Co-authored-by: default avatarSimon Danielsson <70206058+simondanielsson@users.noreply.github.com>
Co-authored-by: default avatarYongye Zhu <zyy1102000@gmail.com>
Co-authored-by: default avatarChen Zhang <zhangch99@outlook.com>
Co-authored-by: default avatarLucas Wilkinson <lwilkins@redhat.com>
Co-authored-by: default avatarLucia Fang <fanglu@meta.com>
Co-authored-by: default avatarSiyuan Fu <siyuanf@nvidia.com>
Co-authored-by: default avatarXiaozhu Meng <mxz297@gmail.com>
Co-authored-by: default avatarBarry Kang <43644113+Barry-Delaney@users.noreply.github.com>
Co-authored-by: default avatara120092009 <33205509+a120092009@users.noreply.github.com>
Co-authored-by: default avatarSergio Paniego Blanco <sergiopaniegoblanco@gmail.com>
Co-authored-by: default avatarCSWYF3634076 <wangyafeng@baidu.com>
Co-authored-by: default avatarLehua Ding <lehuading@tencent.com>
Co-authored-by: default avatarReza Barazesh <3146276+rzabarazesh@users.noreply.github.com>
Co-authored-by: default avatarihb2032 <40718643+ihb2032@users.noreply.github.com>
Co-authored-by: default avatarAsaf Joseph Gardin <39553475+Josephasafg@users.noreply.github.com>
Co-authored-by: default avatarAnion <123177548+Anionex@users.noreply.github.com>
Co-authored-by: default avatarPavani Majety <pmajety@nvidia.com>
Co-authored-by: default avatarbnellnm <49004751+bnellnm@users.noreply.github.com>
Co-authored-by: default avatarOr Ozeri <oro@il.ibm.com>
Co-authored-by: default avatarcjackal <44624812+cjackal@users.noreply.github.com>
Co-authored-by: default avatarDavid Ben-David <sdavidbd@gmail.com>
Co-authored-by: default avatarDavid Ben-David <davidb@pliops.com>
Co-authored-by: default avatarAndrew Xia <axia@mit.edu>
Co-authored-by: default avatarAndrew Xia <axia@fb.com>
Co-authored-by: default avatarSalvatore Cena <cena@cenas.it>
Co-authored-by: default avatarParam <psch@cs.unc.edu>
Co-authored-by: default avatarZhewen Li <zhewenli@meta.com>
Co-authored-by: default avatarnadathurv <work.vnadathur@gmail.com>
Co-authored-by: default avatarSrreyansh Sethi <107075589+WorldExplored@users.noreply.github.com>
Co-authored-by: default avatarWenlong Wang <wangwenlong2755@gmail.com>
Co-authored-by: default avatarbillishyahao <bill.he@amd.com>
Co-authored-by: default avatarNathan Scott <natoscott@users.noreply.github.com>
Co-authored-by: default avatarKenichi Maehashi <939877+kmaehashi@users.noreply.github.com>
Co-authored-by: default avatarJohnny <johnnync13@gmail.com>
Co-authored-by: default avatarAidyn-A <31858918+Aidyn-A@users.noreply.github.com>
Co-authored-by: default avatarHuamin Li <3ericli@gmail.com>
Co-authored-by: default avatarrshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: default avatarHosang <156028780+hyoon1@users.noreply.github.com>
Co-authored-by: default avatarJerry Zhang <jerryzh168@gmail.com>
Co-authored-by: default avatarpwschuurman <psch@google.com>
Co-authored-by: default avatarHuy Do <huydhn@gmail.com>
Co-authored-by: default avatarleo-pony <nengjunma@outlook.com>
Co-authored-by: default avatarvllmellm <vllm.ellm@embeddedllm.com>
Co-authored-by: default avatarElizaWszola <ewszola@redhat.com>
Co-authored-by: default avatarLuka Govedič <lgovedic@redhat.com>
Co-authored-by: default avatarBenjamin Chislett <bchislett@nvidia.com>
Co-authored-by: default avatarAndrew Xia <axia@meta.com>
Co-authored-by: default avatarSimon Mo <simon.mo@hey.com>
Co-authored-by: default avatarTJian <tunjian.tan@embeddedllm.com>
Co-authored-by: default avatarahao-anyscale <ahao@anyscale.com>
Co-authored-by: default avatarVarun Sundar Rabindranath <varunsundar08@gmail.com>
Co-authored-by: default avatarVarun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: default avatarLiu-congo <1502632128@qq.com>
Co-authored-by: default avatarHUIJONG JEONG <64083281+huijjj@users.noreply.github.com>
Co-authored-by: default avatarYannick Schnider <Yannick.Schnider1@ibm.com>
Co-authored-by: default avatarkyt <eluban4532@gmail.com>
Co-authored-by: default avatarEgor <e.a.krivov@gmail.com>
Co-authored-by: default avatarYang Liu <127183760+KKSK-DON@users.noreply.github.com>
Co-authored-by: default avatarPaul Pak <52512091+paulpak58@users.noreply.github.com>
Co-authored-by: default avatarwhx <56632993+whx-sjtu@users.noreply.github.com>
Co-authored-by: default avatarXiang Si <sixiang@google.com>
Co-authored-by: default avatarAleksandr Samarin <samarin_ad@mail.ru>
Co-authored-by: default avatarJun Jiang <jasl9187@hotmail.com>
Co-authored-by: default avatarChendi.Xue <chendi.xue@intel.com>
Co-authored-by: default avatarNikhil G <nrghosh@users.noreply.github.com>
parent 9fc983c7
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <hip/hip_bf16.h> #include <hip/hip_bf16.h>
#include <hip/hip_bfloat16.h> #include <hip/hip_bfloat16.h>
#include "../../../attention/attention_dtypes.h" #include "../../../../attention/attention_dtypes.h"
namespace vllm { namespace vllm {
#ifdef USE_ROCM #ifdef USE_ROCM
......
#include "common.cuh" #include "common.cuh"
#include "dispatch_utils.h" #include "dispatch_utils.h"
#include "../../cub_helpers.h" #include "cub_helpers.h"
#include "../vectorization_utils.cuh" #include "quantization/vectorization_utils.cuh"
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <ATen/cuda/Exceptions.h> #include <ATen/cuda/Exceptions.h>
......
#pragma once #pragma once
#include "../../../attention/attention_dtypes.h" #include "../../../../attention/attention_dtypes.h"
#include <assert.h> #include <assert.h>
#include <float.h> #include <float.h>
#include <stdint.h> #include <stdint.h>
......
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include "../per_token_group_quant_8bit.h" #include "quantization/w8a8/per_token_group_quant_8bit.h"
#include <cmath> #include <cmath>
...@@ -8,9 +8,9 @@ ...@@ -8,9 +8,9 @@
#include <torch/all.h> #include <torch/all.h>
#include "../vectorization.cuh" #include "quantization/vectorization.cuh"
#include "../vectorization_utils.cuh" #include "quantization/vectorization_utils.cuh"
#include "../../dispatch_utils.h" #include "dispatch_utils.h"
__device__ __forceinline__ float GroupReduceMax(float val) { __device__ __forceinline__ float GroupReduceMax(float val) {
unsigned mask = threadIdx.x % 32 >= 16 ? 0xffff0000 : 0x0000ffff; unsigned mask = threadIdx.x % 32 >= 16 ? 0xffff0000 : 0x0000ffff;
...@@ -212,4 +212,4 @@ void per_token_group_quant_fp8(const torch::Tensor& input, ...@@ -212,4 +212,4 @@ void per_token_group_quant_fp8(const torch::Tensor& input,
double fp8_max, bool scale_ue8m0) { double fp8_max, bool scale_ue8m0) {
per_token_group_quant_8bit(input, output_q, output_s, group_size, eps, per_token_group_quant_8bit(input, output_q, output_s, group_size, eps,
fp8_min, fp8_max, scale_ue8m0); fp8_min, fp8_max, scale_ue8m0);
} }
\ No newline at end of file
#include <ATen/cuda/CUDAContext.h>
#include <torch/all.h>
#include "quantization/w8a8/per_token_group_quant_8bit.h"
// INT8 entry point for per-token-group quantization.
//
// Thin forwarding wrapper: every argument is handed, unchanged and in the
// same order, to the shared per_token_group_quant_8bit helper declared in
// "quantization/w8a8/per_token_group_quant_8bit.h". No work is done here.
//
// Parameters (semantics are defined by the helper, not by this wrapper):
//   input               - source tensor, passed through by const reference.
//   output_q, output_s  - mutable tensors passed through to the helper;
//                         presumably the quantized values and the per-group
//                         scales respectively -- TODO confirm against the
//                         helper's implementation.
//   group_size, eps     - forwarded as-is.
//   int8_min, int8_max  - forwarded as the helper's min/max bounds.
//
// NOTE(review): only seven arguments are supplied, so any further parameter
// of the helper must be taking a default declared in the header -- confirm
// that default is the intended one for INT8.
void per_token_group_quant_int8(const torch::Tensor& input,
                                torch::Tensor& output_q,
                                torch::Tensor& output_s, int64_t group_size,
                                double eps, double int8_min, double int8_max) {
  return per_token_group_quant_8bit(
      input,
      output_q,
      output_s,
      group_size,
      eps,
      int8_min,
      int8_max);
}
\ No newline at end of file
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <torch/all.h> #include <torch/all.h>
#ifndef USE_ROCM
#include "../per_token_group_quant_8bit.h"
#endif
#include <cmath> #include <cmath>
#include "../../cub_helpers.h" #include "dispatch_utils.h"
#include "../../dispatch_utils.h" #include "quantization/vectorization_utils.cuh"
#include "../vectorization_utils.cuh" #include "cub_helpers.h"
static inline __device__ int8_t float_to_int8_rn(float x) { static inline __device__ int8_t float_to_int8_rn(float x) {
#ifdef USE_ROCM #ifdef USE_ROCM
...@@ -25,7 +21,6 @@ static inline __device__ int8_t float_to_int8_rn(float x) { ...@@ -25,7 +21,6 @@ static inline __device__ int8_t float_to_int8_rn(float x) {
float dst = std::nearbyint(x); float dst = std::nearbyint(x);
// saturate // saturate
// See https://github.com/pytorch/pytorch/issues/127666 // See https://github.com/pytorch/pytorch/issues/127666
// See https://github.com/llvm/llvm-project/issues/95183 // See https://github.com/llvm/llvm-project/issues/95183
// hip-clang std::clamp __glibcxx_assert_fail host function when building on // hip-clang std::clamp __glibcxx_assert_fail host function when building on
...@@ -84,7 +79,6 @@ static inline __device__ int8_t int32_to_int8(int32_t x) { ...@@ -84,7 +79,6 @@ static inline __device__ int8_t int32_to_int8(int32_t x) {
static_cast<int32_t>(std::numeric_limits<int8_t>::max()); static_cast<int32_t>(std::numeric_limits<int8_t>::max());
// saturate // saturate
// See https://github.com/pytorch/pytorch/issues/127666 // See https://github.com/pytorch/pytorch/issues/127666
// See https://github.com/llvm/llvm-project/issues/95183 // See https://github.com/llvm/llvm-project/issues/95183
// hip-clang std::clamp __glibcxx_assert_fail host function when building on // hip-clang std::clamp __glibcxx_assert_fail host function when building on
...@@ -176,7 +170,6 @@ __global__ void dynamic_scaled_int8_quant_kernel( ...@@ -176,7 +170,6 @@ __global__ void dynamic_scaled_int8_quant_kernel(
float inv_s = (absmax == 0.f) ? 0.f : 127.f / absmax; float inv_s = (absmax == 0.f) ? 0.f : 127.f / absmax;
// 2. quantize
vectorize_with_alignment<16>( vectorize_with_alignment<16>(
row_in, row_out, hidden_size, tid, stride, row_in, row_out, hidden_size, tid, stride,
[=] __device__(int8_t& dst, const scalar_t& src) { [=] __device__(int8_t& dst, const scalar_t& src) {
...@@ -194,7 +187,6 @@ struct MinMax { ...@@ -194,7 +187,6 @@ struct MinMax {
__host__ __device__ explicit MinMax(float v) : min(v), max(v) {} __host__ __device__ explicit MinMax(float v) : min(v), max(v) {}
// add a value to the MinMax
__host__ __device__ MinMax& operator+=(float v) { __host__ __device__ MinMax& operator+=(float v) {
min = fminf(min, v); min = fminf(min, v);
max = fmaxf(max, v); max = fmaxf(max, v);
...@@ -228,7 +220,6 @@ __global__ void dynamic_scaled_int8_azp_quant_kernel( ...@@ -228,7 +220,6 @@ __global__ void dynamic_scaled_int8_azp_quant_kernel(
const scalar_t* row_in = input + token_idx * hidden_size; const scalar_t* row_in = input + token_idx * hidden_size;
int8_t* row_out = output + token_idx * hidden_size; int8_t* row_out = output + token_idx * hidden_size;
// 1. calculate min & max
MinMax thread_mm; MinMax thread_mm;
vectorize_read_with_alignment<16>(row_in, hidden_size, tid, stride, vectorize_read_with_alignment<16>(row_in, hidden_size, tid, stride,
[&] __device__(const scalar_t& src) { [&] __device__(const scalar_t& src) {
...@@ -261,7 +252,6 @@ __global__ void dynamic_scaled_int8_azp_quant_kernel( ...@@ -261,7 +252,6 @@ __global__ void dynamic_scaled_int8_azp_quant_kernel(
const float inv_s = 1.f / scale_sh; const float inv_s = 1.f / scale_sh;
const azp_t azp = azp_sh; const azp_t azp = azp_sh;
// 2. quantize
vectorize_with_alignment<16>( vectorize_with_alignment<16>(
row_in, row_out, hidden_size, tid, stride, row_in, row_out, hidden_size, tid, stride,
[=] __device__(int8_t& dst, const scalar_t& src) { [=] __device__(int8_t& dst, const scalar_t& src) {
...@@ -332,14 +322,4 @@ void dynamic_scaled_int8_quant( ...@@ -332,14 +322,4 @@ void dynamic_scaled_int8_quant(
hidden_size); hidden_size);
} }
}); });
} }
\ No newline at end of file
#ifndef USE_ROCM
// Per-token-group INT8 quantization entry point.
//
// Delegates directly to per_token_group_quant_8bit, passing all seven
// arguments through untouched; the wrapper itself performs no computation
// and returns nothing.
//
// NOTE(review): the roles of output_q / output_s (presumably quantized data
// and scales) and the valid ranges of eps / int8_min / int8_max are decided
// by the helper and are not visible here -- verify against its definition.
void per_token_group_quant_int8(const torch::Tensor& input,
                                torch::Tensor& output_q,
                                torch::Tensor& output_s, int64_t group_size,
                                double eps, double int8_min, double int8_max) {
  return per_token_group_quant_8bit(
      input, output_q, output_s,  // tensors, forwarded by reference
      group_size, eps,            // grouping / epsilon, forwarded as-is
      int8_min, int8_max);        // bounds handed to the shared helper
}
#endif
#pragma once #pragma once
#include <torch/all.h> #include <torch/all.h>
// TODO(wentao): refactor the folder to 8bit, then includes fp8 and int8 folders
// 8-bit per-token-group quantization helper used by both FP8 and INT8 // 8-bit per-token-group quantization helper used by both FP8 and INT8
void per_token_group_quant_8bit(const torch::Tensor& input, void per_token_group_quant_8bit(const torch::Tensor& input,
torch::Tensor& output_q, torch::Tensor& output_q,
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include <algorithm> #include <algorithm>
#include "../attention/dtype_fp8.cuh" #include "../attention/dtype_fp8.cuh"
#include "../quantization/fp8/amd/quant_utils.cuh" #include "../quantization/w8a8/fp8/amd/quant_utils.cuh"
// ROCm 6.2 compatibility: map OCP fp8 types to FNUZ variants if OCP is absent // ROCm 6.2 compatibility: map OCP fp8 types to FNUZ variants if OCP is absent
#if !defined(HIP_FP8_TYPE_OCP) #if !defined(HIP_FP8_TYPE_OCP)
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
#include "../cuda_compat.h" #include "../cuda_compat.h"
#include "dispatch_utils.h" #include "dispatch_utils.h"
#include "quantization/fp8/common.cuh" #include "quantization/w8a8/fp8/common.cuh"
#if defined(__HIPCC__) && \ #if defined(__HIPCC__) && \
(defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__)) (defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__))
......
...@@ -89,8 +89,8 @@ class CutlassScaledMMLinearKernel(ScaledMMLinearKernel): ...@@ -89,8 +89,8 @@ class CutlassScaledMMLinearKernel(ScaledMMLinearKernel):
# azp_adj is the AZP adjustment term, used to account for weights. # azp_adj is the AZP adjustment term, used to account for weights.
# It does not depend on scales or azp, so it is the same for # It does not depend on scales or azp, so it is the same for
# static and dynamic quantization. # static and dynamic quantization.
# For more details, see csrc/quantization/cutlass_w8a8/Epilogues.md # For more details, see csrc/quantization/w8a8/cutlass/Epilogues.md
# https://github.com/vllm-project/vllm/blob/8d59dbb00044a588cab96bcdc028006ed922eb06/csrc/quantization/cutlass_w8a8/Epilogues.md # https://github.com/vllm-project/vllm/blob/main/csrc/quantization/w8a8/cutlass/Epilogues.md
if not self.config.input_symmetric: if not self.config.input_symmetric:
weight = getattr(layer, self.w_q_name) weight = getattr(layer, self.w_q_name)
azp_adj = weight.sum(dim=0, keepdim=True, dtype=torch.int32) azp_adj = weight.sum(dim=0, keepdim=True, dtype=torch.int32)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment