Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
b03d5dc5
Unverified
Commit
b03d5dc5
authored
Mar 04, 2024
by
Fengzhe Zhou
Committed by
GitHub
Mar 04, 2024
Browse files
[Sync] Sync Internal (#941)
parent
bbec7d87
Changes
73
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
359 additions
and
138 deletions
+359
-138
configs/summarizers/compassbench_v1_reason.py
configs/summarizers/compassbench_v1_reason.py
+0
-39
configs/summarizers/groups/plugineval.py
configs/summarizers/groups/plugineval.py
+16
-1
opencompass/datasets/IFEval/instructions_util.py
opencompass/datasets/IFEval/instructions_util.py
+2
-6
opencompass/datasets/TheoremQA.py
opencompass/datasets/TheoremQA.py
+12
-0
opencompass/datasets/hellaswag.py
opencompass/datasets/hellaswag.py
+27
-1
opencompass/datasets/humaneval.py
opencompass/datasets/humaneval.py
+5
-2
opencompass/datasets/natural_question.py
opencompass/datasets/natural_question.py
+4
-3
opencompass/datasets/winogrande.py
opencompass/datasets/winogrande.py
+33
-3
opencompass/models/__init__.py
opencompass/models/__init__.py
+1
-0
opencompass/models/krgpt_api.py
opencompass/models/krgpt_api.py
+134
-0
opencompass/models/openai_api.py
opencompass/models/openai_api.py
+7
-0
opencompass/runners/dlc.py
opencompass/runners/dlc.py
+113
-80
tools/prompt_viewer.py
tools/prompt_viewer.py
+5
-3
No files found.
configs/summarizers/compassbench_v1_reason.py
View file @
b03d5dc5
...
...
@@ -12,36 +12,9 @@ compassbench_v1_reason_groups = [
summarizer
=
dict
(
dataset_abbrs
=
[
[
'reasonbench'
,
'acc_origin'
],
[
'reasonbench_cn_circular'
,
'acc_origin'
],
[
'reasonbench_en_circular'
,
'acc_origin'
],
[
'reasonbench_cn_commonsense_circular'
,
'acc_origin'
],
[
'reasonbench_cn_abductive_circular'
,
'acc_origin'
],
[
'reasonbench_cn_deductive_circular'
,
'acc_origin'
],
[
'reasonbench_cn_inductive_circular'
,
'acc_origin'
],
[
'reasonbench_en_commonsense_circular'
,
'acc_origin'
],
[
'reasonbench_en_abductive_circular'
,
'acc_origin'
],
[
'reasonbench_en_deductive_logiqa_zh_translated_circular'
,
'acc_origin'
],
[
'reasonbench_en_inductive_circular'
,
'acc_origin'
],
[
'reasonbench_cn_commonsense_circular'
,
'acc_origin'
],
[
'reasonbench_cn_abductive_alphanlg_translated_circular'
,
'acc_origin'
],
[
'reasonbench_cn_deductive_bbh3obj_translated_circular'
,
'acc_origin'
],
[
'reasonbench_cn_deductive_logiqa_zh_circular'
,
'acc_origin'
],
[
'reasonbench_cn_inductive_deer_translated_circular'
,
'acc_origin'
],
[
'reasonbench_cn_inductive_selfgenerated_circular'
,
'acc_origin'
],
[
'reasonbench_en_commonsense_circular'
,
'acc_origin'
],
[
'reasonbench_en_abductive_alphanlg_circular'
,
'acc_origin'
],
[
'reasonbench_en_deductive_logiqa_zh_translated_circular'
,
'acc_origin'
],
[
'reasonbench_en_inductive_deer_circular'
,
'acc_origin'
],
[
'reasonbench_en_inductive_selfgenerated_circular'
,
'acc_origin'
],
[
'reasonbench'
,
'perf_circular'
],
[
'reasonbench_cn_circular'
,
'perf_circular'
],
[
'reasonbench_en_circular'
,
'perf_circular'
],
[
'reasonbench_cn_commonsense_circular'
,
'perf_circular'
],
[
'reasonbench_cn_abductive_circular'
,
'perf_circular'
],
[
'reasonbench_cn_deductive_circular'
,
'perf_circular'
],
...
...
@@ -50,18 +23,6 @@ summarizer = dict(
[
'reasonbench_en_abductive_circular'
,
'perf_circular'
],
[
'reasonbench_en_deductive_logiqa_zh_translated_circular'
,
'perf_circular'
],
[
'reasonbench_en_inductive_circular'
,
'perf_circular'
],
[
'reasonbench_cn_commonsense_circular'
,
'perf_circular'
],
[
'reasonbench_cn_abductive_alphanlg_translated_circular'
,
'perf_circular'
],
[
'reasonbench_cn_deductive_bbh3obj_translated_circular'
,
'perf_circular'
],
[
'reasonbench_cn_deductive_logiqa_zh_circular'
,
'perf_circular'
],
[
'reasonbench_cn_inductive_deer_translated_circular'
,
'perf_circular'
],
[
'reasonbench_cn_inductive_selfgenerated_circular'
,
'perf_circular'
],
[
'reasonbench_en_commonsense_circular'
,
'perf_circular'
],
[
'reasonbench_en_abductive_alphanlg_circular'
,
'perf_circular'
],
[
'reasonbench_en_deductive_logiqa_zh_translated_circular'
,
'perf_circular'
],
[
'reasonbench_en_inductive_deer_circular'
,
'perf_circular'
],
[
'reasonbench_en_inductive_selfgenerated_circular'
,
'perf_circular'
],
],
summary_groups
=
compassbench_v1_reason_groups
,
)
configs/summarizers/groups/plugineval.py
View file @
b03d5dc5
...
...
@@ -39,6 +39,22 @@ _base_summary_groups = [
[
'plugin_eval-review_str_v1'
,
'review_quality'
],
],
},
{
'name'
:
'plugin_eval_one_review'
,
'subsets'
:
[
[
'plugin_eval-instruct_v1'
,
'format_metric'
],
[
'plugin_eval-instruct_v1'
,
'args_em_metric'
],
[
'plugin_eval-plan_str_v1'
,
'f1_score'
],
[
'plugin_eval-plan_json_v1'
,
'f1_score'
],
[
'plugin_eval-reason_str_v1'
,
'thought'
],
[
'plugin_eval-reason_retrieve_understand_json_v1'
,
'thought'
],
[
'plugin_eval-retrieve_str_v1'
,
'name'
],
[
'plugin_eval-reason_retrieve_understand_json_v1'
,
'name'
],
[
'plugin_eval-understand_str_v1'
,
'args'
],
[
'plugin_eval-reason_retrieve_understand_json_v1'
,
'args'
],
[
'plugin_eval-review_str_v1'
,
'review_quality'
],
]
},
{
'name'
:
'plugin_eval'
,
'subsets'
:
[
...
...
@@ -53,7 +69,6 @@ _base_summary_groups = [
[
'plugin_eval-understand_str_v1'
,
'args'
],
[
'plugin_eval-reason_retrieve_understand_json_v1'
,
'args'
],
[
'plugin_eval-review_str_v1'
,
'review_quality'
],
[
'copy_plugin_eval-review_str_v1'
,
'naive_average'
],
# a hack for review * 2
]
},
]
...
...
opencompass/datasets/IFEval/instructions_util.py
View file @
b03d5dc5
...
...
@@ -20,16 +20,12 @@ import functools
import
random
import
re
try
:
import
immutabledict
except
ImportError
:
immutabledict
=
None
import
nltk
WORD_LIST
=
[
'western'
,
'sentence'
,
'signal'
,
'dump'
,
'spot'
,
'opposite'
,
'bottom'
,
'potato'
,
'administration'
,
'working'
,
'welcome'
,
'morning'
,
'good'
,
'agency'
,
'primary'
,
'wish'
,
'responsibility'
,
'press'
,
'problem'
,
'president'
,
'steal'
,
'brush'
,
'read'
,
'type'
,
'beat'
,
'trainer'
,
'growth'
,
'lock'
,
'bone'
,
'case'
,
'equal'
,
'comfortable'
,
'region'
,
'replacement'
,
'performance'
,
'mate'
,
'walk'
,
'medicine'
,
'film'
,
'thing'
,
'rock'
,
'tap'
,
'total'
,
'competition'
,
'ease'
,
'south'
,
'establishment'
,
'gather'
,
'parking'
,
'world'
,
'plenty'
,
'breath'
,
'claim'
,
'alcohol'
,
'trade'
,
'dear'
,
'highlight'
,
'street'
,
'matter'
,
'decision'
,
'mess'
,
'agreement'
,
'studio'
,
'coach'
,
'assist'
,
'brain'
,
'wing'
,
'style'
,
'private'
,
'top'
,
'brown'
,
'leg'
,
'buy'
,
'procedure'
,
'method'
,
'speed'
,
'high'
,
'company'
,
'valuable'
,
'pie'
,
'analyst'
,
'session'
,
'pattern'
,
'district'
,
'pleasure'
,
'dinner'
,
'swimming'
,
'joke'
,
'order'
,
'plate'
,
'department'
,
'motor'
,
'cell'
,
'spend'
,
'cabinet'
,
'difference'
,
'power'
,
'examination'
,
'engine'
,
'horse'
,
'dimension'
,
'pay'
,
'toe'
,
'curve'
,
'literature'
,
'bother'
,
'fire'
,
'possibility'
,
'debate'
,
'activity'
,
'passage'
,
'hello'
,
'cycle'
,
'background'
,
'quiet'
,
'author'
,
'effect'
,
'actor'
,
'page'
,
'bicycle'
,
'error'
,
'throat'
,
'attack'
,
'character'
,
'phone'
,
'tea'
,
'increase'
,
'outcome'
,
'file'
,
'specific'
,
'inspector'
,
'internal'
,
'potential'
,
'staff'
,
'building'
,
'employer'
,
'shoe'
,
'hand'
,
'direction'
,
'garden'
,
'purchase'
,
'interview'
,
'study'
,
'recognition'
,
'member'
,
'spiritual'
,
'oven'
,
'sandwich'
,
'weird'
,
'passenger'
,
'particular'
,
'response'
,
'reaction'
,
'size'
,
'variation'
,
'a'
,
'cancel'
,
'candy'
,
'exit'
,
'guest'
,
'condition'
,
'fly'
,
'price'
,
'weakness'
,
'convert'
,
'hotel'
,
'great'
,
'mouth'
,
'mind'
,
'song'
,
'sugar'
,
'suspect'
,
'telephone'
,
'ear'
,
'roof'
,
'paint'
,
'refrigerator'
,
'organization'
,
'jury'
,
'reward'
,
'engineering'
,
'day'
,
'possession'
,
'crew'
,
'bar'
,
'road'
,
'description'
,
'celebration'
,
'score'
,
'mark'
,
'letter'
,
'shower'
,
'suggestion'
,
'sir'
,
'luck'
,
'national'
,
'progress'
,
'hall'
,
'stroke'
,
'theory'
,
'offer'
,
'story'
,
'tax'
,
'definition'
,
'history'
,
'ride'
,
'medium'
,
'opening'
,
'glass'
,
'elevator'
,
'stomach'
,
'question'
,
'ability'
,
'leading'
,
'village'
,
'computer'
,
'city'
,
'grand'
,
'confidence'
,
'candle'
,
'priest'
,
'recommendation'
,
'point'
,
'necessary'
,
'body'
,
'desk'
,
'secret'
,
'horror'
,
'noise'
,
'culture'
,
'warning'
,
'water'
,
'round'
,
'diet'
,
'flower'
,
'bus'
,
'tough'
,
'permission'
,
'week'
,
'prompt'
,
'connection'
,
'abuse'
,
'height'
,
'save'
,
'corner'
,
'border'
,
'stress'
,
'drive'
,
'stop'
,
'rip'
,
'meal'
,
'listen'
,
'confusion'
,
'girlfriend'
,
'living'
,
'relation'
,
'significance'
,
'plan'
,
'creative'
,
'atmosphere'
,
'blame'
,
'invite'
,
'housing'
,
'paper'
,
'drink'
,
'roll'
,
'silver'
,
'drunk'
,
'age'
,
'damage'
,
'smoke'
,
'environment'
,
'pack'
,
'savings'
,
'influence'
,
'tourist'
,
'rain'
,
'post'
,
'sign'
,
'grandmother'
,
'run'
,
'profit'
,
'push'
,
'clerk'
,
'final'
,
'wine'
,
'swim'
,
'pause'
,
'stuff'
,
'singer'
,
'funeral'
,
'average'
,
'source'
,
'scene'
,
'tradition'
,
'personal'
,
'snow'
,
'nobody'
,
'distance'
,
'sort'
,
'sensitive'
,
'animal'
,
'major'
,
'negotiation'
,
'click'
,
'mood'
,
'period'
,
'arrival'
,
'expression'
,
'holiday'
,
'repeat'
,
'dust'
,
'closet'
,
'gold'
,
'bad'
,
'sail'
,
'combination'
,
'clothes'
,
'emphasis'
,
'duty'
,
'black'
,
'step'
,
'school'
,
'jump'
,
'document'
,
'professional'
,
'lip'
,
'chemical'
,
'front'
,
'wake'
,
'while'
,
'inside'
,
'watch'
,
'row'
,
'subject'
,
'penalty'
,
'balance'
,
'possible'
,
'adult'
,
'aside'
,
'sample'
,
'appeal'
,
'wedding'
,
'depth'
,
'king'
,
'award'
,
'wife'
,
'blow'
,
'site'
,
'camp'
,
'music'
,
'safe'
,
'gift'
,
'fault'
,
'guess'
,
'act'
,
'shame'
,
'drama'
,
'capital'
,
'exam'
,
'stupid'
,
'record'
,
'sound'
,
'swing'
,
'novel'
,
'minimum'
,
'ratio'
,
'machine'
,
'shape'
,
'lead'
,
'operation'
,
'salary'
,
'cloud'
,
'affair'
,
'hit'
,
'chapter'
,
'stage'
,
'quantity'
,
'access'
,
'army'
,
'chain'
,
'traffic'
,
'kick'
,
'analysis'
,
'airport'
,
'time'
,
'vacation'
,
'philosophy'
,
'ball'
,
'chest'
,
'thanks'
,
'place'
,
'mountain'
,
'advertising'
,
'red'
,
'past'
,
'rent'
,
'return'
,
'tour'
,
'house'
,
'construction'
,
'net'
,
'native'
,
'war'
,
'figure'
,
'fee'
,
'spray'
,
'user'
,
'dirt'
,
'shot'
,
'task'
,
'stick'
,
'friend'
,
'software'
,
'promotion'
,
'interaction'
,
'surround'
,
'block'
,
'purpose'
,
'practice'
,
'conflict'
,
'routine'
,
'requirement'
,
'bonus'
,
'hole'
,
'state'
,
'junior'
,
'sweet'
,
'catch'
,
'tear'
,
'fold'
,
'wall'
,
'editor'
,
'life'
,
'position'
,
'pound'
,
'respect'
,
'bathroom'
,
'coat'
,
'script'
,
'job'
,
'teach'
,
'birth'
,
'view'
,
'resolve'
,
'theme'
,
'employee'
,
'doubt'
,
'market'
,
'education'
,
'serve'
,
'recover'
,
'tone'
,
'harm'
,
'miss'
,
'union'
,
'understanding'
,
'cow'
,
'river'
,
'association'
,
'concept'
,
'training'
,
'recipe'
,
'relationship'
,
'reserve'
,
'depression'
,
'proof'
,
'hair'
,
'revenue'
,
'independent'
,
'lift'
,
'assignment'
,
'temporary'
,
'amount'
,
'loss'
,
'edge'
,
'track'
,
'check'
,
'rope'
,
'estimate'
,
'pollution'
,
'stable'
,
'message'
,
'delivery'
,
'perspective'
,
'mirror'
,
'assistant'
,
'representative'
,
'witness'
,
'nature'
,
'judge'
,
'fruit'
,
'tip'
,
'devil'
,
'town'
,
'emergency'
,
'upper'
,
'drop'
,
'stay'
,
'human'
,
'neck'
,
'speaker'
,
'network'
,
'sing'
,
'resist'
,
'league'
,
'trip'
,
'signature'
,
'lawyer'
,
'importance'
,
'gas'
,
'choice'
,
'engineer'
,
'success'
,
'part'
,
'external'
,
'worker'
,
'simple'
,
'quarter'
,
'student'
,
'heart'
,
'pass'
,
'spite'
,
'shift'
,
'rough'
,
'lady'
,
'grass'
,
'community'
,
'garage'
,
'youth'
,
'standard'
,
'skirt'
,
'promise'
,
'blind'
,
'television'
,
'disease'
,
'commission'
,
'positive'
,
'energy'
,
'calm'
,
'presence'
,
'tune'
,
'basis'
,
'preference'
,
'head'
,
'common'
,
'cut'
,
'somewhere'
,
'presentation'
,
'current'
,
'thought'
,
'revolution'
,
'effort'
,
'master'
,
'implement'
,
'republic'
,
'floor'
,
'principle'
,
'stranger'
,
'shoulder'
,
'grade'
,
'button'
,
'tennis'
,
'police'
,
'collection'
,
'account'
,
'register'
,
'glove'
,
'divide'
,
'professor'
,
'chair'
,
'priority'
,
'combine'
,
'peace'
,
'extension'
,
'maybe'
,
'evening'
,
'frame'
,
'sister'
,
'wave'
,
'code'
,
'application'
,
'mouse'
,
'match'
,
'counter'
,
'bottle'
,
'half'
,
'cheek'
,
'resolution'
,
'back'
,
'knowledge'
,
'make'
,
'discussion'
,
'screw'
,
'length'
,
'accident'
,
'battle'
,
'dress'
,
'knee'
,
'log'
,
'package'
,
'it'
,
'turn'
,
'hearing'
,
'newspaper'
,
'layer'
,
'wealth'
,
'profile'
,
'imagination'
,
'answer'
,
'weekend'
,
'teacher'
,
'appearance'
,
'meet'
,
'bike'
,
'rise'
,
'belt'
,
'crash'
,
'bowl'
,
'equivalent'
,
'support'
,
'image'
,
'poem'
,
'risk'
,
'excitement'
,
'remote'
,
'secretary'
,
'public'
,
'produce'
,
'plane'
,
'display'
,
'money'
,
'sand'
,
'situation'
,
'punch'
,
'customer'
,
'title'
,
'shake'
,
'mortgage'
,
'option'
,
'number'
,
'pop'
,
'window'
,
'extent'
,
'nothing'
,
'experience'
,
'opinion'
,
'departure'
,
'dance'
,
'indication'
,
'boy'
,
'material'
,
'band'
,
'leader'
,
'sun'
,
'beautiful'
,
'muscle'
,
'farmer'
,
'variety'
,
'fat'
,
'handle'
,
'director'
,
'opportunity'
,
'calendar'
,
'outside'
,
'pace'
,
'bath'
,
'fish'
,
'consequence'
,
'put'
,
'owner'
,
'go'
,
'doctor'
,
'information'
,
'share'
,
'hurt'
,
'protection'
,
'career'
,
'finance'
,
'force'
,
'golf'
,
'garbage'
,
'aspect'
,
'kid'
,
'food'
,
'boot'
,
'milk'
,
'respond'
,
'objective'
,
'reality'
,
'raw'
,
'ring'
,
'mall'
,
'one'
,
'impact'
,
'area'
,
'news'
,
'international'
,
'series'
,
'impress'
,
'mother'
,
'shelter'
,
'strike'
,
'loan'
,
'month'
,
'seat'
,
'anything'
,
'entertainment'
,
'familiar'
,
'clue'
,
'year'
,
'glad'
,
'supermarket'
,
'natural'
,
'god'
,
'cost'
,
'conversation'
,
'tie'
,
'ruin'
,
'comfort'
,
'earth'
,
'storm'
,
'percentage'
,
'assistance'
,
'budget'
,
'strength'
,
'beginning'
,
'sleep'
,
'other'
,
'young'
,
'unit'
,
'fill'
,
'store'
,
'desire'
,
'hide'
,
'value'
,
'cup'
,
'maintenance'
,
'nurse'
,
'function'
,
'tower'
,
'role'
,
'class'
,
'camera'
,
'database'
,
'panic'
,
'nation'
,
'basket'
,
'ice'
,
'art'
,
'spirit'
,
'chart'
,
'exchange'
,
'feedback'
,
'statement'
,
'reputation'
,
'search'
,
'hunt'
,
'exercise'
,
'nasty'
,
'notice'
,
'male'
,
'yard'
,
'annual'
,
'collar'
,
'date'
,
'platform'
,
'plant'
,
'fortune'
,
'passion'
,
'friendship'
,
'spread'
,
'cancer'
,
'ticket'
,
'attitude'
,
'island'
,
'active'
,
'object'
,
'service'
,
'buyer'
,
'bite'
,
'card'
,
'face'
,
'steak'
,
'proposal'
,
'patient'
,
'heat'
,
'rule'
,
'resident'
,
'broad'
,
'politics'
,
'west'
,
'knife'
,
'expert'
,
'girl'
,
'design'
,
'salt'
,
'baseball'
,
'grab'
,
'inspection'
,
'cousin'
,
'couple'
,
'magazine'
,
'cook'
,
'dependent'
,
'security'
,
'chicken'
,
'version'
,
'currency'
,
'ladder'
,
'scheme'
,
'kitchen'
,
'employment'
,
'local'
,
'attention'
,
'manager'
,
'fact'
,
'cover'
,
'sad'
,
'guard'
,
'relative'
,
'county'
,
'rate'
,
'lunch'
,
'program'
,
'initiative'
,
'gear'
,
'bridge'
,
'breast'
,
'talk'
,
'dish'
,
'guarantee'
,
'beer'
,
'vehicle'
,
'reception'
,
'woman'
,
'substance'
,
'copy'
,
'lecture'
,
'advantage'
,
'park'
,
'cold'
,
'death'
,
'mix'
,
'hold'
,
'scale'
,
'tomorrow'
,
'blood'
,
'request'
,
'green'
,
'cookie'
,
'church'
,
'strip'
,
'forever'
,
'beyond'
,
'debt'
,
'tackle'
,
'wash'
,
'following'
,
'feel'
,
'maximum'
,
'sector'
,
'sea'
,
'property'
,
'economics'
,
'menu'
,
'bench'
,
'try'
,
'language'
,
'start'
,
'call'
,
'solid'
,
'address'
,
'income'
,
'foot'
,
'senior'
,
'honey'
,
'few'
,
'mixture'
,
'cash'
,
'grocery'
,
'link'
,
'map'
,
'form'
,
'factor'
,
'pot'
,
'model'
,
'writer'
,
'farm'
,
'winter'
,
'skill'
,
'anywhere'
,
'birthday'
,
'policy'
,
'release'
,
'husband'
,
'lab'
,
'hurry'
,
'mail'
,
'equipment'
,
'sink'
,
'pair'
,
'driver'
,
'consideration'
,
'leather'
,
'skin'
,
'blue'
,
'boat'
,
'sale'
,
'brick'
,
'two'
,
'feed'
,
'square'
,
'dot'
,
'rush'
,
'dream'
,
'location'
,
'afternoon'
,
'manufacturer'
,
'control'
,
'occasion'
,
'trouble'
,
'introduction'
,
'advice'
,
'bet'
,
'eat'
,
'kill'
,
'category'
,
'manner'
,
'office'
,
'estate'
,
'pride'
,
'awareness'
,
'slip'
,
'crack'
,
'client'
,
'nail'
,
'shoot'
,
'membership'
,
'soft'
,
'anybody'
,
'web'
,
'official'
,
'individual'
,
'pizza'
,
'interest'
,
'bag'
,
'spell'
,
'profession'
,
'queen'
,
'deal'
,
'resource'
,
'ship'
,
'guy'
,
'chocolate'
,
'joint'
,
'formal'
,
'upstairs'
,
'car'
,
'resort'
,
'abroad'
,
'dealer'
,
'associate'
,
'finger'
,
'surgery'
,
'comment'
,
'team'
,
'detail'
,
'crazy'
,
'path'
,
'tale'
,
'initial'
,
'arm'
,
'radio'
,
'demand'
,
'single'
,
'draw'
,
'yellow'
,
'contest'
,
'piece'
,
'quote'
,
'pull'
,
'commercial'
,
'shirt'
,
'contribution'
,
'cream'
,
'channel'
,
'suit'
,
'discipline'
,
'instruction'
,
'concert'
,
'speech'
,
'low'
,
'effective'
,
'hang'
,
'scratch'
,
'industry'
,
'breakfast'
,
'lay'
,
'join'
,
'metal'
,
'bedroom'
,
'minute'
,
'product'
,
'rest'
,
'temperature'
,
'many'
,
'give'
,
'argument'
,
'print'
,
'purple'
,
'laugh'
,
'health'
,
'credit'
,
'investment'
,
'sell'
,
'setting'
,
'lesson'
,
'egg'
,
'middle'
,
'marriage'
,
'level'
,
'evidence'
,
'phrase'
,
'love'
,
'self'
,
'benefit'
,
'guidance'
,
'affect'
,
'you'
,
'dad'
,
'anxiety'
,
'special'
,
'boyfriend'
,
'test'
,
'blank'
,
'payment'
,
'soup'
,
'obligation'
,
'reply'
,
'smile'
,
'deep'
,
'complaint'
,
'addition'
,
'review'
,
'box'
,
'towel'
,
'minor'
,
'fun'
,
'soil'
,
'issue'
,
'cigarette'
,
'internet'
,
'gain'
,
'tell'
,
'entry'
,
'spare'
,
'incident'
,
'family'
,
'refuse'
,
'branch'
,
'can'
,
'pen'
,
'grandfather'
,
'constant'
,
'tank'
,
'uncle'
,
'climate'
,
'ground'
,
'volume'
,
'communication'
,
'kind'
,
'poet'
,
'child'
,
'screen'
,
'mine'
,
'quit'
,
'gene'
,
'lack'
,
'charity'
,
'memory'
,
'tooth'
,
'fear'
,
'mention'
,
'marketing'
,
'reveal'
,
'reason'
,
'court'
,
'season'
,
'freedom'
,
'land'
,
'sport'
,
'audience'
,
'classroom'
,
'law'
,
'hook'
,
'win'
,
'carry'
,
'eye'
,
'smell'
,
'distribution'
,
'research'
,
'country'
,
'dare'
,
'hope'
,
'whereas'
,
'stretch'
,
'library'
,
'if'
,
'delay'
,
'college'
,
'plastic'
,
'book'
,
'present'
,
'use'
,
'worry'
,
'champion'
,
'goal'
,
'economy'
,
'march'
,
'election'
,
'reflection'
,
'midnight'
,
'slide'
,
'inflation'
,
'action'
,
'challenge'
,
'guitar'
,
'coast'
,
'apple'
,
'campaign'
,
'field'
,
'jacket'
,
'sense'
,
'way'
,
'visual'
,
'remove'
,
'weather'
,
'trash'
,
'cable'
,
'regret'
,
'buddy'
,
'beach'
,
'historian'
,
'courage'
,
'sympathy'
,
'truck'
,
'tension'
,
'permit'
,
'nose'
,
'bed'
,
'son'
,
'person'
,
'base'
,
'meat'
,
'usual'
,
'air'
,
'meeting'
,
'worth'
,
'game'
,
'independence'
,
'physical'
,
'brief'
,
'play'
,
'raise'
,
'board'
,
'she'
,
'key'
,
'writing'
,
'pick'
,
'command'
,
'party'
,
'yesterday'
,
'spring'
,
'candidate'
,
'physics'
,
'university'
,
'concern'
,
'development'
,
'change'
,
'string'
,
'target'
,
'instance'
,
'room'
,
'bitter'
,
'bird'
,
'football'
,
'normal'
,
'split'
,
'impression'
,
'wood'
,
'long'
,
'meaning'
,
'stock'
,
'cap'
,
'leadership'
,
'media'
,
'ambition'
,
'fishing'
,
'essay'
,
'salad'
,
'repair'
,
'today'
,
'designer'
,
'night'
,
'bank'
,
'drawing'
,
'inevitable'
,
'phase'
,
'vast'
,
'chip'
,
'anger'
,
'switch'
,
'cry'
,
'twist'
,
'personality'
,
'attempt'
,
'storage'
,
'being'
,
'preparation'
,
'bat'
,
'selection'
,
'white'
,
'technology'
,
'contract'
,
'side'
,
'section'
,
'station'
,
'till'
,
'structure'
,
'tongue'
,
'taste'
,
'truth'
,
'difficulty'
,
'group'
,
'limit'
,
'main'
,
'move'
,
'feeling'
,
'light'
,
'example'
,
'mission'
,
'might'
,
'wait'
,
'wheel'
,
'shop'
,
'host'
,
'classic'
,
'alternative'
,
'cause'
,
'agent'
,
'consist'
,
'table'
,
'airline'
,
'text'
,
'pool'
,
'craft'
,
'range'
,
'fuel'
,
'tool'
,
'partner'
,
'load'
,
'entrance'
,
'deposit'
,
'hate'
,
'article'
,
'video'
,
'summer'
,
'feature'
,
'extreme'
,
'mobile'
,
'hospital'
,
'flight'
,
'fall'
,
'pension'
,
'piano'
,
'fail'
,
'result'
,
'rub'
,
'gap'
,
'system'
,
'report'
,
'suck'
,
'ordinary'
,
'wind'
,
'nerve'
,
'ask'
,
'shine'
,
'note'
,
'line'
,
'mom'
,
'perception'
,
'brother'
,
'reference'
,
'bend'
,
'charge'
,
'treat'
,
'trick'
,
'term'
,
'homework'
,
'bake'
,
'bid'
,
'status'
,
'project'
,
'strategy'
,
'orange'
,
'let'
,
'enthusiasm'
,
'parent'
,
'concentrate'
,
'device'
,
'travel'
,
'poetry'
,
'business'
,
'society'
,
'kiss'
,
'end'
,
'vegetable'
,
'employ'
,
'schedule'
,
'hour'
,
'brave'
,
'focus'
,
'process'
,
'movie'
,
'illegal'
,
'general'
,
'coffee'
,
'ad'
,
'highway'
,
'chemistry'
,
'psychology'
,
'hire'
,
'bell'
,
'conference'
,
'relief'
,
'show'
,
'neat'
,
'funny'
,
'weight'
,
'quality'
,
'club'
,
'daughter'
,
'zone'
,
'touch'
,
'tonight'
,
'shock'
,
'burn'
,
'excuse'
,
'name'
,
'survey'
,
'landscape'
,
'advance'
,
'satisfaction'
,
'bread'
,
'disaster'
,
'item'
,
'hat'
,
'prior'
,
'shopping'
,
'visit'
,
'east'
,
'photo'
,
'home'
,
'idea'
,
'father'
,
'comparison'
,
'cat'
,
'pipe'
,
'winner'
,
'count'
,
'lake'
,
'fight'
,
'prize'
,
'foundation'
,
'dog'
,
'keep'
,
'ideal'
,
'fan'
,
'struggle'
,
'peak'
,
'safety'
,
'solution'
,
'hell'
,
'conclusion'
,
'population'
,
'strain'
,
'alarm'
,
'measurement'
,
'second'
,
'train'
,
'race'
,
'due'
,
'insurance'
,
'boss'
,
'tree'
,
'monitor'
,
'sick'
,
'course'
,
'drag'
,
'appointment'
,
'slice'
,
'still'
,
'care'
,
'patience'
,
'rich'
,
'escape'
,
'emotion'
,
'royal'
,
'female'
,
'childhood'
,
'government'
,
'picture'
,
'will'
,
'sock'
,
'big'
,
'gate'
,
'oil'
,
'cross'
,
'pin'
,
'improvement'
,
'championship'
,
'silly'
,
'help'
,
'sky'
,
'pitch'
,
'man'
,
'diamond'
,
'most'
,
'transition'
,
'work'
,
'science'
,
'committee'
,
'moment'
,
'fix'
,
'teaching'
,
'dig'
,
'specialist'
,
'complex'
,
'guide'
,
'people'
,
'dead'
,
'voice'
,
'original'
,
'break'
,
'topic'
,
'data'
,
'degree'
,
'reading'
,
'recording'
,
'bunch'
,
'reach'
,
'judgment'
,
'lie'
,
'regular'
,
'set'
,
'painting'
,
'mode'
,
'list'
,
'player'
,
'bear'
,
'north'
,
'wonder'
,
'carpet'
,
'heavy'
,
'officer'
,
'negative'
,
'clock'
,
'unique'
,
'baby'
,
'pain'
,
'assumption'
,
'disk'
,
'iron'
,
'bill'
,
'drawer'
,
'look'
,
'double'
,
'mistake'
,
'finish'
,
'future'
,
'brilliant'
,
'contact'
,
'math'
,
'rice'
,
'leave'
,
'restaurant'
,
'discount'
,
'sex'
,
'virus'
,
'bit'
,
'trust'
,
'event'
,
'wear'
,
'juice'
,
'failure'
,
'bug'
,
'context'
,
'mud'
,
'whole'
,
'wrap'
,
'intention'
,
'draft'
,
'pressure'
,
'cake'
,
'dark'
,
'explanation'
,
'space'
,
'angle'
,
'word'
,
'efficiency'
,
'management'
,
'habit'
,
'star'
,
'chance'
,
'finding'
,
'transportation'
,
'stand'
,
'criticism'
,
'flow'
,
'door'
,
'injury'
,
'insect'
,
'surprise'
,
'apartment'
]
# pylint: disable=line-too-long
# ISO 639-1 codes to language names.
LANGUAGE_CODES
=
immutabledict
.
immutabledict
(
{
LANGUAGE_CODES
=
{
'en'
:
'English'
,
'es'
:
'Spanish'
,
'pt'
:
'Portuguese'
,
...
...
@@ -60,7 +56,7 @@ LANGUAGE_CODES = immutabledict.immutabledict({
'pa'
:
'Punjabi'
,
'ml'
:
'Malayalam'
,
'fi'
:
'Finnish'
,
}
)
}
_ALPHABETS
=
'([A-Za-z])'
_PREFIXES
=
'(Mr|St|Mrs|Ms|Dr)[.]'
...
...
opencompass/datasets/TheoremQA.py
View file @
b03d5dc5
...
...
@@ -24,3 +24,15 @@ def TheoremQA_postprocess(text: str) -> str:
else
:
text
=
matches
[
0
].
strip
().
strip
(
'.,?!
\"\'
;:'
)
return
text
def TheoremQA_postprocess_v2(text: str) -> str:
    """Extract the trailing answer phrase from the last line of ``text``.

    The last line of the stripped response is scanned word by word from the
    end. Scanning stops at the first word that is ``is``, ``be`` or ``are``,
    or that ends with a colon; the words after that marker are the answer.
    If no marker is found, the whole last line is used.

    Args:
        text (str): Raw model output.

    Returns:
        str: The words following the last marker, joined by single spaces,
            with surrounding whitespace and then surrounding ``.``
            characters removed.
    """
    # str.strip() already removes leading/trailing newlines, so a separate
    # strip('\n') adds nothing.
    last_line = text.strip().split('\n')[-1]

    # Collect words back-to-front and reverse once at the end instead of
    # repeatedly prepending to a string.
    kept = []
    for entry in reversed(last_line.split(' ')):
        if entry in ('is', 'be', 'are') or entry.endswith(':'):
            break
        kept.append(entry)

    # NOTE: strip('.') removes dots on both ends, so a leading decimal point
    # (e.g. '.5') is dropped as well.
    return ' '.join(reversed(kept)).strip().strip('.')
opencompass/datasets/hellaswag.py
View file @
b03d5dc5
import
json
import
os.path
as
osp
from
datasets
import
Dataset
from
datasets
import
Dataset
,
DatasetDict
from
opencompass.registry
import
LOAD_DATASET
...
...
@@ -71,6 +71,32 @@ class hellaswagDataset_V3(BaseDataset):
return
dataset
@LOAD_DATASET.register_module()
class hellaswagDatasetwithICE(BaseDataset):
    """HellaSwag loader that returns a ``train`` and a ``val`` split.

    The ``train`` split is read from ``hellaswag_train_sampled25.jsonl`` and
    the ``val`` split from ``hellaswag.jsonl``.
    """

    @staticmethod
    def load(path):
        """Build a ``DatasetDict`` from the two JSONL files under ``path``.

        Each JSON line must provide ``query``, ``choices`` (at least four
        entries) and ``gold`` (an index into ``'ABCD'``).

        Args:
            path: Directory containing both JSONL files.

        Returns:
            DatasetDict: Splits ``train`` and ``val`` whose rows have the
                keys ``ctx``, ``A``, ``B``, ``C``, ``D`` and ``label``.
        """
        split_files = (
            ('train', 'hellaswag_train_sampled25.jsonl'),
            ('val', 'hellaswag.jsonl'),
        )
        dataset_dict = DatasetDict()
        for split, filename in split_files:
            rows = []
            jsonl_path = osp.join(path, filename)
            with open(jsonl_path, 'r', encoding='utf-8') as f:
                for line in f:
                    data = json.loads(line)
                    row = {}
                    # Keep only the text after the first ': ' of the query.
                    row['ctx'] = data['query'].split(': ', 1)[-1]
                    row['A'] = data['choices'][0]
                    row['B'] = data['choices'][1]
                    row['C'] = data['choices'][2]
                    row['D'] = data['choices'][3]
                    # Turn the gold index into its option letter.
                    row['label'] = 'ABCD'[data['gold']]
                    rows.append(row)
            dataset_dict[split] = Dataset.from_list(rows)
        return dataset_dict
class
hellaswagDatasetClean
(
BaseDataset
):
# load the contamination annotations of CEval from
...
...
opencompass/datasets/humaneval.py
View file @
b03d5dc5
...
...
@@ -156,10 +156,13 @@ def humaneval_postprocess_v2(text: str) -> str:
"""This is an advanced version of previous postprocess to handle more
situations, better to use this one."""
try
:
# for chatGLM r
aw
text
text
=
eval
(
text
)
# for chatGLM r
elated
text
eval_
text
=
eval
(
text
)
except
Exception
:
pass
else
:
if
isinstance
(
eval_text
,
str
):
text
=
eval_text
text
=
text
.
lstrip
(
'
\n
'
)
if
'```'
in
text
:
blocks
=
re
.
findall
(
r
'```(.*?)```'
,
text
,
re
.
DOTALL
)
...
...
opencompass/datasets/natural_question.py
View file @
b03d5dc5
...
...
@@ -77,9 +77,10 @@ class NQEvaluator(BaseEvaluator):
cnt
=
0
for
pred
,
cand_ans
in
zip
(
processed_predictions
,
processed_answers
):
detail
=
{
'pred'
:
pred
,
'answer'
:
cand_ans
,
'correct'
:
False
}
cnt
+=
int
(
any
([
cand
==
pred
for
cand
in
cand_ans
]))
if
int
(
any
([
cand
==
pred
for
cand
in
cand_ans
])):
detail
[
'correct'
]
=
True
# is_correct = any([cand == pred for cand in cand_ans])
is_correct
=
any
([
cand
in
pred
for
cand
in
cand_ans
])
cnt
+=
int
(
is_correct
)
detail
[
'correct'
]
=
is_correct
details
.
append
(
detail
)
score
=
cnt
/
len
(
predictions
)
*
100
...
...
opencompass/datasets/winogrande.py
View file @
b03d5dc5
import
json
import
os
from
datasets
import
Dataset
from
datasets
import
Dataset
,
DatasetDict
from
opencompass.registry
import
LOAD_DATASET
...
...
@@ -20,12 +20,12 @@ class winograndeDataset(BaseDataset):
for
line
in
f
:
line
=
json
.
loads
(
line
)
prompt
=
line
[
'sentence'
]
continue_prompt
=
prompt
.
split
(
'_'
)
continue_prompt
=
prompt
.
split
(
'_'
)
[
1
]
data_item
=
{
'opt1'
:
prompt
.
replace
(
'_'
,
line
[
'option1'
]),
'opt2'
:
prompt
.
replace
(
'_'
,
line
[
'option2'
]),
'answer'
:
line
[
'answer'
],
'cont'
:
continue_prompt
[
1
]
'cont'
:
continue_prompt
,
}
dataset_list
.
append
(
data_item
)
dataset_list
=
Dataset
.
from_list
(
dataset_list
)
...
...
@@ -44,13 +44,43 @@ class winograndeDataset_V2(BaseDataset):
for
line
in
f
:
line
=
json
.
loads
(
line
)
prompt
=
line
[
'sentence'
]
continue_prompt
=
prompt
.
split
(
'_'
)[
1
]
answer
=
line
[
'answer'
]
answer
=
' AB'
[
int
(
answer
)]
if
answer
!=
''
else
'NULL'
data_item
=
{
'opt1'
:
prompt
.
replace
(
'_'
,
line
[
'option1'
]),
'opt2'
:
prompt
.
replace
(
'_'
,
line
[
'option2'
]),
'answer'
:
answer
,
'cont'
:
continue_prompt
,
}
dataset_list
.
append
(
data_item
)
dataset_list
=
Dataset
.
from_list
(
dataset_list
)
return
dataset_list
@LOAD_DATASET.register_module()
class winograndeDataset_V3(BaseDataset):
    """WinoGrande loader reading local JSONL files, in the same row format as
    winograndeDataset_V2, for the ``train_xs`` and ``dev`` splits."""

    @staticmethod
    def load(path):
        """Build a ``DatasetDict`` from ``train_xs.jsonl`` and ``dev.jsonl``.

        Args:
            path: Directory containing the two JSONL files.

        Returns:
            DatasetDict: Splits ``train_xs`` and ``dev`` whose rows have the
                keys ``opt1``, ``opt2``, ``answer`` and ``cont``.
        """
        dataset_dict = DatasetDict()
        for split in ('train_xs', 'dev'):
            jsonl_path = os.path.join(path, split + '.jsonl')
            rows = []
            with open(jsonl_path, 'r', encoding='utf-8') as f:
                for raw in f:
                    record = json.loads(raw)
                    sentence = record['sentence']
                    # Text that follows the first blank marker.
                    continuation = sentence.split('_')[1]
                    label = record['answer']
                    if label != '':
                        # ' AB'[1] == 'A', ' AB'[2] == 'B'.
                        label = ' AB'[int(label)]
                    else:
                        label = 'NULL'
                    rows.append({
                        'opt1': sentence.replace('_', record['option1']),
                        'opt2': sentence.replace('_', record['option2']),
                        'answer': label,
                        'cont': continuation,
                    })
            dataset_dict[split] = Dataset.from_list(rows)
        return dataset_dict
opencompass/models/__init__.py
View file @
b03d5dc5
...
...
@@ -13,6 +13,7 @@ from .huggingface import HuggingFace # noqa: F401, F403
from
.huggingface
import
HuggingFaceCausalLM
# noqa: F401, F403
from
.huggingface
import
HuggingFaceChatGLM3
# noqa: F401, F403
from
.intern_model
import
InternLM
# noqa: F401, F403
from
.krgpt_api
import
KrGPT
# noqa: F401
from
.lightllm_api
import
LightllmAPI
# noqa: F401
from
.llama2
import
Llama2
,
Llama2Chat
# noqa: F401, F403
from
.lmdeploy_pytorch
import
LmdeployPytorchModel
# noqa: F401
...
...
opencompass/models/krgpt_api.py
0 → 100644
View file @
b03d5dc5
import
json
from
concurrent.futures
import
ThreadPoolExecutor
from
typing
import
Dict
,
List
,
Optional
,
Union
import
requests
from
opencompass.registry
import
MODELS
from
opencompass.utils.logging
import
get_logger
from
opencompass.utils.prompt
import
PromptList
from
.base_api
import
BaseAPIModel
PromptType
=
Union
[
PromptList
,
str
]
@MODELS.register_module()
class KrGPT(BaseAPIModel):
    """API model wrapper that POSTs chat messages to a KrGPT HTTP endpoint.

    Args:
        path (str): Passed through to ``BaseAPIModel``. Defaults to 'KrGPT'.
        url (str): Address the chat-completion requests are POSTed to.
        max_seq_len (int): Passed through to ``BaseAPIModel``.
            Defaults to 2048.
        meta_template (Dict, optional): Passed through to ``BaseAPIModel``.
        retry (int): Maximum number of request attempts per prompt.
            Defaults to 2.
        generation_kwargs (Dict, optional): Generation settings. Only
            ``max_new_tokens`` is read here (default 1024) to set
            ``self.max_out_len``. Defaults to an empty dict.
    """

    is_api: bool = True

    def __init__(
        self,
        path: str = 'KrGPT',
        url: str = 'http://101.69.162.5:9300/v1/chat/completions',
        max_seq_len: int = 2048,
        meta_template: Optional[Dict] = None,
        retry: int = 2,
        generation_kwargs: Optional[Dict] = None,
    ):
        # Create the dict per call: a ``dict()`` default in the signature
        # would be one object shared by every instance.
        if generation_kwargs is None:
            generation_kwargs = {}
        super().__init__(
            path=path,
            max_seq_len=max_seq_len,
            meta_template=meta_template,
            retry=retry,
            generation_kwargs=generation_kwargs,
        )
        self.logger = get_logger()
        self.url = url
        self.generation_kwargs = generation_kwargs
        self.max_out_len = self.generation_kwargs.get('max_new_tokens', 1024)

    def generate(self, inputs: List[str], max_out_len: int,
                 **kwargs) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[str]): A list of strings or PromptDicts.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            List[str]: A list of generated strings, in input order.
        """
        # NOTE(review): the ``max_out_len`` argument is not used; the value
        # derived from ``generation_kwargs`` is passed on instead. Confirm
        # this is intended.
        with ThreadPoolExecutor() as executor:
            results = list(
                executor.map(self._generate, inputs,
                             [self.max_out_len] * len(inputs)))
        return results

    def _generate(self,
                  input: PromptType,
                  max_out_len: int,
                  temperature: float = 0.0) -> str:
        """Generate a result for a single input.

        Args:
            input (PromptType): A string or PromptDict.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.
                Currently not forwarded to the endpoint.
            temperature (float): Sampling temperature. Currently not
                forwarded to the endpoint.

        Returns:
            str: The generated string.

        Raises:
            RuntimeError: If no usable response was obtained within
                ``self.retry`` attempts.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            for item in input:
                msg = {'content': item['prompt']}
                if item['role'] == 'HUMAN':
                    msg['role'] = 'user'
                elif item['role'] == 'BOT':
                    msg['role'] = 'assistant'
                elif item['role'] == 'SYSTEM':
                    msg['role'] = 'system'
                messages.append(msg)

        # The request is identical on every attempt, so build it once.
        header = {'content-type': 'application/json'}
        payload = json.dumps(dict(messages=messages))

        attempts = 0
        while attempts < self.retry:
            # Count the attempt up front so that every failure mode,
            # including connection and decode errors, is bounded by
            # ``self.retry`` instead of looping forever.
            attempts += 1
            try:
                raw_response = requests.post(self.url,
                                             headers=header,
                                             data=payload)
            except requests.ConnectionError:
                self.logger.error('Got connection error, retrying...')
                continue
            try:
                response = raw_response.json()
            except requests.JSONDecodeError:
                self.logger.error('JsonDecode error, got %s',
                                  str(raw_response.content))
                continue
            try:
                return response['choices'][0]['message']['content'].strip()
            except (KeyError, IndexError, TypeError):
                # The payload does not have the expected
                # choices[0].message.content layout; log it and retry.
                self.logger.error('Find error message in response: %s',
                                  str(response))

        raise RuntimeError('Calling KrGPT failed after retrying for '
                           f'{attempts} times. Check the logs for '
                           'details.')
opencompass/models/openai_api.py
View file @
b03d5dc5
...
...
@@ -415,6 +415,13 @@ class OpenAIAllesAPIN(OpenAI):
self
.
logger
.
error
(
data
)
else
:
return
choices
[
0
][
'message'
][
'content'
].
strip
()
try
:
match
=
re
.
match
(
r
'Error code: \d+ - (.*)'
,
response
[
'data'
])
err
=
eval
(
match
.
group
(
1
))[
'error'
]
if
err
[
'code'
]
==
'content_filter'
and
err
[
'status'
]
==
400
:
return
err
[
'message'
]
except
Exception
:
pass
self
.
logger
.
error
(
response
[
'msg'
])
self
.
logger
.
error
(
response
)
time
.
sleep
(
1
)
...
...
opencompass/runners/dlc.py
View file @
b03d5dc5
import
datetime
import
json
import
os
import
os.path
as
osp
import
random
...
...
@@ -38,6 +39,7 @@ class DLCRunner(BaseRunner):
task
:
ConfigDict
,
aliyun_cfg
:
ConfigDict
,
max_num_workers
:
int
=
32
,
eval_with_gpu
:
list
=
[
'plugin_eval'
],
retry
:
int
=
2
,
debug
:
bool
=
False
,
lark_bot_url
:
str
=
None
):
...
...
@@ -46,6 +48,8 @@ class DLCRunner(BaseRunner):
self
.
max_num_workers
=
max_num_workers
self
.
retry
=
retry
self
.
eval_with_gpu
=
eval_with_gpu
logger
=
get_logger
()
logger
.
warning
(
'To ensure the integrity of the log results, the log displayed '
...
...
@@ -93,19 +97,62 @@ class DLCRunner(BaseRunner):
num_gpus
=
task
.
num_gpus
task_name
=
task
.
name
is_eval_task
=
'OpenICLEval'
in
task_name
if
is_eval_task
and
num_gpus
==
0
:
for
check_name
in
self
.
eval_with_gpu
:
if
check_name
in
task_name
:
num_gpus
=
1
break
# Dump task config to file
mmengine
.
mkdir_or_exist
(
'tmp/'
)
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_params.py'
pwd
=
os
.
getcwd
()
try
:
cfg
.
dump
(
param_file
)
if
self
.
aliyun_cfg
.
get
(
'bashrc_path'
)
is
not
None
:
# using user's conda env
bashrc_path
=
self
.
aliyun_cfg
[
'bashrc_path'
]
assert
osp
.
exists
(
bashrc_path
)
assert
self
.
aliyun_cfg
.
get
(
'conda_env_name'
)
is
not
None
conda_env_name
=
self
.
aliyun_cfg
[
'conda_env_name'
]
shell_cmd
=
(
f
'source
{
bashrc_path
}
; '
f
'conda activate
{
conda_env_name
}
; '
)
else
:
# using public conda env
# users can also set `python_env_path` to their
# own env python path
assert
self
.
aliyun_cfg
.
get
(
'python_env_path'
)
is
not
None
shell_cmd
=
(
f
'export PATH=
{
self
.
aliyun_cfg
[
"python_env_path"
]
}
/bin:$PATH; '
# noqa: E501
f
'export PYTHONPATH=
{
pwd
}
:$PYTHONPATH; '
)
huggingface_cache
=
self
.
aliyun_cfg
.
get
(
'huggingface_cache'
)
if
huggingface_cache
is
not
None
:
# HUGGINGFACE_HUB_CACHE is a Legacy env variable, here we set
# `HF_HUB_CACHE` and `HUGGINGFACE_HUB_CACHE` for bc
shell_cmd
+=
f
'export HF_HUB_CACHE=
{
huggingface_cache
}
; '
shell_cmd
+=
f
'export HUGGINGFACE_HUB_CACHE=
{
huggingface_cache
}
; '
# noqa: E501
torch_cache
=
self
.
aliyun_cfg
.
get
(
'torch_cache'
)
if
torch_cache
is
not
None
:
shell_cmd
+=
f
'export TORCH_HOME=
{
torch_cache
}
; '
hf_offline
=
self
.
aliyun_cfg
.
get
(
'hf_offline'
,
True
)
if
hf_offline
:
shell_cmd
+=
'export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1; export HF_EVALUATE_OFFLINE=1; '
# noqa: E501
http_proxy
=
self
.
aliyun_cfg
.
get
(
'http_proxy'
)
if
http_proxy
is
not
None
:
shell_cmd
+=
f
'export http_proxy=
{
http_proxy
}
; export https_proxy=
{
http_proxy
}
; '
# noqa: E501
shell_cmd
+=
f
'export HTTP_PROXY=
{
http_proxy
}
; export HTTPS_PROXY=
{
http_proxy
}
; '
# noqa: E501
# Build up DLC command
pwd
=
os
.
getcwd
()
shell_cmd
=
(
f
'source
{
self
.
aliyun_cfg
[
"bashrc_path"
]
}
; '
f
'conda activate
{
self
.
aliyun_cfg
[
"conda_env_name"
]
}
; '
f
'cd
{
pwd
}
; '
'{task_cmd}'
)
hf_endpoint
=
self
.
aliyun_cfg
.
get
(
'hf_endpoint'
)
if
hf_endpoint
is
not
None
:
shell_cmd
+=
f
'export HF_ENDPOINT=
{
hf_endpoint
}
; '
shell_cmd
+=
f
'cd
{
pwd
}
; '
shell_cmd
+=
'{task_cmd}'
tmpl
=
(
'dlc create job'
f
" --command '
{
shell_cmd
}
'"
...
...
@@ -114,11 +161,10 @@ class DLCRunner(BaseRunner):
f
" -c
{
self
.
aliyun_cfg
[
'dlc_config_path'
]
}
"
f
" --workspace_id
{
self
.
aliyun_cfg
[
'workspace_id'
]
}
"
' --worker_count 1'
f
' --worker_cpu
{
max
(
num_gpus
*
6
,
8
)
}
'
f
' --worker_cpu
{
max
(
num_gpus
*
8
,
32
)
}
'
f
' --worker_gpu
{
num_gpus
}
'
f
' --worker_memory
{
max
(
num_gpus
*
64
,
48
)
}
'
f
" --worker_image
{
self
.
aliyun_cfg
[
'worker_image'
]
}
"
' --interactive'
)
f
' --worker_memory
{
max
(
num_gpus
*
128
,
256
)
}
'
f
" --worker_image
{
self
.
aliyun_cfg
[
'worker_image'
]
}
"
)
get_cmd
=
partial
(
task
.
get_command
,
cfg_path
=
param_file
,
template
=
tmpl
)
...
...
@@ -139,77 +185,64 @@ class DLCRunner(BaseRunner):
time
.
sleep
(
random
.
randint
(
0
,
10
))
def
_run_within_retry
():
try
:
process
=
subprocess
.
Popen
(
cmd
,
shell
=
True
,
text
=
True
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
job_id
=
None
job_allocated
=
False
job_finished
=
False
last_end_time
=
datetime
.
datetime
.
now
().
strftime
(
'%Y-%m-%dT%H:%M:%SZ'
)
while
True
:
if
not
job_allocated
:
line
=
process
.
stdout
.
readline
()
if
not
line
:
break
match
=
re
.
search
(
r
'(dlc[0-9a-z]+)'
,
line
)
if
match
and
job_id
is
None
:
job_id
=
match
.
group
(
1
)
stdout
.
write
(
line
)
match
=
re
.
search
(
r
'Job .* is \[Running\]'
,
line
)
if
match
:
job_allocated
=
True
else
:
try
:
process
.
wait
(
10
)
except
subprocess
.
TimeoutExpired
:
pass
else
:
job_finished
=
True
if
job_finished
:
this_end_time
=
datetime
.
datetime
.
now
(
).
strftime
(
'%Y-%m-%dT%H:%M:%SZ'
)
else
:
this_end_time
=
(
datetime
.
datetime
.
now
()
-
datetime
.
timedelta
(
seconds
=
10
)
).
strftime
(
'%Y-%m-%dT%H:%M:%SZ'
)
logs_cmd
=
(
'dlc logs'
output
=
subprocess
.
getoutput
(
cmd
)
match
=
re
.
search
(
r
'\|\s+(dlc[0-9a-z]+)\s+\|'
,
output
)
if
match
is
None
:
raise
RuntimeError
(
f
'Failed to launch dlc job for
{
output
}
'
)
else
:
job_id
=
match
.
group
(
1
)
stdout
.
write
(
output
)
pod_create_time
=
None
pri_time
=
None
initial_time
=
datetime
.
datetime
.
now
()
while
True
:
# 1. Avoid to request dlc too frequently.
# 2. DLC job may not be ready immediately after creation.
for
_
in
range
(
5
):
time
.
sleep
(
2
)
try
:
job_info
=
json
.
loads
(
subprocess
.
getoutput
(
f
'dlc get job
{
job_id
}
'
))
break
except
:
# noqa: E722
pass
else
:
raise
RuntimeError
(
f
'Failed to get job info for
{
job_id
}
'
)
status
=
job_info
[
'Status'
]
if
status
==
'Failed'
:
return
-
1
elif
status
==
'Succeeded'
:
return
0
elif
status
!=
'Running'
:
continue
# The pod time could be different from the real time.
# Therefore we need to extract the pod start time from
# the `job_info` and calculate the `start_time` and
# `end_time` in pod.
if
pod_create_time
is
None
:
pod_create_time
=
job_info
[
'GmtCreateTime'
]
pri_time
=
pod_create_time
pod_create_time
=
datetime
.
datetime
.
strptime
(
pod_create_time
,
'%Y-%m-%dT%H:%M:%SZ'
)
elasped_time
=
datetime
.
datetime
.
now
()
-
initial_time
cur_time
=
(
pod_create_time
+
elasped_time
).
strftime
(
'%Y-%m-%dT%H:%M:%SZ'
)
logs_cmd
=
(
'dlc logs'
f
'
{
job_id
}
{
job_id
}
-worker-0'
f
' --start_time
{
last_end_time
}
'
f
' --end_time
{
this_end_time
}
'
f
" -c
{
self
.
aliyun_cfg
[
'dlc_config_path'
]
}
"
)
log_process
=
subprocess
.
Popen
(
logs_cmd
,
shell
=
True
,
text
=
True
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
log_output
,
log_err
=
log_process
.
communicate
()
log_output
=
'
\n
'
.
join
(
log_output
.
split
(
'
\n
'
)[
2
:])
stdout
.
write
(
log_output
)
last_end_time
=
this_end_time
f
" -c
{
self
.
aliyun_cfg
[
'dlc_config_path'
]
}
"
f
' --start_time
{
pri_time
}
'
f
' --end_time
{
cur_time
}
'
)
log_output
=
subprocess
.
getoutput
(
logs_cmd
)
if
'[WARN] No logs found for the pod'
not
in
log_output
:
pri_time
=
cur_time
stdout
.
write
(
log_output
)
stdout
.
flush
()
if
job_finished
:
break
process
.
wait
()
return
process
.
returncode
finally
:
if
job_id
is
not
None
:
cancel_cmd
=
(
'dlc stop job'
f
'
{
job_id
}
'
f
" -c
{
self
.
aliyun_cfg
[
'dlc_config_path'
]
}
"
' -f'
)
subprocess
.
run
(
cancel_cmd
,
shell
=
True
,
text
=
True
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
return_code
=
_run_within_retry
()
retry
=
self
.
retry
...
...
tools/prompt_viewer.py
View file @
b03d5dc5
...
...
@@ -6,7 +6,8 @@ from mmengine.config import Config, ConfigDict
from
opencompass.openicl.icl_inferencer
import
(
AgentInferencer
,
ChatInferencer
,
CLPInferencer
,
GenInferencer
,
PPLInferencer
,
GenInferencer
,
LLInferencer
,
PPLInferencer
,
PPLOnlyInferencer
)
from
opencompass.registry
import
ICL_PROMPT_TEMPLATES
,
ICL_RETRIEVERS
from
opencompass.utils
import
(
Menu
,
build_dataset_from_cfg
,
...
...
@@ -81,14 +82,15 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
supported_inferencer
=
[
AgentInferencer
,
PPLInferencer
,
GenInferencer
,
CLPInferencer
,
PPLOnlyInferencer
,
ChatInferencer
PPLOnlyInferencer
,
ChatInferencer
,
LLInferencer
]
if
infer_cfg
.
inferencer
.
type
not
in
supported_inferencer
:
print
(
f
'Only
{
supported_inferencer
}
are supported'
)
return
for
idx
in
range
(
min
(
count
,
len
(
ice_idx_list
))):
if
issubclass
(
infer_cfg
.
inferencer
.
type
,
PPLInferencer
):
if
issubclass
(
infer_cfg
.
inferencer
.
type
,
(
PPLInferencer
,
LLInferencer
)):
labels
=
retriever
.
get_labels
(
ice_template
=
ice_template
,
prompt_template
=
prompt_template
)
ice
=
retriever
.
generate_ice
(
ice_idx_list
[
idx
],
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment