# 2.1 If there are one or more common entities shared by last_turn, the control sentence and the response, there is no need to use an entity as control.
# 2.2 If the entity only exists in control sentence and response, use this as the control code.
# 2.3 If there are no overlapped entities or words between the control sentence and the response, skip this data sample.
# 2.4 If there is no overlapped entity but there are overlapped words, add the entity in the control sentence (if any) as the control code if it is not in the dialog context.
# TODO
# In general, need to trim the control sentence when it is too long.
# Need to lowercase to match?
# calculate common entity between control sentence and response
# Two entities count as "common" when one contains the other; the containing
# (outer) one is the value recorded.
# NOTE(review): presumably the entities are strings, making `in` a substring
# test -- confirm against where the entity lists are built.
common_entity_list = []
for ctrl_entity in control_sent_entities:
    for resp_entity in response_entities:
        if resp_entity in ctrl_entity:
            # The control-sentence entity contains the response entity.
            common_entity_list.append(ctrl_entity)
            # Stop at the first match: at most one entry per control entity.
            break
        elif ctrl_entity in resp_entity:
            # The response entity contains the control-sentence entity.
            common_entity_list.append(resp_entity)
            break
iflen(common_entity_list)==0:
# calculate overlap between control sentence and response
# Summary report: skipped samples, control cases, and control-code counts.
print(
    "number of skip sentences: %d (one contain another: %d + no overlap: %d)"
    % (n_skip, n_skip_one_contain_another, n_skip_no_overlap)
)
print(
    "Total data size: %d. Number of control case: %d (entity control: %d + overlap control: %d)"
    % (len(output_data), n_control, n_entity_control, n_overlap_control)
)
print(
    "Number of control code: %d vs. number of control case: %d (averaged control code per case: %.4f)"
    % (total_num_control_code, n_control, avg_num_control_code)
)