# 2.1 If one or more entities are shared by last_turn, the control sentence and the response, there is no need to use an entity as control.
# 2.2 If the entity only exists in control sentence and response, use this as the control code.
# 2.3 If there is no overlapped entity or words between control sentence and response, skip this data sample.
# 2.4 If there is no overlapped entity but there are overlapped words, add entity in the control sentence (if any) as the control code if it is not in the dialog context
# TODO
# In general, need to trim the control sentence when it is too long.
# Collect the entities shared by the control sentence and the response.
# Two entities count as "common" when one is contained in the other
# (presumably both are strings, so `in` is a substring test -- confirm
# against the code that builds the entity lists). The containing entity
# of the pair is the one recorded.
common_entity_list = []
for ctrl_entity in control_sent_entities:
    for resp_entity in response_entities:
        if resp_entity in ctrl_entity:
            # The control entity contains the response entity.
            common_entity_list.append(ctrl_entity)
            # Stop at the first match so each control entity
            # contributes at most one entry.
            break
        elif ctrl_entity in resp_entity:
            # The response entity contains the control entity.
            common_entity_list.append(resp_entity)
            break
if not common_entity_list:
    # No shared entity was found: fall back to word-level overlap
    # between the control sentence and the response.
    # Tokenize both by splitting on whitespace.
    control_word_list = control_sent.split()
    response_word_list = response.split()
    # Disabled alternative: lemma-keyed lookup table of the response words.
    # response_word_table = {wn_lemma.lemmatize(word): True for word in response_word_list}
# Summary statistics for the processed data.

# How many samples were skipped, broken down by reason.
print(
    "number of skip sentences: %d (one contain another: %d + no overlap: %d)"
    % (n_skip, n_skip_one_contain_another, n_skip_no_overlap)
)

# Output size and how many samples carry a control signal, by control type.
print(
    "Total data size: %d. Number of control case: %d (entity control: %d + overlap control: %d)"
    % (len(output_data), n_control, n_entity_control, n_overlap_control)
)

# Control-code counts, including the average number of codes per control case.
print(
    "Number of control code: %d; number of control case: %d; number of control case without control code: %d (averaged control code per case: %.4f)"
    % (
        total_num_control_code,
        n_control,
        n_control_without_code,
        avg_num_control_code,
    )
)