""" This is a simple application for sentence embeddings: clustering Sentences are mapped to sentence embeddings and then k-mean clustering is applied. """ from sentence_transformers import SentenceTransformer from sklearn.cluster import KMeans embedder = SentenceTransformer("all-MiniLM-L6-v2") # Corpus with example sentences corpus = [ "A man is eating food.", "A man is eating a piece of bread.", "A man is eating pasta.", "The girl is carrying a baby.", "The baby is carried by the woman", "A man is riding a horse.", "A man is riding a white horse on an enclosed ground.", "A monkey is playing drums.", "Someone in a gorilla costume is playing a set of drums.", "A cheetah is running behind its prey.", "A cheetah chases prey on across a field.", ] corpus_embeddings = embedder.encode(corpus) # Perform kmean clustering num_clusters = 5 clustering_model = KMeans(n_clusters=num_clusters) clustering_model.fit(corpus_embeddings) cluster_assignment = clustering_model.labels_ clustered_sentences = [[] for i in range(num_clusters)] for sentence_id, cluster_id in enumerate(cluster_assignment): clustered_sentences[cluster_id].append(corpus[sentence_id]) for i, cluster in enumerate(clustered_sentences): print("Cluster ", i + 1) print(cluster) print("")