"""
This example computes the similarity score between a query and every
sentence in a corpus using a Cross-Encoder for semantic textual similarity (STS).
It then prints the corpus sentences ordered from the highest to the lowest score.
"""
import numpy as np
from sentence_transformers.cross_encoder import CrossEncoder

# Cross-Encoder model used to score (query, sentence) pairs.
model = CrossEncoder('cross-encoder/stsb-distilroberta-base')

# The sentence that every corpus entry is compared against.
query = 'A man is eating pasta.'

# Candidate sentences to be ranked against the query.
corpus = ['A man is eating food.',
          'A man is eating a piece of bread.',
          'The girl is carrying a baby.',
          'A man is riding a horse.',
          'A woman is playing violin.',
          'Two men pushed carts through the woods.',
          'A man is riding a white horse on an enclosed ground.',
          'A monkey is playing drums.',
          'A cheetah is running behind its prey.',
          ]

# Pair the query with each corpus sentence; the pairs keep the corpus order,
# so position i in the scores below is used as the score for corpus[i].
sentence_combinations = [[query, corpus_sentence] for corpus_sentence in corpus]

# Score all pairs in a single predict() call.
similarity_scores = model.predict(sentence_combinations)

# np.argsort sorts ascending, so reverse it to list the highest score first.
# Slicing (instead of reversed()) yields a reusable array rather than a
# one-shot iterator, with the same ordering.
sim_scores_argsort = np.argsort(similarity_scores)[::-1]

# Print the query followed by "<score>\t<sentence>" lines, best match first.
print("Query:", query)
for idx in sim_scores_argsort:
    print("{:.2f}\t{}".format(similarity_scores[idx], corpus[idx]))