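"""Distractor (answer option) generation for multiple-choice questions.

Combines an LLM prompt, sentence-embedding similarity, sense2vec neighbours,
WordNet synonyms, and named entities from the context to build a shuffled
list of options that includes the correct answer.
"""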
import asyncio
import random

import nltk
from nltk.corpus import wordnet
from sentence_transformers import util

from load_models import load_nlp_models, load_llama, load_qa_models
from utils import QuestionGenerationError

nltk.download('wordnet', quiet=True)

# Shared models, loaded once at import time
nlp, s2v = load_nlp_models()
llm = load_llama()
similarity_model, spell = load_qa_models()
context_model = similarity_model
def get_similar_words_sense2vec(word, n=3):
    """Return up to n words similar to `word` according to sense2vec."""
    # Try the word tagged with its most likely part of speech first
    word_with_pos = word + "|NOUN"
    if word_with_pos in s2v:
        similar_words = s2v.most_similar(word_with_pos, n=n)
        return [w.split("|")[0] for w, _ in similar_words]

    # Fall back to the untagged form
    if word in s2v:
        similar_words = s2v.most_similar(word, n=n)
        return [w.split("|")[0] for w, _ in similar_words]

    return []

def get_synonyms(word, n=3):
    """Return up to n WordNet synonyms of `word`, excluding the word itself."""
    synonyms = []
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            name = lemma.name().replace('_', ' ')
            if name != word and name not in synonyms:
                synonyms.append(name)
                if len(synonyms) == n:
                    return synonyms
    return synonyms

def gen_options(answer, context, question):
    """Generate distractor options for a question by prompting the LLM."""
    prompt = f'''Given the following context, question, and correct answer,
generate 4 incorrect but plausible answer options. The options should be:
1. Contextually related to the given context
2. Grammatically consistent with the question
3. Different from the correct answer
4. Not explicitly mentioned in the given context

Context: {context}
Question: {question}
Correct Answer: {answer}

Provide the options in a semicolon-separated list. Output must contain only the options and nothing else.
'''
    options = [answer]
    response = llm.invoke(prompt, stop=['<|eot_id|>'])
    incorrect_options = [option.strip() for option in response.split(';') if option.strip()]
    options.extend(incorrect_options)
    random.shuffle(options)
    print(options)
    return options

def generate_options(answer, context, n=3):
    """Generate n distractors for `answer` using embeddings, sense2vec, WordNet, and NER."""
    options = [answer]

    # Rank context words by embedding similarity to the answer
    answer_embedding = context_model.encode(answer)
    context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
    similarity_scores = [util.pytorch_cos_sim(context_model.encode(word), answer_embedding).item() for word in context_words]
    sorted_context_words = [word for _, word in sorted(zip(similarity_scores, context_words), reverse=True)]
    options.extend(sorted_context_words[:n])

    # Try to get similar words based on sense2vec
    similar_words = get_similar_words_sense2vec(answer, n)
    options.extend(similar_words)

    # If we don't have enough options, try synonyms
    if len(options) < n + 1:
        synonyms = get_synonyms(answer, n - len(options) + 1)
        options.extend(synonyms)

    # If we still don't have enough options, extract other entities from the context
    if len(options) < n + 1:
        doc = nlp(context)
        entities = [ent.text for ent in doc.ents if ent.text.lower() != answer.lower()]
        options.extend(entities[:n - len(options) + 1])

    # If we still need more options, add some random words from the context
    if len(options) < n + 1:
        options.extend(random.sample(context_words, min(n - len(options) + 1, len(context_words))))

    print(f"\n\nAll Possible Options: {options}\n\n")

    # Keep the correct answer plus n unique distractors, then shuffle
    options = list(dict.fromkeys(options))[:n + 1]
    random.shuffle(options)
    return options

async def generate_options_async(answer, context, n=3):
    """Async variant of generate_options; offloads model calls to worker threads."""
    try:
        options = [answer]

        # Rank context words by embedding similarity to the answer
        answer_embedding = await asyncio.to_thread(context_model.encode, answer)
        context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
        similarity_scores = [util.pytorch_cos_sim(await asyncio.to_thread(context_model.encode, word), answer_embedding).item() for word in context_words]
        sorted_context_words = [word for _, word in sorted(zip(similarity_scores, context_words), reverse=True)]
        options.extend(sorted_context_words[:n])

        # Try to get similar words based on sense2vec
        similar_words = await asyncio.to_thread(get_similar_words_sense2vec, answer, n)
        options.extend(similar_words)

        # If we don't have enough options, try synonyms
        if len(options) < n + 1:
            synonyms = await asyncio.to_thread(get_synonyms, answer, n - len(options) + 1)
            options.extend(synonyms)

        # Keep the correct answer plus n unique distractors, then shuffle
        options = list(dict.fromkeys(options))[:n + 1]
        random.shuffle(options)
        return options
    except Exception as e:
        raise QuestionGenerationError(f"Error in generating options: {str(e)}") from e
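
# Illustrative usage sketch. The sample context, question, and answer below are
# hypothetical and not part of the module; running this assumes the models in
# load_models.py load successfully in the current environment.
if __name__ == "__main__":
    sample_context = "The Eiffel Tower was completed in 1889 and stands in Paris, France."
    sample_question = "In which city does the Eiffel Tower stand?"
    sample_answer = "Paris"

    # Embedding / sense2vec / WordNet-based distractors
    print(generate_options(sample_answer, sample_context, n=3))

    # LLM-based distractors
    print(gen_options(sample_answer, sample_context, sample_question))

    # Async variant
    print(asyncio.run(generate_options_async(sample_answer, sample_context, n=3)))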