File size: 2,219 Bytes
65246cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
from dotenv import load_dotenv
from langchain_google_genai import GoogleGenerativeAI
from langchain.prompts import PromptTemplate;
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA;
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import HuggingFaceInstructEmbeddings

# Read the variables defined in the .env file into the process environment
load_dotenv()

# Folder on disk where the FAISS index is saved to and loaded from
local_file_path = "faiss_db"

# Gemini LLM client, authenticated with the key taken from the environment
api_key = os.getenv("GOOGLE_API_KEY")
llm = GoogleGenerativeAI(
    model="gemini-pro",
    temperature=0,
    google_api_key=api_key,
)

# Embedding model used to turn text into vectors for the index
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")

# Store data in vector database for semantic search
def store_vector_db():
    """Build a FAISS index from ``sampledata.csv`` and save it locally.

    Each CSV row is loaded as a document whose source is taken from the
    ``prompt`` column. The documents are embedded with the module-level
    ``embeddings`` model and the resulting index is written to the folder
    named by ``local_file_path``.
    """
    documents = CSVLoader(
        file_path="sampledata.csv",
        source_column="prompt",
    ).load()

    # Embed the documents, then persist the index for later retrieval
    index = FAISS.from_documents(documents=documents, embedding=embeddings)
    index.save_local(local_file_path)

# Retrieve data stored in vector database and pass to LLM
def get_retieval_chain(score_threshold: float = 0.7):
    """Build a RetrievalQA chain over the locally saved FAISS index.

    Loads the index from the folder named by ``local_file_path``, wraps it in
    a retriever that filters documents by relevance score, and combines that
    retriever with the module-level ``llm`` using a prompt that instructs the
    model to answer from the retrieved context only.

    Args:
        score_threshold: Minimum relevance score a document must reach to be
            passed to the LLM as context. Defaults to 0.7.

    Returns:
        A ``RetrievalQA`` chain ("stuff" type) that reads the question from
        the ``"query"`` input key and also returns the source documents.
    """
    # NOTE(security): load_local deserializes pickled data, which is why the
    # explicit opt-in flag is required. Only load an index folder produced by
    # a trusted process (such as store_vector_db); never untrusted files.
    vectordb = FAISS.load_local(
        local_file_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    # The threshold must go through search_kwargs together with the
    # "similarity_score_threshold" search type; passing score_threshold as a
    # bare keyword to as_retriever does not apply any filtering.
    retriever = vectordb.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": score_threshold},
    )

    # Prompt that reduces hallucinations and yields a fixed reply when the
    # answer is not present in the retrieved context
    prompt_template = """Given the following context and a question, generate an answer based on this context only.
    In the answer try to provide as much text as possible from "response" section in the source document context without making much changes.
    If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer.

    CONTEXT: {context}

    QUESTION: {question}"""

    PROMPT = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        input_key="query",
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )
    return chain