Commit 76b99c23 authored by Stergios Papadopoulos's avatar Stergios Papadopoulos
Browse files

- Added generator module that generates answers to user's questions. In the...

- Added generator module that generates answers to user's questions. In the current state it works in the command line.
- Future work is to further clean the chunks (keywords and unnecessary text should be removed) and to create the UI.
parent 248773d9
Loading
Loading
Loading
Loading
+6 −8
Original line number Diff line number Diff line
import glob
import random
from typing import Any

from mydoc import MyDoc
from constantscls import Consts
from openai import OpenAI
@@ -19,7 +18,7 @@ class Embedder(Consts):
    def __init__(self, db_name="chromadb"):
        """
        In call a chroma db is created named as specified.
        Call vectorize() to create a collection and add the data stored in _chunks list.
        Call add_data() to create a collection and add the data stored in _chunks list.
        :param db_name: The name of the database.
        """

@@ -285,7 +284,7 @@ class Embedder(Consts):
        return collection.query(
            query_texts=query_text,
            n_results=n_results
        )
        )["documents"][0]

    def count(self, collection_name) -> int:
        """
@@ -344,8 +343,7 @@ class Embedder(Consts):
        # For 2d
        if "2d" in dimensions:
            tsne = TSNE(n_components=2, random_state=42)
            reduced_vectors = tsne.fit_transform(embeddings)
            print(reduced_vectors)
            reduced_vectors = tsne.fit_transform(original_embeddings)
            fig = plt.Figure(data=[plt.Scatter(
                x=reduced_vectors[:, 0],
                y=reduced_vectors[:, 1],
@@ -368,7 +366,7 @@ class Embedder(Consts):
        # For 3d
        if "3d" in dimensions:
            tsne = TSNE(n_components=3, random_state=42)
            reduced_vectors = tsne.fit_transform(embeddings)
            reduced_vectors = tsne.fit_transform(original_embeddings)
            print(reduced_vectors)
            fig = plt.Figure(data=[plt.Scatter3d(
                x=reduced_vectors[:, 0],
@@ -398,9 +396,9 @@ embedder = Embedder()
# # print(embedder.get_chunks())
# embedder.add_data("Mycollection")

# print(embedder.search_similar("Mycollection", "Τι είναι η δεξαμενή?", n_results=3))
print(embedder.search_similar("Mycollection", "Τι είναι η δεξαμενή?", n_results=3))

embedder.visualize("Mycollection", dimensions=["2d", "3d"])
# embedder.visualize("Mycollection", dimensions=["2d", "3d"])


generator.py

0 → 100644
+150 −0
Original line number Diff line number Diff line
from embedder import Embedder
from openai import OpenAI
from dotenv import load_dotenv
import os

class Generator:
    """Answers visitor questions about the Aiani archaeological museum with a
    GPT chat model, grounding each answer in text chunks retrieved from a
    vector-store collection (retrieval-augmented generation)."""

    def __init__(self, embedder: "Embedder", collection_name: str, n_results: int = 3):
        """
        Generator that answers questions using the model used to generate the specified collection.
        Just create an instance of the class and use the generate_answer() method to answer questions.
        :param embedder: The embedder used to retrieve context relevant to the user's question.
        :param collection_name: The name of the collection that stores the chunks relevant to the user's question.
        :param n_results: The number of relevant chunks to retrieve. Default is 3.
        :raises RuntimeError: If OPENAI_API_KEY is missing from both the environment and the .env file.
        """
        load_dotenv()
        # Fail fast with a clear message. The previous
        # `os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")` raised an
        # opaque TypeError when the key was absent and was a no-op otherwise.
        if os.getenv("OPENAI_API_KEY") is None:
            raise RuntimeError("OPENAI_API_KEY is not set; add it to the environment or a .env file.")

        # Initializations
        self._gpt_client = OpenAI()  # reads OPENAI_API_KEY from the environment
        self._system_prompt = ("Είσαι ένας ξεναγός του αρχαιολογικού μουσείου Αιανής που βρισκεται στην Κοζάνη (μια μικρή πόλη στην Ελλάδα). "
                               "Στόχος σου είναι να απαντάς στις ερωτήσεις που κάνουν οι επισκέπτες. "
                               "Για κάθε ερώτηση θα σου παρέχεται σχετικά κομμάτια κειμένου τα οποία μπορείς να συμβουλευτείς για να απαντήσεις στην ερώτηση του χρήστη."
                               "Στην περίπτωση που δεν γνωρίζεις την απάντηση στην ερώτηση που έθεσε ο χρήστης πες με ευγενικό τρόπο πως δεν γνωρίζεις την απάντηση και μήπως θέλει να ρωτήσει κάτι άλλο.")
        self._model = "gpt-4o-mini"  # default; generate_answer() may override per call
        self._conversation: list[dict[str, str]] = [
            {"role": "system", "content": self._system_prompt}
        ]

        self._embedder = embedder
        self._collection_name = collection_name
        self._n_results = n_results

    ## ====== PRIVATE METHODS ====== ##
    def _prepare_prompt(self, question: str) -> str:
        """
        Creates a prompt for the GPT model: the user's question followed by the
        retrieved context chunks. The caller appends it to self._conversation.
        :param question: The user's question.
        :return: The prompt for the GPT model.
        """
        prompt = f"{question}\n\n"
        prompt += "Παρακαλώ συμβουλεύσου τα παρακάτω σχετικά με την ερώτηση κείμενα πριν απαντήσεις: \n\n"
        for chunk in self._embedder.search_similar(self._collection_name, question, n_results=self._n_results):
            prompt += chunk + "\n\n"

        # NOTE: the debug print of the full prompt was removed here; it leaked
        # the retrieved context into the CLI output.
        return prompt

    def _fetch_conversation(self) -> list[dict[str, str]]:
        """
        Fetches the conversation from self._conversation. Raises if empty.
        :return: List of conversation messages.
        :raises Exception: If no messages have been added to the conversation yet.
        """
        if self._is_conversation_empty():
            raise Exception("Conversation is empty. Use _update_conversation() to add messages to the conversation.")
        return self._conversation

    def _get_answering_fn(self) -> "Callable[[], Iterator[str]]":
        """
        Gets the streaming answering function that matches self._model.
        :return: The bound method to be used for answering.
        :raises Exception: If self._model is not a supported model.
        """
        gpt_models = ["chatgpt-4o-latest", "gpt-4o-mini", "o1-preview"]
        if self._model in gpt_models:
            return self._gpt_answering_fn
        raise Exception(f"Currently the only supported models are {', '.join(gpt_models)}")

    def _update_conversation(self, role: str, content: str) -> None:
        """
        Updates the conversation with the given role and content.
        :param role: The role of the message (e.g. "user", "assistant").
        :param content: The content of the message.
        :return: None
        """
        self._conversation.append({"role": role, "content": content})

    def _gpt_answering_fn(self) -> "Iterator[str]":
        """
        Streams an answer from the GPT chat-completions API.
        _update_conversation() must have been called with the user prompt first.
        :return: Generator of answer chunks; each string is a piece of the answer.
        """
        # The newer OpenAI API names the instruction role "developer". This
        # mutates the stored first message in place, so it stays "developer"
        # for the remainder of the session.
        conversation = self._fetch_conversation()
        conversation[0]["role"] = "developer"

        stream = self._gpt_client.chat.completions.create(
            model=self._model,
            messages=conversation,
            stream=True,
        )

        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _is_conversation_empty(self) -> bool:
        """
        Checks if the conversation is empty.
        :return: True if empty else False
        """
        return not self._conversation

    ## ====== CALLABLE METHODS ====== ##
    def generate_answer(self, question: str, model: str) -> None:
        """
        Generates an answer to the given question, streams it to stdout, and
        records both the prompt and the answer in the conversation history.
        :param question: The question to answer.
        :param model: The model to use for generating the answer.
        :return: None
        """
        # Set the model to the specified one
        self._model = model

        # Prepare the prompt (question + retrieved context)
        prompt = self._prepare_prompt(question)

        # Update the conversation list
        self._update_conversation("user", prompt)

        # Get the answering fn based on the given model
        answering_fn = self._get_answering_fn()

        answer = ""
        for chunk in answering_fn():
            print(chunk, end="")
            answer += chunk

        # Save answer to the conversation
        self._update_conversation("assistant", answer)

    def get_conversation(self) -> list[dict[str, str]]:
        """
        Returns a shallow copy of the conversation so far (system/developer,
        user and assistant messages). Previously an unimplemented stub.
        :return: List of conversation message dicts.
        """
        return list(self._conversation)

def main() -> None:
    """Run a simple command-line Q&A loop against the "Mycollection" collection.

    Exits cleanly on Ctrl-C or end-of-input instead of crashing with a
    KeyboardInterrupt/EOFError traceback.
    """
    embedder = Embedder()
    gen = Generator(embedder=embedder, collection_name="Mycollection", n_results=5)

    while True:
        try:
            question = input("Ask a question: ")
        except (EOFError, KeyboardInterrupt):
            print()
            break
        gen.generate_answer(question, model="gpt-4o-mini")
        print("\n")


# Guard the interactive loop so importing this module has no side effects.
if __name__ == "__main__":
    main()