Commit a0593bcf authored by Stergios Papadopoulos's avatar Stergios Papadopoulos
Browse files

generator.py:

 - Improved system prompt.
 - Greets the user when the app starts.
 - Added a choice of whether or not to stream the answer.
 - Created a non-streaming answer method.
 - Added a generate-structured-answer method.
 - Made audio generation faster with threads.

 mydoc.py:
parent 90a512c2
Loading
Loading
Loading
Loading
+10 −10
Original line number Diff line number Diff line
@@ -409,18 +409,18 @@ class Embedder(Consts):


# embedder = Embedder()
# embedder.load_docs(directory="aiani dedomena/*", chunking_type=Embedder.ByChar)
# # print(embedder.get_chunks())
#
#
# embedder.delete_collections("all")
# embedder.load_docs(directory="aiani dedomena/to_embed/*", chunking_type=Embedder.ByChar)
# print(embedder.get_chunks())
# #
# #
# # # print(embedder.get_chunks())
# embedder.delete_collections("all")
# # #
# # # # print(embedder.get_chunks())
# embedder.add_data("Mycollection")
#
# print(embedder.search_similar("Mycollection", "Τι είναι η δεξαμενή?", n_results=3))
#
# # embedder.visualize("Mycollection", dimensions=["2d", "3d"])
# #
# # print(embedder.search_similar("Mycollection", "Τι είναι η δεξαμενή?", n_results=3))
# #
# embedder.visualize("Mycollection", dimensions=["2d", "3d"])


+125 −18
Original line number Diff line number Diff line
@@ -2,10 +2,11 @@ from embedder import Embedder
from openai import OpenAI
from dotenv import load_dotenv
import os
from pathlib import Path

class Generator:

    def __init__(self, embedder: Embedder, collection_name, n_results=3):
    def __init__(self, embedder: Embedder, collection_name, n_results=3, model="gpt-4o-mini"):
        """
        Generator that answers questions using the model used to generate the specified collection.
        Just create an instance of the class and use the generate_answer() method to answer questions.
@@ -21,13 +22,19 @@ class Generator:
        self._system_prompt = ("Είσαι ένας ξεναγός του αρχαιολογικού μουσείου Αιανής που βρισκεται στην Κοζάνη (μια μικρή πόλη στην Ελλάδα). "
                               "Στόχος σου είναι να απαντάς στις ερωτήσεις που κάνουν οι επισκέπτες. "
                               "Για κάθε ερώτηση θα σου παρέχεται σχετικά κομμάτια κειμένου τα οποία μπορείς να συμβουλευτείς για να απαντήσεις στην ερώτηση του χρήστη."
                               "Υπάρχει περίπτωση ο χρήστης να σου γράψει κάτι το οποίο δεν χρειάζεται να συμβουλευτείς "
                               "τα σχετικά κομμάτια κειμένου για να απαντήσεις, όπως για παράδειγμα 'Ευχαριστώ πολύ' ή "
                               "'γειά σου' σε αυτές τις περιπτώσεις μην λάβεις υπόψην σου τα σχετικά κομμάτια κειμένου που θα σου δοθούν."
                               "Στην περίπτωση που δεν γνωρίζεις την απάντηση στην ερώτηση που έθεσε ο χρήστης πες με ευγενικό τρόπο πως δεν γνωρίζεις την απάντηση και μήπως θέλει να ρωτήσει κάτι άλλο."
                               "Σε κάθε κομμάτι κειμένου που σου παρέχεται θα υπάρχει και η πηγή απο την οποία προήλθε και θα αναγράφεται στο τέλος του μετά την λέξη κλειδί «Πηγή:», "
                               "αν χρησιμοποιήσεις κάποια απο τα κομμάτια αυτά στο τέλος της απάντησης σου παρέθεσε της πηγές απο τα κομμάτια κειμένου που χρησιμοποίησες γράφοντας «Πηγές: (αναφορά των πηγών σε bullets)»"
                               "Μην βάζεις δικές σου πηγές αλλά μόνο αυτές που αναφέρονται σε κάθε κομμάτι κειμένου μετά την λέξη κλειδή «Πηγή:»")
        self._model = "gpt-4o-mini"

        self._model = model

        self._conversation = [
            {"role": "system", "content": self._system_prompt}
            {"role": "system", "content": self._system_prompt},
            {"role": "assistant", "content": "Γειά σας είμαι ψηφιακός βοηθός του μουσείου Αιανής πως μπορώ να σας βοηθήσω;"}
        ]

        self._embedder = embedder
@@ -45,7 +52,7 @@ class Generator:

        prompt = ""
        prompt += f"{question}\n\n"
        prompt += "Παρακαλώ συμβουλεύσου τα παρακάτω σχετικά με την ερώτηση κείμενα πριν απαντήσεις: \n\n"
        prompt += "Παρακαλώ συμβουλεύσου τα παρακάτω σχετικά με την ερώτηση κείμενα πριν απαντήσεις εάν η ερώτηση του χρήστη το απαιτεί: \n\n"

        similars = self._embedder.search_similar(self._collection_name, question, n_results=self._n_results)
        texts = similars[0]
@@ -67,14 +74,18 @@ class Generator:
        return self._conversation


    def _get_answering_fn(self) -> callable:
    def _get_answering_fn(self, streaming=True) -> callable:
        """
        Gets the answering function based on the model.
        :param streaming: weather or not to stream the answer, default to True.
        :return: The function to be used for answering.
        """
        gpt_models = ["chatgpt-4o-latest", "gpt-4o-mini", "o1-preview"]
        gpt_models = ["chatgpt-4o-latest", "gpt-4o-mini", "o1-preview", "gpt-4o"]
        if self._model in gpt_models:
            if streaming:
                return self._gpt_answering_fn
            else:
                return self._gpt_answering_fn_non_stream
        else:
            raise Exception(f"Currently the only supported models are {', '.join(gpt_models)}")

@@ -108,6 +119,24 @@ class Generator:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content

    def _gpt_answering_fn_non_stream(self):
        """
        Generates an answer using the GPT model without streaming.
        The method _update_conversation() should be called before calling this method.
        :return: The generated answer as a single string.
        """

        # OpenAI expects the instruction message under the "developer" role.
        # Rebuild the first message on a copy so the stored conversation's
        # "system" role is not permanently mutated (the original assigned
        # into the live list returned by _fetch_conversation()).
        conversation = self._fetch_conversation()
        conversation = [{**conversation[0], "role": "developer"}] + conversation[1:]

        completion = self._gpt_client.chat.completions.create(
            model=self._model,
            messages=conversation,
        )

        return completion.choices[0].message.content

    def _is_conversation_empty(self) -> bool:
        """
        Checks if the conversation is empty.
@@ -131,14 +160,14 @@ class Generator:
            print("sources: ", text)
            return text


    ## ====== CALLABLE METHODS ====== ##
    def generate_answer(self, question, model):
    def _prework_for_answ_gen(self, question, model, streaming=True):
        """
        Generates an answer to the given question using the given model.
        :param model: The model to use for generating the answer.
        Preparation for answer generation. Sets the model to the specified one,
        Prepares the prompt, updates the conversation list and gets the answering fn based on the given model.
        :param question: The question to answer.
        :return: generator of strings. Each string represents a chunk of the answer.
        :param model: The model to use for generating the answer.
        :param streaming: weather or not to stream the answer.
        :return: The answering function
        """
        # Set the model to the specified one
        self._model = model
@@ -150,7 +179,23 @@ class Generator:
        self._update_conversation("user", prompt)

        # Get the answering fn based on the given model
        answering_fn = self._get_answering_fn()
        answering_fn = self._get_answering_fn(streaming=streaming)

        return answering_fn



    ## ====== CALLABLE METHODS ====== ##
    def generate_answer(self, question, model):
        """
        Generates an answer to the given question using the given model.
        :param model: The model to use for generating the answer.
        :param question: The question to answer.
        :return: generator of strings. Each string represents a chunk of the answer.
        """

        # Get the answering fn based on the given model
        answering_fn = self._prework_for_answ_gen(question, model, streaming=True)

        answer = ""
        for chunk in answering_fn():
@@ -163,16 +208,45 @@ class Generator:
        # Save answer to the conversation
        self._update_conversation("assistant", answer)

    def generate_answer_structured(self, question, model) -> tuple[str, list[str]]:
    def generate_answer_non_steam(self, question, model) -> str:
        """
        Generates an answer without streaming it.
        :param question: The question to answer.
        :param model: The model to use for generating the answer.
        :return: Answer as a string
        """

        # Prepare prompt/conversation and resolve the non-streaming answering fn
        generate = self._prework_for_answ_gen(question, model, streaming=False)

        # Produce the full answer in one call
        result = generate()

        # Keep only the raw question (without the injected context) in history,
        # then record the assistant's reply.
        self._conversation[-1]["content"] = question
        self._update_conversation("assistant", result)

        return result

    def generate_answer_structured(self, question) -> tuple[str, list[str]]:
        """
        Generates answer and provided contexts as structured output.
        :param question: The question for the model to answer.
        :param model: The model to use for answering the question.
        :return: Tuple(answer, contexts)
        """

        # Set the model to the specified one
        self._model = model
        # Set a specific system prompt for evaluation
        self._conversation[0]['content'] = (
            "Είσαι ένας ξεναγός του αρχαιολογικού μουσείου Αιανής που βρισκεται στην Κοζάνη (μια μικρή πόλη στην Ελλάδα). "
            "Στόχος σου είναι να απαντάς στις ερωτήσεις που κάνουν οι επισκέπτες. "
            "Για κάθε ερώτηση θα σου παρέχεται σχετικά κομμάτια κειμένου τα οποία μπορείς να συμβουλευτείς για να απαντήσεις στην ερώτηση του χρήστη."
            "Υπάρχει περίπτωση ο χρήστης να σου γράψει κάτι το οποίο δεν χρειάζεται να συμβουλευτείς "
            "τα σχετικά κομμάτια κειμένου για να απαντήσεις, όπως για παράδειγμα 'Ευχαριστώ πολύ' ή "
            "'γειά σου' σε αυτές τις περιπτώσεις μην λάβεις υπόψην σου τα σχετικά κομμάτια κειμένου που θα σου δοθούν."
            "Στην περίπτωση που δεν γνωρίζεις την απάντηση στην ερώτηση που έθεσε ο χρήστης πες με ευγενικό τρόπο πως δεν γνωρίζεις την απάντηση και μήπως θέλει να ρωτήσει κάτι άλλο."
            "Οι απαντήσεις θα πρέπει να είναι λιτές και να περιέχουν μόνο την απάντηση στην ερώτηση όχι περιττές πληροφορίες.")

        # Prepare the prompt
        prompt, contexts = self._prepare_prompt(question)
@@ -195,6 +269,39 @@ class Generator:

        return answer, contexts

    def get_text_from_audio(self, path) -> str:
        """
        Transcribes the given audio file to text via the Whisper API.
        :param path: The path of the audio file.
        :return: The transcription as plain text (requested language: Greek).
        """

        with open(path, "rb") as audio_file:
            transcript = self._gpt_client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                language="el",
                response_format="text",
            )
        # Removed leftover debug print of the raw API response.
        return transcript

    def text_to_speech(self, text) -> None:
        """
        Generates speech audio from the given text and writes it to
        "speech.mp3" next to this source file.
        :param text: The text to convert to audio.
        :return: None
        """

        speech_file_path = Path(__file__).parent / "speech.mp3"
        response = self._gpt_client.audio.speech.create(
            model="tts-1",
            voice="nova",
            input=text,
        )
        # NOTE(review): stream_to_file is deprecated in newer openai SDKs;
        # consider audio.speech.with_streaming_response.create(...) instead —
        # verify against the pinned SDK version before changing.
        response.stream_to_file(speech_file_path)

    def get_conversation(self):
        """
        Returns the conversation history as a list of role/content message
        dicts. Previously an unimplemented stub that returned None.
        """
        return self._conversation

+7 −4
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@ class MyDoc(Consts):
            chunk_overlap=chunk_overlap,
            chunk_size=chunk_size,
            length_function=length_function,
            separators=["\n\xa0", "\n\n", "."]
            separators=["\n\xa0", "\n\n", '    ']
        )

        chunks: list = self._text_splitter.split_text(self._text)
@@ -84,7 +84,7 @@ class MyDoc(Consts):
        """Extracts the text from the pdf pages"""

        # Basic text extraction from pdf
        text = " ".join([page.page_content for page in self._pages])
        text: str = " ".join([page.page_content for page in self._pages])
        self._title = self._find_title(text)
        text = text.replace("\n\n", " ").replace("\n", " ")
        # Title extraction
@@ -96,12 +96,12 @@ class MyDoc(Consts):
        :param text: The text to find its title.
        :return: The title.
        """
        stoppers = ["\n\n", "\n", "  ", "\xa0"]
        stoppers = ["\n\n", "\n", "    ", "  ", "\xa0"]
        stop_index = [ind for ind in stoppers if text.find(ind)!=-1]

        if stop_index:
            title = text[:text.index(stop_index[0])]
            if len(title) > 20:
            if len(title) > 60:
                title = text[:6]
        else:
            title = text[:10]
@@ -138,6 +138,9 @@ class MyDoc(Consts):
        """Clears the chunks from unwanted characters. Should be called after the chunks have benn created!!!"""
        chunks = []
        for chunk in self._chunks:
            # exclude chunks that are less than 50 characters in length.
            if len(chunk.page_content) < 50:
                continue
            for item in MyDoc.EXCLUDE:
                chunk.page_content = chunk.page_content.replace(item, "")
            chunks.append(chunk)
+296 −84

File changed.

Preview size limit exceeded, changes collapsed.

+83 −10
Original line number Diff line number Diff line
import os
import threading
import gradio as gr  # Ensure the Gradio library is installed by running: pip install gradio
from generator import Generator
from embedder import Embedder
@@ -7,13 +9,14 @@ class UIController:
    def __init__(self, n_results=5):
        self.gen = Generator(Embedder(), "Mycollection", n_results=n_results)
        self.embedder = Embedder()

        self._prepare_embedder()

        self._used_audio = False # Weather or not user used audio api

    # ===   PRIVATE METHODS   === #
    def _prepare_embedder(self):
        """
        Loads the documents and creates the collection.
        Loads the documents and creates the collection. Don't use it if a collection already exists!!!
        :return: None
        """

@@ -30,6 +33,7 @@ class UIController:
        :return: A tuple that consists of a blank string and the updated history.
        The blank string will be sent at the textbox and the history at the chatbot.
        """

        return "", history + [{"role": "user", "content": user_message}]

    def _bot(self, history: list):
@@ -39,14 +43,54 @@ class UIController:
        :return: A generator of strings. Each string represents a chunk of the answer.
        The answer will be sent at the chatbot.
        """
        print(history)

        question = history[-1]["content"]
        bot_message = self.gen.generate_answer(question, model="gpt-4o-mini")
        bot_message = self.gen.generate_answer_non_steam(question, model="gpt-4o-mini")
        print(type(bot_message))

        # TODO ADD CODE FOR PARALLEL SPEECH CREATION
        # If audio api was used, respond with speech
        if self._used_audio:
            t1 = threading.Thread(target=self.gen.text_to_speech, args=(bot_message,))
            t1.start()

        history.append({"role": "assistant", "content": ""})
        for chunk in bot_message:
            history[-1]['content'] += chunk
            yield history

        # # If audio api was used, respond with speech
        # if self._used_audio:
        #     self.gen.text_to_speech(history[-1]['content'])
        #
        # self._used_audio = False


    def _add_message(self, history, message):
        """
        Appends the user's message (typed text or transcribed audio) to the
        chat history and disables the input box while the bot responds.
        :param history: The chat history (list of role/content dicts).
        :param message: MultimodalTextbox payload with "text" and "files".
        :return: Tuple of (updated history, disabled MultimodalTextbox).
        """

        # Remove the previous audio response so a stale file isn't replayed
        if os.path.exists('speech.mp3'):
            os.remove('speech.mp3')

        files = message["files"]
        if files:
            # Voice input: transcribe it and remember to answer with speech
            question = self.gen.get_text_from_audio(files[0])
            self._used_audio = True
        elif message["text"] != "":
            question = message["text"]
        else:
            question = ""

        history.append({"role": "user", "content": question})
        return history, gr.MultimodalTextbox(value=None, interactive=False)

    def _play_audio(self):
        """
        Returns the path of the generated speech file once it exists so the
        Audio component can play it. Only waits when the last question came
        in via audio; otherwise returns None immediately.
        :return: "speech.mp3" or None (also None if generation timed out).
        """
        if self._used_audio:
            import time  # local import: only needed on the audio path

            # Poll for the file written by the text_to_speech worker thread.
            # Sleep between checks instead of busy-spinning the CPU, and give
            # up after a generous timeout so a failed generation can't hang
            # the UI forever (the original `while True` had no exit on error).
            deadline = time.monotonic() + 60
            while time.monotonic() < deadline:
                if os.path.exists("speech.mp3"):
                    self._used_audio = False
                    return "speech.mp3"
                time.sleep(0.1)
            self._used_audio = False
        return None


    # ====  CALLABLE METHODS   === #
    def create_ui(self, share=False):
        """
@@ -58,16 +102,45 @@ class UIController:
            with gr.Row():
                gr.Markdown("AI Ξεναγός!")

            bot = gr.Chatbot(type="messages")
            msg = gr.Textbox()
            # submit = gr.Button("Submit")
            bot = gr.Chatbot(
                type="messages",
                value=[{"role": "assistant", "content": "Γειά σας πως μπορώ να σας βοηθήσω;"}]
            )
            # msg = gr.Textbox()

            # msg.submit(self._user, [msg, bot], [msg, bot], queue=False).then(
            #     self._bot, bot, bot
            # )

            chat_input = gr.MultimodalTextbox(
                interactive=True,
                placeholder="Γράψτε την ερώτηση σας...",
                show_label=False,
                sources=["microphone"],
                stop_btn=True,
                autoscroll=True,
                autofocus=True,
            )

            # submit.click(fn=self.gen.generate_answer, inputs=[input], outputs=[output]).then
            audio = gr.Audio(
                visible=True,
                autoplay=True,
                interactive=False,
                streaming=True,
            )

            msg.submit(self._user, [msg, bot], [msg, bot], queue=False).then(
                self._bot, bot, bot
            chat_msg = chat_input.submit(
                self._add_message, [bot, chat_input], [bot, chat_input]
            )


            bot_msg = chat_msg.then(self._bot, bot, bot)
            bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])

            bot_msg.then(self._play_audio, None, audio)



        demo.launch(share=share)