Spaces:

kenken999
/

fastapi_django_main_live

Running on Zero

File size: 5,749 Bytes
"""
Module `collect` - Data Handling and RudderStack Integration

This module provides functionalities to handle and send learning data to RudderStack
for the purpose of analysis and to improve the gpt-engineer system. The data is sent
only when the user gives consent to share.

Functions:
    send_learning(learning): Sends learning data to RudderStack.
    collect_learnings(prompt, model, temperature, config, memory, review): Processes and sends learning data.
    collect_and_send_human_review(prompt, model, temperature, config, memory): Collects human feedback and sends it.

Dependencies:
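    hashlib: For generating SHA-256 hash (referenced only by the commented-out
        `steps_file_hash` helper; not imported by the active code).
    typing: For type annotations.
    gpt_engineer.core: Core functionalities of gpt-engineer (`DiskMemory`, `Prompt`).
    gpt_engineer.applications.cli.learning: Handles the extraction of learning data.
    rudderstack.analytics: Client used to send events; imported lazily inside
        `send_learning`.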

Notes:
    Data sent to RudderStack is not shared with third parties and is used solely to
    improve gpt-engineer and allow it to handle a broader range of use cases.
    Consent logic is in gpt_engineer/learning.py.
"""

from typing import Any, Tuple

from gpt_engineer.applications.cli.learning import (
    Learning,
    Review,
    extract_learning,
    human_review_input,
)
from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.prompt import Prompt


def send_learning(learning: Learning):
    """
    Emit one "learning" tracking event to RudderStack.

    Parameters
    ----------
    learning : Learning
        The learning record to transmit. Its ``session`` attribute is used as
        the event's user id and ``to_dict()`` supplies the event properties.

    Notes
    -----
    The RudderStack client is imported lazily, so the dependency is only
    needed when data is actually sent.
    Per the project's stated policy, this function is only called if consent
    is given to share data; the data is not shared with a third party and is
    used solely to improve gpt-engineer. Consent logic is in
    gpt_engineer/learning.py.
    """
    import rudderstack.analytics as rudder_analytics

    # Point the module-level client at the gpt-engineer data plane.
    rudder_analytics.write_key = "2Re4kqwL61GDp7S8ewe6K5dbogG"
    rudder_analytics.dataPlaneUrl = "https://gptengineerezm.dataplane.rudderstack.com"

    emit_event = rudder_analytics.track
    session_id = learning.session
    event_properties = learning.to_dict()  # type: ignore

    emit_event(
        user_id=session_id,
        event="learning",
        properties=event_properties,
    )


def collect_learnings(
    prompt: Prompt,
    model: str,
    temperature: float,
    config: Any,
    memory: DiskMemory,
    review: Review,
) -> None:
    """
    Collect the learning data and send it to RudderStack for analysis.

    Parameters
    ----------
    prompt : Prompt
        The initial prompt that was provided to the model.
    model : str
        The name of the model used for generating the response.
    temperature : float
        The temperature setting used in the model's response generation.
    config : Any
        Configuration parameters used for the learning session; passed through
        unchanged to ``extract_learning``.
    memory : DiskMemory
        An instance of DiskMemory, passed through to ``extract_learning``.
    review : Review
        An instance of Review containing human feedback on the model's response.

    Returns
    -------
    None

    Notes
    -----
    The learning is built with ``extract_learning`` and sent with
    ``send_learning``. If sending raises ``RuntimeError`` (assumed to signal an
    oversized event), the tail of ``learnings.logs`` is trimmed and the send is
    retried once. A second ``RuntimeError`` is reported on stdout and swallowed
    so that telemetry never aborts the caller.

    The number of characters trimmed is always positive: when the serialized
    payload is not actually above the 32KB limit, only the fixed safety margin
    is removed, instead of slicing with a zero or negative length (which would
    wipe the logs or keep only a short prefix of them).
    """
    learnings = extract_learning(prompt, model, temperature, config, memory, review)
    try:
        send_learning(learnings)
    except RuntimeError:
        # try to remove some parts of learning that might be too big
        # rudderstack max event size is 32kb
        max_size = 32 << 10  # 32KB in bytes
        current_size = len(learnings.to_json().encode("utf-8"))  # get size in bytes

        # Clamp at zero: the RuntimeError may not have been caused by size, and a
        # negative overflow would turn the slice below into `logs[:positive]` or
        # `logs[:0]`, destroying the logs rather than trimming their tail.
        overflow = max(current_size - max_size, 0)

        # Add some extra characters for the "[REMOVED...]" string and for safety margin
        remove_length = overflow + len(f"[REMOVED {overflow} CHARACTERS]") + 100

        # Each removed character is at least one byte of UTF-8, so dropping
        # `remove_length` characters frees at least that many bytes.
        learnings.logs = (
            learnings.logs[:-remove_length]
            + f"\n\n[REMOVED {remove_length} CHARACTERS]"
        )

        print(
            "WARNING: learning too big, removing some parts. "
            "Please report if this results in a crash."
        )
        try:
            send_learning(learnings)
        except RuntimeError:
            # Best effort only: give up on telemetry rather than crash the caller.
            print(
                "Sending learnings crashed despite truncation. Progressing without saving learnings."
            )


# def steps_file_hash():
#     """
#     Compute the SHA-256 hash of the steps file.
#
#     Returns
#     -------
#     str
#         The SHA-256 hash of the steps file.
#     """
#     with open(steps.__file__, "r") as f:
#         content = f.read()
#         return hashlib.sha256(content.encode("utf-8")).hexdigest()


def collect_and_send_human_review(
    prompt: Prompt,
    model: str,
    temperature: float,
    config: Tuple[str, ...],
    memory: DiskMemory,
):
    """
    Ask for a human review and, if one is given, forward it for analysis.

    Parameters
    ----------
    prompt : Prompt
        The initial prompt that was provided to the model.
    model : str
        The name of the model used for generating the response.
    temperature : float
        The temperature setting used in the model's response generation.
    config : Tuple[str, ...]
        Configuration parameters used for the learning session.
    memory : DiskMemory
        An instance of DiskMemory, forwarded to ``collect_learnings``.

    Returns
    -------
    None

    Notes
    -----
    The review is obtained from ``human_review_input``. When that call returns
    a falsy value (no review), nothing is sent. Otherwise the review is handed,
    together with the other arguments, to ``collect_learnings``.
    """
    feedback = human_review_input()

    # No review supplied: there is nothing to collect or send.
    if not feedback:
        return

    collect_learnings(prompt, model, temperature, config, memory, feedback)