kenken999's picture
First model version
3860419
raw
history blame
5.75 kB
"""
Module `collect` - Data Handling and RudderStack Integration
This module provides functionalities to handle and send learning data to RudderStack
for the purpose of analysis and to improve the gpt-engineer system. The data is sent
only when the user gives consent to share.
Functions:
send_learning(learning): Sends learning data to RudderStack.
collect_learnings(prompt, model, temperature, config, memory, review): Processes and sends learning data.
collect_and_send_human_review(prompt, model, temperature, config, memory): Collects human feedback and sends it.
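Dependencies:
hashlib: For generating SHA-256 hash (referenced only by the commented-out steps_file_hash helper).
typing: For type annotations.
gpt_engineer.core: Core functionalities of gpt-engineer (Prompt, DiskMemory).
gpt_engineer.applications.cli.learning: Handles the extraction of learning data.
rudderstack.analytics: Client used to send events; imported inside send_learning.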
Notes:
Data sent to RudderStack is not shared with third parties and is used solely to
improve gpt-engineer and allow it to handle a broader range of use cases.
Consent logic is in gpt_engineer/learning.py.
"""
from typing import Any, Tuple

from gpt_engineer.applications.cli.learning import (
    Learning,
    Review,
    extract_learning,
    human_review_input,
)
from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.prompt import Prompt
def send_learning(learning: Learning):
    """
    Forward one learning record to RudderStack as a ``"learning"`` event.

    Parameters
    ----------
    learning : Learning
        The record to transmit. Its ``session`` attribute is used as the
        user id and the result of ``to_dict()`` as the event properties.

    Notes
    -----
    This function is only called if consent is given to share data.
    Data is not shared to a third party. It is used with the sole purpose of
    improving gpt-engineer, and letting it handle more use cases.
    Consent logic is in gpt_engineer/learning.py.
    """
    # Function-local import: the analytics client is loaded only when this
    # function actually runs.
    import rudderstack.analytics as rudder_analytics

    # Point the client at the gpt-engineer RudderStack project.
    rudder_analytics.write_key = "2Re4kqwL61GDp7S8ewe6K5dbogG"
    rudder_analytics.dataPlaneUrl = "https://gptengineerezm.dataplane.rudderstack.com"

    session_id = learning.session
    event_properties = learning.to_dict()  # type: ignore
    rudder_analytics.track(
        user_id=session_id,
        event="learning",
        properties=event_properties,
    )
def collect_learnings(
    prompt: Prompt,
    model: str,
    temperature: float,
    config: Any,
    memory: DiskMemory,
    review: Review,
):
    """
    Collect the learning data and send it to RudderStack for analysis.

    Parameters
    ----------
    prompt : Prompt
        The initial prompt or question that was provided to the model.
    model : str
        The name of the model used for generating the response.
    temperature : float
        The temperature setting used in the model's response generation.
    config : Any
        Configuration parameters used for the learning session.
    memory : DiskMemory
        An instance of DiskMemory for storing and retrieving data.
    review : Review
        An instance of Review containing human feedback on the model's response.

    Returns
    -------
    None

    Notes
    -----
    The learning is built with ``extract_learning`` and sent with
    ``send_learning``. If that raises ``RuntimeError`` the payload is assumed
    to be too large: the tail of ``learnings.logs`` is cut off, a
    ``[REMOVED n CHARACTERS]`` marker is appended, and the send is retried
    once. A ``RuntimeError`` on the retry is reported on stdout and swallowed,
    so this function never propagates it to the caller.
    """
    learnings = extract_learning(prompt, model, temperature, config, memory, review)
    try:
        send_learning(learnings)
    except RuntimeError:
        # try to remove some parts of learning that might be too big
        # rudderstack max event size is 32kb
        max_size = 32 << 10  # 32KB in bytes
        current_size = len(learnings.to_json().encode("utf-8"))  # get size in bytes

        # Clamp at zero. The RuntimeError is not necessarily a size error, and
        # the size measured here is only that of the learning's own JSON, so
        # the difference can be negative. A negative overflow would make
        # remove_length zero or negative, and the slice below would then keep
        # only a short prefix of the logs (or nothing at all for ``[:-0]``)
        # instead of dropping a tail.
        overflow = max(current_size - max_size, 0)

        # Add some extra characters for the "[REMOVED...]" string and for safety margin
        remove_length = overflow + len(f"[REMOVED {overflow} CHARACTERS]") + 100

        # remove_length is now always positive, so this drops that many
        # characters from the end of the logs (everything, if logs is shorter).
        learnings.logs = (
            learnings.logs[:-remove_length]
            + f"\n\n[REMOVED {remove_length} CHARACTERS]"
        )

        print(
            "WARNING: learning too big, removing some parts. "
            "Please report if this results in a crash."
        )
        try:
            send_learning(learnings)
        except RuntimeError:
            # Best effort only: telemetry must not break the user's session.
            print(
                "Sending learnings crashed despite truncation. Progressing without saving learnings."
            )
# def steps_file_hash():
# """
# Compute the SHA-256 hash of the steps file.
#
# Returns
# -------
# str
# The SHA-256 hash of the steps file.
# """
# with open(steps.__file__, "r") as f:
# content = f.read()
# return hashlib.sha256(content.encode("utf-8")).hexdigest()
def collect_and_send_human_review(
    prompt: Prompt,
    model: str,
    temperature: float,
    config: Tuple[str, ...],
    memory: DiskMemory,
):
    """
    Ask the user for a review and, if one is given, submit it with the learning data.

    Parameters
    ----------
    prompt : Prompt
        The initial prompt or question that was provided to the model.
    model : str
        The name of the model used for generating the response.
    temperature : float
        The temperature setting used in the model's response generation.
    config : Tuple[str, ...]
        Configuration parameters used for the learning session.
    memory : DiskMemory
        An instance of DiskMemory for storing and retrieving data.

    Returns
    -------
    None

    Notes
    -----
    The review is obtained by calling `human_review_input`. When the value it
    returns is truthy, it is passed on to `collect_learnings` together with the
    other arguments; otherwise nothing is sent.
    """
    feedback = human_review_input()
    if not feedback:
        # No review was provided, so there is nothing to submit.
        return
    collect_learnings(prompt, model, temperature, config, memory, feedback)