ShaderCoder

Runtime error

App Files Files Community

Vipitis committed on Jul 9, 2023

Commit

db24268

•

1 Parent(s): 9f8916a

improved model context

Browse files

Files changed (1) hide show

app.py +39 -7

app.py CHANGED Viewed

@@ -272,10 +272,12 @@ outro_text ="""
  - [] generate whole shaders (via prompts guidance, recursive from errors)
  - [x] accordion with generation parameters (as pipeline_kwargs?) look up starcoder playround and take "inspiration" from there (implemented for both buttons, untested)
  - [] support FIM task for better model context
- - [~] include some context for prompt (title, comments before a functions) - now works with the first comment inside a function body (has to be first)
  - [] gradio examples
  - [] use GPU if available, respect memory restrictions.
- - [~] stream model generation (maybe in a new window?) - WIP for body gen right now -> janky solution works.
 ### Notes:
  - this is meant as a resource to show code generation for a "creative" task.
@@ -295,6 +297,7 @@ new_shadertoy_code = """void mainImage( out vec4 fragColor, in vec2 fragCoord )
 passes_dataset = datasets.load_dataset("Vipitis/Shadertoys")
 single_passes = passes_dataset.filter(lambda x: not x["has_inputs"] and x["num_passes"] == 1) #could also include shaders with no extra functions.
 all_single_passes = datasets.concatenate_datasets([single_passes["train"], single_passes["test"]])
 num_samples = len(all_single_passes)
@@ -448,6 +451,34 @@ def _combine_generation_kwargs(temperature, max_new_tokens, top_p, repetition_pe
     gen_kwargs["repetition_penalty"] = repetition_penalty
     return gen_kwargs
 def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens, top_p, repetition_penalty, pipeline=PIPE):
     """
     Replaces the body of a function with a generated one.
@@ -483,11 +514,12 @@ def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens,
     print(f"{old_code[body_start_idx:body_end_idx]=}")
     model_context = identifier_str # base case
     # add any comments at the beginning of the function to the model_context
-    second_child = func_node.child_by_field_name("body").children[1] #might error out?
-    if second_child.type == "comment":
-        # print(second_child.text.decode())
-        model_context += " { \n  " + second_child.text.decode()
-        print(f"{model_context=}")
     # generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
     generation = _run_generation(model_context, pipeline, generation_kwargs)
     for i in generation:

  - [] generate whole shaders (via prompts guidance, recursive from errors)
  - [x] accordion with generation parameters (as pipeline_kwargs?) look up starcoder playround and take "inspiration" from there (implemented for both buttons, untested)
  - [] support FIM task for better model context
+ - [x] include some context for prompt (title, comments before a functions) - now takes all comments directly before a function as well as all comments at the beginning inside a function.
  - [] gradio examples
  - [] use GPU if available, respect memory restrictions.
+ - [x] stream model generation (maybe in a new window?) - janky solution and only sometimes hangs up
+ - [] 2nd iFrame needs a lot of fixing (I am not a web developer, need help)
+ - [] (optional) filtering the dataset by license?
 ### Notes:
  - this is meant as a resource to show code generation for a "creative" task.
 passes_dataset = datasets.load_dataset("Vipitis/Shadertoys")
 single_passes = passes_dataset.filter(lambda x: not x["has_inputs"] and x["num_passes"] == 1) #could also include shaders with no extra functions.
+# single_passes = single_passes.filter(lambda x: x["license"] not in "copyright") #to avoid any "do not display this" license?
 all_single_passes = datasets.concatenate_datasets([single_passes["train"], single_passes["test"]])
 num_samples = len(all_single_passes)
     gen_kwargs["repetition_penalty"] = repetition_penalty
     return gen_kwargs
+def _grab_before_comments(func_node):
+    """
+    returns the comments that happen just before a function node
+    """
+    precomment = ""
+    last_comment_line = 0
+    for node in func_node.parent.children: #could you optimize where to iterated from? directon?
+        if node.start_point[0] != last_comment_line + 1:
+            precomment = ""
+        if node.type == "comment":
+            precomment += node.text.decode() + "\n"
+            last_comment_line = node.start_point[0]
+        elif node == func_node:
+            return precomment
+    return precomment
+def _get_docstrings(func_node):
+    """
+    returns the docstring of a function node
+    """
+    docstring = ""
+    for node in func_node.child_by_field_name("body").children[1:]:
+        if node.type == "comment":
+            docstring += node.text.decode() + "\n"
+        else:
+            return docstring
+    return docstring
 def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens, top_p, repetition_penalty, pipeline=PIPE):
     """
     Replaces the body of a function with a generated one.
     print(f"{old_code[body_start_idx:body_end_idx]=}")
     model_context = identifier_str # base case
     # add any comments at the beginning of the function to the model_context
+    # second_child = func_node.child_by_field_name("body").children[1] #might error out?
+    docstring = _get_docstrings(func_node) #might be empty?
+    if docstring:
+        model_context = model_context + "\n{\n" + docstring + "\n"
+    model_context = _grab_before_comments(func_node) + model_context
+    print(f"{model_context=}")
     # generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
     generation = _run_generation(model_context, pipeline, generation_kwargs)
     for i in generation: