import os
import warnings

# File extensions that are treated as binary assets and skipped.
EXTENSION_TO_SKIP = [".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".ico", ".tif", ".tiff"]
DEFAULT_DIR = "generated"

# Spellings (compared case-insensitively, after stripping whitespace) that
# switch a boolean environment flag off. Anything else switches it on.
_FALSE_STRINGS = frozenset({"", "0", "false", "no", "off"})


def _env_flag(name, default=False):
    """Return the boolean value of environment variable *name*.

    An unset variable yields *default*. A set variable is False when its
    stripped, lower-cased value is one of ``_FALSE_STRINGS`` and True
    otherwise, so ``False``, ``false``, ``0`` and an empty value all disable
    the flag.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() not in _FALSE_STRINGS


def _env_int(name, default):
    """Return environment variable *name* parsed as an int.

    An unset variable yields *default*. A value that is not a valid integer
    also yields *default*, after emitting a warning, so that a typo in the
    environment does not make the whole module fail to import.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        warnings.warn(
            f"{name}={raw!r} is not an integer; falling back to {default}",
            stacklevel=2,
        )
        return default


# If enabled, each file generation prompt includes all the files generated before it.
# It helps to make code much more consistent,
# but requires at least a 16k context model even for a small project.
USE_FULL_PROJECT_PROMPT = _env_flag("USE_FULL_PROJECT_PROMPT")

# https://platform.openai.com/docs/models/gpt-4
# We recommend 'gpt-4' if you have it: gpt-3.5 is going to be worse at
# generating code. An unset or empty variable falls back to gpt-3.5-turbo.
DEFAULT_MODEL = os.environ.get("OPENAI_DEFAULT_MODEL") or "gpt-3.5-turbo"

# Not the model's max length on purpose: that encourages verbose code, while a
# value that is too short truncates the code suddenly. The callers in main.py
# and main_no_modal.py only send max_tokens when this value is not 0.
DEFAULT_MAX_TOKENS = _env_int("OPENAI_DEFAULT_MAX_TOKENS", 2000)
# sometimes GPT-3.5 still returns some words around the content of the file
# example:
# # Makefile
# ```makefile
# contents
# ````
def get_code_from_string(input_string):
    """Return the file content held in the first fenced block of a reply.

    Args:
        input_string: raw text returned by the model for one file.

    Returns:
        The text between the first opening fence (three backticks plus an
        optional info string) and its closing fence, without the newline
        that precedes the closing fence. If the reply contains no opening
        fence, it is returned unchanged.

    Behaviour on awkward replies:
        * Fence lines that carry an info string (such as three backticks
          followed by "bash") are counted as nested openers, so a wrapped
          markdown file that itself contains fenced snippets is returned
          whole, not cut at the first inner fence.
        * If the fences never balance, the first later fence line ends the
          block.
        * If the reply starts with an opening fence but has no closing fence
          (e.g. the completion was truncated), only the opening fence line
          is dropped.
        * A trailing carriage return left over from CRLF line endings is
          removed.

    Known limitation: words before the first fence are always treated as
    chatter, so an unwrapped markdown file with leading prose is still
    reduced to its first fenced snippet.
    """
    opener = re.search(r"```[^\n]*\n", input_string)
    if opener is None:
        return input_string

    lines = input_string[opener.end():].split("\n")
    depth = 1          # the opening fence itself
    closing = None     # index of the fence line that balances the opener
    fallback = None    # first fence line after some content

    for index, line in enumerate(lines):
        if not line.startswith("```"):
            continue
        if fallback is None and index > 0:
            fallback = index
        if line.strip("` \t\r"):
            # Text after the backticks is an info string: a nested opener.
            depth += 1
            continue
        depth -= 1
        if depth == 0:
            closing = index
            break

    if closing is None:
        closing = fallback

    if closing is None:
        # No closing fence at all. Only strip the marker line when the fence
        # is the very first thing in the reply; otherwise the backticks may
        # be genuine file content and the reply is left untouched.
        if not input_string[:opener.start()].strip():
            return input_string[opener.end():]
        return input_string

    code = "\n".join(lines[:closing])
    # With CRLF line endings the "\r" of the last content line is still there.
    return code[:-1] if code.endswith("\r") else code
# sometimes GPT-3.5 still returns some words around the content of the file
# example:
# # Makefile
# ```makefile
# contents
# ````
def get_code_from_string(input_string):
    """Return the file content held in the first fenced block of a reply.

    Args:
        input_string: raw text returned by the model for one file.

    Returns:
        The text between the first opening fence (three backticks plus an
        optional info string) and its closing fence, without the newline
        that precedes the closing fence. If the reply contains no opening
        fence, it is returned unchanged.

    Behaviour on awkward replies:
        * Fence lines that carry an info string (such as three backticks
          followed by "bash") are counted as nested openers, so a wrapped
          markdown file that itself contains fenced snippets is returned
          whole, not cut at the first inner fence.
        * If the fences never balance, the first later fence line ends the
          block.
        * If the reply starts with an opening fence but has no closing fence
          (e.g. the completion was truncated), only the opening fence line
          is dropped.
        * A trailing carriage return left over from CRLF line endings is
          removed.

    Known limitation: words before the first fence are always treated as
    chatter, so an unwrapped markdown file with leading prose is still
    reduced to its first fenced snippet.
    """
    opener = re.search(r"```[^\n]*\n", input_string)
    if opener is None:
        return input_string

    lines = input_string[opener.end():].split("\n")
    depth = 1          # the opening fence itself
    closing = None     # index of the fence line that balances the opener
    fallback = None    # first fence line after some content

    for index, line in enumerate(lines):
        if not line.startswith("```"):
            continue
        if fallback is None and index > 0:
            fallback = index
        if line.strip("` \t\r"):
            # Text after the backticks is an info string: a nested opener.
            depth += 1
            continue
        depth -= 1
        if depth == 0:
            closing = index
            break

    if closing is None:
        closing = fallback

    if closing is None:
        # No closing fence at all. Only strip the marker line when the fence
        # is the very first thing in the reply; otherwise the backticks may
        # be genuine file content and the reply is left untouched.
        if not input_string[:opener.start()].strip():
            return input_string[opener.end():]
        return input_string

    code = "\n".join(lines[:closing])
    # With CRLF line endings the "\r" of the last content line is still there.
    return code[:-1] if code.endswith("\r") else code