From 605c599b5d4417e10b627c197c7ee021d9e3cb98 Mon Sep 17 00:00:00 2001
From: Alex
Date: Wed, 8 Feb 2023 19:06:23 +0000
Subject: [PATCH] Create code_docs_gen.py

---
 scripts/code_docs_gen.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 scripts/code_docs_gen.py

diff --git a/scripts/code_docs_gen.py b/scripts/code_docs_gen.py
new file mode 100644
index 00000000..a2170eaa
--- /dev/null
+++ b/scripts/code_docs_gen.py
@@ -0,0 +1,93 @@
+from pathlib import Path
+import ast
+import json
+
+import dotenv
+from langchain.llms import OpenAI
+from langchain.prompts import PromptTemplate
+
+dotenv.load_dotenv()
+
+
+# Collect every Python file under inputs/ along with its path.
+ps = list(Path("inputs").glob("**/*.py"))
+data = []
+sources = []
+for p in ps:
+    with open(p) as f:
+        data.append(f.read())
+        sources.append(p)
+
+
+def get_functions_in_class(node):
+    """Return the names and source code of the functions defined in a class node."""
+    functions = []
+    functions_code = []
+    for child in node.body:
+        if isinstance(child, ast.FunctionDef):
+            functions.append(child.name)
+            functions_code.append(ast.unparse(child))
+    return functions, functions_code
+
+
+def get_classes_and_functions(source_code):
+    """Map each class in the source to a dict of {function name: function code}."""
+    tree = ast.parse(source_code)
+    classes = {}
+    for node in tree.body:
+        if isinstance(node, ast.ClassDef):
+            class_name = node.name
+            function_names, function_code = get_functions_in_class(node)
+            # join function names with their code
+            functions = dict(zip(function_names, function_code))
+            classes[class_name] = functions
+    return classes
+
+
+# Map each source file to its {class name: {function name: code}} structure.
+structure_dict = {}
+for i, code in enumerate(data):
+    structure_dict[str(sources[i])] = get_classes_and_functions(code)
+
+# save the structure dict as json
+with open("structure_dict.json", "w") as f:
+    json.dump(structure_dict, f)
+
+
+if not Path("outputs").exists():
+    Path("outputs").mkdir()
+
+# One LLM client and prompt template, reused for every function.
+llm = OpenAI(temperature=0)
+prompt = PromptTemplate(
+    input_variables=["code"],
+    template="Code: \n{code}, \nDocumentation: ",
+)
+
+c1 = len(structure_dict)
+c2 = 0
+for source, classes in structure_dict.items():
+    c2 += 1
+    print(f"Processing file {c2}/{c1}")
+    f1 = len(classes)
+    f2 = 0
+    for class_name, functions in classes.items():
+        f2 += 1
+        print(f"Processing class {f2}/{f1}")
+        # Mirror the input path under outputs/, swapping .py for .txt.
+        source_w = source.replace("inputs/", "")
+        source_w = source_w.replace(".py", ".txt")
+        if not Path(f"outputs/{source_w}").exists():
+            with open(f"outputs/{source_w}", "w") as f:
+                f.write(f"Class: {class_name}")
+        else:
+            with open(f"outputs/{source_w}", "a") as f:
+                f.write(f"\n\nClass: {class_name}")
+        b1 = len(functions)
+        b2 = 0
+        for function in functions:
+            b2 += 1
+            print(f"Processing function {b2}/{b1}")
+            response = llm(prompt.format(code=functions[function]))
+            with open(f"outputs/{source_w}", "a") as f:
+                f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}")