-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
125 lines (111 loc) · 5.53 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# importing Libraries
import subprocess
import sys

# Install the packages listed in requirements.txt before the remaining imports
# run (presumably so the third-party imports that follow can be resolved on a
# fresh environment).
#
# pip is invoked as "<current interpreter> -m pip" rather than a bare "pip":
# the first "pip" on PATH may belong to a different Python installation, in
# which case the packages would be installed somewhere this script cannot
# import them from.
#
# NOTE(review): this executes every time the module is run, and
# check_call raises CalledProcessError (aborting the script) if pip fails.
install_command = [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt']
subprocess.check_call(install_command)
import os
import time
import github
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from utilities import *
# Per-file metric keys copied from each file's info mapping into the CSV.
_METRIC_FIELDS = (
    "loc",
    "lloc",
    "sloc",
    "blank_lines",
    "comments",
    "multiline_comments",
    "single_comments",
    "cyclomatic_complexity",
    "halstead_report",
)

# Column order of repo_details.csv; "description" stays last so the existing
# CSV layout is unchanged.
_CSV_COLUMNS = ["repo", "file", "url", *_METRIC_FIELDS, "description"]


def _build_metric_rows(all_repo_details, repo_descriptions, repo_urls, repo_positions):
    """Flatten the per-repo, per-file metrics into one dict per file.

    ``repo_positions`` maps each kept repo to its index in the original
    repo-name list, which is the index used to look up its URL.
    """
    rows = []
    for repo, files in all_repo_details.items():
        description = repo_descriptions.get(str(repo), '')
        # Index by the repo's ORIGINAL position: repos that were skipped
        # during fetching must not shift the URLs of the repos after them.
        # Assumes repo_urls is positionally aligned with the repo-name list --
        # TODO confirm against display_repo_names_and_url.
        repo_url = repo_urls[repo_positions[repo]]
        for file, file_info in files.items():
            row = {
                "repo": repo,
                "description": description,
                "url": repo_url,
                "file": file,
            }
            # Missing metrics are written as empty strings.
            row.update({field: file_info.get(field, '') for field in _METRIC_FIELDS})
            rows.append(row)
    return rows


# Defining the main function
def main(username, git_access_token):
    """Fetch a user's repositories and save per-file metrics to a CSV file.

    Steps:
      1. Fetch repo names, descriptions and URLs via
         ``display_repo_names_and_url``.
      2. Fetch the per-file data of every repo via ``get_each_repo_data``;
         repos with no files or no data are skipped.
      3. Write one row per file to ``repo_details.csv`` in the current
         working directory.

    Progress is reported through ``st.write``.

    Args:
        username: GitHub username; if empty/None the function does nothing.
        git_access_token: token passed on to ``display_repo_names_and_url``.

    Returns:
        None. The result is the CSV file written to disk.

    A ``github.RateLimitExceededException`` is caught and reported (console
    and Streamlit page) instead of being propagated.
    """
    if not username:
        return

    try:
        # STEP - 1 : FETCHING THE REPO NAMES
        started = time.time()
        user, repo_names, repo_descriptions, repo_urls = display_repo_names_and_url(username, git_access_token)
        elapsed = time.time() - started
        st.write(f"Repo Names Has been fetched . Time taken for fetching repo names : {elapsed} sec")
        print('#' * 50)

        # STEP - 2 : FETCHING THE RAW DATA FROM EACH REPO
        st.write("Proceeding to fetch raw text.")
        all_repo_details = {}
        repo_positions = {}
        for position, repo in enumerate(repo_names):
            raw_text, total_files = get_each_repo_data(user, repo)
            # Skip repos that have no files or returned no data.
            if not total_files or not raw_text:
                continue
            all_repo_details[repo] = raw_text
            repo_positions[repo] = position
        st.write("Data Fetching Process Completed.")

        # STEP - 3 : CONVERT THE DATA INTO A STRUCTURED FORMAT
        st.write("Proceeding to convert fetched data into a csv file...")
        started = time.time()
        rows = _build_metric_rows(all_repo_details, repo_descriptions, repo_urls, repo_positions)
        # Build the frame in one go; concatenating onto an empty frame is
        # deprecated in recent pandas versions.
        df = pd.DataFrame(rows, columns=_CSV_COLUMNS)
        # saving the dataframe into a csv
        df.to_csv("repo_details.csv")
        elapsed = time.time() - started
        st.write(f"CSV file saved. Time taken {elapsed}")
    except github.RateLimitExceededException:
        message = 'Rate Limit exceeded...wait for an hour and try again...'
        print(message)
        # Also surface the problem in the page: stdout is not visible to
        # someone using the Streamlit UI.
        st.error(message)
def parse_github_username(github_url):
    """Return the GitHub username from a profile URL or a bare username.

    Surrounding whitespace and trailing slashes are ignored, so
    ``"https://github.com/octocat/"`` and ``"  octocat  "`` both give
    ``"octocat"``. Empty input gives ``""``.
    """
    # str.split always returns at least one element, so for a string without
    # "/" the last segment is the string itself; no separate branch is needed.
    return github_url.strip().rstrip('/').split('/')[-1]


if __name__ == "__main__":
    # load the dotenv for accessing the api keys
    load_dotenv()
    # The GitHub token is read from the environment; the OpenAI key is
    # entered in the UI below.
    git_access_token = os.getenv("GITHUB_API_KEY")  # GITHUB Api Key

    st.title('Github Automated Analysis | Most Technically Complex And Challenging Repo')
    github_url = st.text_input("Please Enter The GitHub User's URL or Username For Analysis")
    openai_api_key = st.text_input("Please provide the OpenAI API key", type="password")
    username = parse_github_username(github_url)

    # Running the code
    with st.form('Github_repo_analysis', clear_on_submit=True):
        submitted = st.form_submit_button('Search')
        if submitted:
            if not username:
                # Without a username main() does nothing, so there would be
                # no freshly fetched data for the analysis step.
                st.warning("Please enter a GitHub username or profile URL.")
            elif not openai_api_key.startswith('sk-'):
                st.warning("Please provide a valid OpenAI API key (it should start with 'sk-').")
            else:
                with st.spinner('Calculating...'):
                    # main function to process the data... STEP - 1, STEP - 2, STEP - 3
                    main(username, git_access_token)
                    # STEP - 4 : CALCULATING THE MOST COMPLEX REPO THROUGH OPENAI
                    st.write("Data Fetched....Proceeding to calculation...")
                    response = get_results(openai_api_key)  # function to get the results from OpenAI
                    st.write(response)
#####################################################################################