Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: moved main to main.py and added some type annotations #154

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions operate/config.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import sys

import google.generativeai as genai
from dotenv import load_dotenv
from openai import OpenAI
from prompt_toolkit.shortcuts import input_dialog
import google.generativeai as genai


class Config:
Expand Down Expand Up @@ -72,7 +73,7 @@ def initialize_google(self):

return model

def validation(self, model, voice_mode):
def validation(self, model: str, voice_mode):
"""
Validate the input parameters for the dialog operation.
"""
Expand Down Expand Up @@ -119,3 +120,4 @@ def prompt_and_save_api_key(self, key_name, key_description):
def save_api_key_to_env(key_name, key_value):
    """
    Append an API key assignment to the `.env` file in the current directory.

    Parameters:
    - key_name: Name of the environment variable to store.
    - key_value: Value to store; it is written wrapped in single quotes.

    Returns:
    None
    """
    # Write the entry exactly once; a repeated write would leave duplicate
    # assignments for the same key in the file.
    # The encoding is pinned so the file does not depend on the platform's
    # default locale encoding.
    with open(".env", "a", encoding="utf-8") as file:
        file.write(f"\n{key_name}='{key_value}'")
122 changes: 120 additions & 2 deletions operate/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,126 @@
Self-Operating Computer
"""
import argparse
from operate.utils.style import ANSI_BRIGHT_MAGENTA
from operate.operate import main
import asyncio
import os
import platform
import sys

from prompt_toolkit import prompt
from prompt_toolkit.shortcuts import message_dialog

from operate.config import Config
from operate.exceptions import ModelNotRecognizedException
from operate.models.apis import get_next_action
from operate.models.prompts import USER_QUESTION, get_system_prompt
from operate.utils.style import (
ANSI_BRIGHT_MAGENTA,
ANSI_GREEN,
ANSI_RED,
ANSI_RESET,
ANSI_YELLOW,
style,
)

config = Config()


def main(model: str, terminal_prompt: str, voice_mode: bool = False):
    """
    Main function for the Self-Operating Computer.

    Validates the configuration, obtains the objective (from the terminal
    argument, the microphone, or an interactive prompt) and then repeatedly
    asks the model for the next operations and executes them until the run
    stops, an error occurs, or the loop limit is exceeded.

    Parameters:
    - model: The model used for generating responses.
    - terminal_prompt: A string representing the prompt provided in the terminal.
    - voice_mode: A boolean indicating whether to enable voice mode.

    Returns:
    None
    """
    # `operate` lives in operate/operate.py and is not imported at the top of
    # this module, so bring it into scope here; otherwise the call in the loop
    # below fails with a NameError.
    from operate.operate import operate

    mic = None

    config.validation(model, voice_mode)

    # Initialize `WhisperMic`, if `voice_mode` is True
    if voice_mode:
        try:
            from whisper_mic import WhisperMic

            # Initialize WhisperMic if import is successful
            mic = WhisperMic()
        except ImportError:
            print(
                "Voice mode requires the 'whisper_mic' module. Please install it using 'pip install -r requirements-audio.txt'"
            )
            sys.exit(1)

    # Skip message dialog if prompt was given directly
    if not terminal_prompt:
        message_dialog(
            title="Self-Operating Computer",
            text="An experimental framework to enable multimodal models to operate computers",
            style=style,
        ).run()
    else:
        print("Running direct prompt...")

    # Clear the console
    if platform.system() == "Windows":
        os.system("cls")
    else:
        print("\033c", end="")

    if terminal_prompt:  # Skip objective prompt if it was given as an argument
        objective = terminal_prompt
    elif voice_mode:
        print(
            f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RESET} Listening for your command... (speak now)"
        )
        try:
            objective = mic.listen()
        except Exception as e:
            print(f"{ANSI_RED}Error in capturing voice input: {e}{ANSI_RESET}")
            return  # Exit if voice input fails
    else:
        print(f"{ANSI_GREEN}[Self-Operating Computer]\n{ANSI_RESET}{USER_QUESTION}")
        print(f"{ANSI_YELLOW}[User]{ANSI_RESET}")
        objective = prompt(style=style)

    system_prompt = get_system_prompt(model, objective)
    system_message = {"role": "system", "content": system_prompt}
    messages = [system_message]

    loop_count = 0

    session_id = None

    while True:
        # Read the verbosity flag from the config object: the module-level
        # `VERBOSE` constant belongs to operate/operate.py and is not defined
        # in this module.
        if config.verbose:
            print("[Self Operating Computer] loop_count", loop_count)
        try:
            operations, session_id = asyncio.run(
                get_next_action(model, messages, objective, session_id)
            )

            stop = operate(operations)
            if stop:
                break

            loop_count += 1
            if loop_count > 10:
                break
        except (ModelNotRecognizedException, Exception) as e:
            # Both the model-specific error and any other failure are reported
            # the same way and end the session.
            print(
                f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RED}[Error] -> {e} {ANSI_RESET}"
            )
            break


def main_entry():
Expand Down
121 changes: 4 additions & 117 deletions operate/operate.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,14 @@
import sys
import os
import time
import asyncio
from prompt_toolkit.shortcuts import message_dialog
from prompt_toolkit import prompt
from operate.exceptions import ModelNotRecognizedException
import platform

# from operate.models.prompts import USER_QUESTION, get_system_prompt
from operate.models.prompts import (
USER_QUESTION,
get_system_prompt,
)
from operate.config import Config
from operate.utils.operating_system import OperatingSystem
from operate.utils.style import (
ANSI_BLUE,
ANSI_BRIGHT_MAGENTA,
ANSI_GREEN,
ANSI_RESET,
ANSI_YELLOW,
ANSI_RED,
ANSI_BRIGHT_MAGENTA,
ANSI_BLUE,
style,
ANSI_RESET,
)
from operate.utils.operating_system import OperatingSystem
from operate.models.apis import get_next_action

# Load configuration
config = Config()
Expand All @@ -32,104 +17,6 @@
VERBOSE = config.verbose


def main(model, terminal_prompt, voice_mode=False):
    """
    Entry point that runs one Self-Operating Computer session.

    Parameters:
    - model: The model used for generating responses.
    - terminal_prompt: The objective passed on the command line; when empty
      the objective is collected by voice or by an interactive prompt.
    - voice_mode: Whether the objective should be captured from the microphone.

    Returns:
    None
    """
    config.validation(model, voice_mode)

    # The microphone is only created when voice mode was requested.
    mic = None
    if voice_mode:
        try:
            from whisper_mic import WhisperMic

            mic = WhisperMic()
        except ImportError:
            print(
                "Voice mode requires the 'whisper_mic' module. Please install it using 'pip install -r requirements-audio.txt'"
            )
            sys.exit(1)

    # A directly supplied prompt bypasses the welcome dialog.
    if terminal_prompt:
        print("Running direct prompt...")
    else:
        welcome = message_dialog(
            title="Self-Operating Computer",
            text="An experimental framework to enable multimodal models to operate computers",
            style=style,
        )
        welcome.run()

    # Wipe the terminal: `cls` on Windows, an escape sequence elsewhere.
    if platform.system() != "Windows":
        print("\033c", end="")
    else:
        os.system("cls")

    # Resolve the objective: command-line argument first, then typed or spoken.
    if terminal_prompt:
        objective = terminal_prompt
    elif not voice_mode:
        print(f"{ANSI_GREEN}[Self-Operating Computer]\n{ANSI_RESET}{USER_QUESTION}")
        print(f"{ANSI_YELLOW}[User]{ANSI_RESET}")
        objective = prompt(style=style)
    else:
        print(
            f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RESET} Listening for your command... (speak now)"
        )
        try:
            objective = mic.listen()
        except Exception as err:
            print(f"{ANSI_RED}Error in capturing voice input: {err}{ANSI_RESET}")
            return  # Nothing to do without an objective

    messages = [
        {"role": "system", "content": get_system_prompt(model, objective)},
    ]
    session_id = None
    loop_count = 0

    # Runs for loop_count 0 through 10, stopping early when `operate`
    # reports completion or when any error is raised.
    while loop_count <= 10:
        if VERBOSE:
            print("[Self Operating Computer] loop_count", loop_count)
        try:
            next_action = get_next_action(model, messages, objective, session_id)
            operations, session_id = asyncio.run(next_action)

            if operate(operations):
                break

            loop_count += 1
        except (ModelNotRecognizedException, Exception) as err:
            print(
                f"{ANSI_GREEN}[Self-Operating Computer]{ANSI_RED}[Error] -> {err} {ANSI_RESET}"
            )
            break


def operate(operations):
if VERBOSE:
print("[Self Operating Computer][operate]")
Expand Down