Skip to content
This repository has been archived by the owner on Aug 21, 2023. It is now read-only.

Commit

Permalink
QnA Example - Peformance and Retry mechanism Improvements (#13)
Browse files Browse the repository at this point in the history
* Backup

* Backup

* Works

* undo

* Document code better

* Add missing projects in solution
  • Loading branch information
nir-schleyen authored Aug 15, 2023
1 parent 2e78195 commit ca066b0
Show file tree
Hide file tree
Showing 10 changed files with 444 additions and 398 deletions.
12 changes: 12 additions & 0 deletions dotnet/SK-dotnet.sln
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.Memory.Kusto", "
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "PlayFabExamples", "samples\PlayFabExamples\PlayFabExamples.csproj", "{C76B2394-0199-461B-8642-79734D7A64DF}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "pf-extensions", "pf-extensions", "{E63F71D6-7CCE-499F-9310-8A92149068AB}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Planning.ChatStepwisePlanner", "src\PF-Extensions\Planning.ChatStepwisePlanner\Planning.ChatStepwisePlanner.csproj", "{AF4314B7-FDD1-4280-9F2F-76206F7D5EC3}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -377,6 +381,12 @@ Global
{C76B2394-0199-461B-8642-79734D7A64DF}.Publish|Any CPU.Build.0 = Debug|Any CPU
{C76B2394-0199-461B-8642-79734D7A64DF}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C76B2394-0199-461B-8642-79734D7A64DF}.Release|Any CPU.Build.0 = Release|Any CPU
{AF4314B7-FDD1-4280-9F2F-76206F7D5EC3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{AF4314B7-FDD1-4280-9F2F-76206F7D5EC3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AF4314B7-FDD1-4280-9F2F-76206F7D5EC3}.Publish|Any CPU.ActiveCfg = Publish|Any CPU
{AF4314B7-FDD1-4280-9F2F-76206F7D5EC3}.Publish|Any CPU.Build.0 = Publish|Any CPU
{AF4314B7-FDD1-4280-9F2F-76206F7D5EC3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AF4314B7-FDD1-4280-9F2F-76206F7D5EC3}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -431,6 +441,8 @@ Global
{C754950A-E16C-4F96-9CC7-9328E361B5AF} = {FA3720F1-C99A-49B2-9577-A940257098BF}
{E07608CC-D710-4655-BB9E-D22CF3CDD193} = {0247C2C9-86C3-45BA-8873-28B0948EDC0C}
{C76B2394-0199-461B-8642-79734D7A64DF} = {FA3720F1-C99A-49B2-9577-A940257098BF}
{E63F71D6-7CCE-499F-9310-8A92149068AB} = {831DDCA2-7D2C-4C31-80DB-6BDB3E1F7AE0}
{AF4314B7-FDD1-4280-9F2F-76206F7D5EC3} = {E63F71D6-7CCE-499F-9310-8A92149068AB}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {FBDC56A3-86AD-4323-AA0F-201E59123B83}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public class AzureOpenAIConfig
public string ServiceId { get; set; }
public string DeploymentName { get; set; }
public string ChatDeploymentName { get; set; }
public string EmbeddingDeploymentName { get; set; }
public string Endpoint { get; set; }
public string ApiKey { get; set; }
}
Expand Down
331 changes: 214 additions & 117 deletions dotnet/samples/PlayFabExamples/Example01_DataQnA/Example01_DataQnA.cs

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
using Microsoft.SemanticKernel.Orchestration;
using Microsoft.SemanticKernel.SkillDefinition;
using PlayFabExamples.Common.Configuration;
using System.Text.Json;
using PlayFabExamples.Example01_DataQnA.Reports;
using System.Text.RegularExpressions;

namespace PlayFabExamples.Example01_DataQnA;

Expand All @@ -19,39 +22,28 @@ public class InlineDataProcessorSkill
/// The system prompt for a chat that creates python scripts to solve analytic problems
/// </summary>
private static readonly string CreatePythonScriptSystemPrompt = @"
You're a python script programmer.
Once you get a question, write a Python script that loads the comma-separated (CSV) data inline (within the script) into a dataframe.
The CSV data should not be assumed to be available in any external file.
Only load the data from [Input CSV]. do not attempt to initialize the data frame with any additional rows.
You're a python script programmer that write code to answer business questions.
The python script should use the print() function to output to screen the following details in a user-friendly message:
- First start by printing the final answer to the question.
- Print additional explanation and intermediate results that led to the final answer.
The python script that uses one or more of the data-frames loaded in [Input DataFrames] below and should favor using the one under [Input DataFrame 1].
Do not attempt to initialize other data frames and do not assume any other columns other than what documented below.
The script should:
- Attempt to answer the provided question and print the output to the console as a user-friendly answer.
- Print facts and calculations that lead to this answer
- Print intermediate calculations and chain of thought that lead to this answer
- Import any necessary modules within the script (e.g., import datetime if used)
- If the script needs to use StringIO, it should import io, and then use it as io.StringIO (To avoid this error: module 'pandas.compat' has no attribute 'StringIO')
The script can use one or more of the provided inline scripts and should favor the ones relevant to the question.
Simply output the final script below without anything beside the code and its inline documentation.
Never attempt to calculate the MonthlyActiveUsers as a sum of DailyActiveUsers since DailyActiveUsers only gurantees the user uniqueness within a single day
Never attempt to calculate the MonthlyActiveUsers as a sum of DailyActiveUsers since DailyActiveUsers only guarantees the user uniqueness within a single day.
Do not assume certain order of the data-frame. Always sort the data-frame by the relevant columns before using it.
Date columns are string formatted so you need to convert them to datetime before using them.
[Input CSV]
{{$inlineData}}
Today Date: {{$date.today}}
[Input DataFrames]
{{$inlineData}}
";

/// <summary>
/// The user agent prompt for fixing a python script that has runtime errors
/// </summary>
private static readonly string FixPythonScriptPrompt = @"
The following python error has encountered while running the script above.
Fix the script so it has no errors.
Make the minimum changes that are required. If you need to use StringIO, make sure to import io, and then use it as io.StringIO
simply output the final script below without any additional explanations.
[Error]
{{$error}}
[Fixed Script]
";
#endregion

#region Data Members
Expand Down Expand Up @@ -85,76 +77,71 @@ public InlineDataProcessorSkill(ISemanticTextMemory memory)
SKName("GetAnswerForGameQuestion"),
Description("Answers questions about game's data and its players around engagement, usage, time spent and game analytics")]
public async Task<string> GetAnswerForGameQuestionAsync(
[Description("The question related to the provided inline data.")]
string question,
SKContext context)
[Description("The question related to the provided inline data.")]
string question,
SKContext context)
{
StringBuilder stringBuilder = new();
var memories = _memory.SearchAsync("TitleID-Reports", question, limit: 2, minRelevanceScore: 0.65);
StringBuilder systemMessageInlineData = new();
IAsyncEnumerable<MemoryQueryResult> memories = _memory.SearchAsync("TitleID-Reports", question, limit: 2, minRelevanceScore: 0.7);
int idx = 1;

List<PlayFabReport> playFabReports = new();
StringBuilder pythonScriptScriptActualPrefix = new();
await foreach (MemoryQueryResult memory in memories)
{
stringBuilder.AppendLine($"[Input CSV {idx++}]");
stringBuilder.AppendLine(memory.Metadata.Text);
stringBuilder.AppendLine();
}
PlayFabReport playFabReport = JsonSerializer.Deserialize<PlayFabReport>(memory.Metadata.AdditionalMetadata);
playFabReports.Add(playFabReport);

string csvData = stringBuilder.ToString();
string ret = await CreateAndExcecutePythonScript(question, csvData);
return ret;
}
#endregion

#region Private Methods
/// <summary>
/// Creates and executes a python script to get an answer for an analytic question
/// </summary>
/// <param name="question">The analytic question</param>
/// <param name="inlineData">The data that can be used to answer the given question (e.g: can be list of CSV reports)</param>
/// <returns>The final answer</returns>
private async Task<string> CreateAndExcecutePythonScript(string question, string inlineData)
{
DateTime today = DateTime.UtcNow;
systemMessageInlineData.AppendLine($"[DataFrame {idx++}]");
systemMessageInlineData.AppendLine(playFabReport.GetDetailedDescription());
systemMessageInlineData.AppendLine($"{playFabReport.ReportName} = pd.read_csv('{playFabReport.ReportName}.csv')");
systemMessageInlineData.AppendLine();
}

var chatCompletion = new ChatCompletionsOptions()
{
Messages =
{
new ChatMessage(ChatRole.System, CreatePythonScriptSystemPrompt.Replace("{{$inlineData}}", inlineData)),
new ChatMessage(ChatRole.User, question + "\nPrint facts and calculations that lead to this answer\n[Python Script]")
new ChatMessage(
ChatRole.System,
CreatePythonScriptSystemPrompt
.Replace("{{$inlineData}}", systemMessageInlineData.ToString())
.Replace("{{$date.today}}", DateTime.UtcNow.ToString("yyyy/MM/dd"))),
new ChatMessage(ChatRole.User, question)
},
Temperature = 0.1f,
MaxTokens = 8000,
NucleusSamplingFactor = 1f,
NucleusSamplingFactor = 0f,
FrequencyPenalty = 0,
PresencePenalty = 0,
};

Azure.Response<ChatCompletions> response = await this._openAIClient.GetChatCompletionsAsync(
deploymentOrModelName: TestConfiguration.AzureOpenAI.ChatDeploymentName, chatCompletion);

// Path to the Python executable
string pythonPath = "python"; // Use "python3" if on a Unix-like system

int retry = 0;
while (retry++ < 3)
{
Azure.Response<ChatCompletions> response = await this._openAIClient.GetChatCompletionsAsync(
deploymentOrModelName: TestConfiguration.AzureOpenAI.ChatDeploymentName, chatCompletion);

string rawResponse = response.Value.Choices.Single().Message.Content;
string pythonScript = ExtractPythonScript(rawResponse);
pythonScript = AddMissingDataFrames(pythonScript, playFabReports);
pythonScript = AddMissingImports(pythonScript);

string pythonScriptWithInlinedData = InlineDataFramesData(pythonScript, playFabReports);

// Inline Python script
string pythonScript = response.Value.Choices[0].Message.Content;
pythonScript = GetScriptOnly(pythonScript)
pythonScriptWithInlinedData = pythonScriptWithInlinedData
.Replace("\"", "\\\"") // Quote so we can run python via commandline
.Replace("pd.compat.StringIO(", "io.StringIO("); // Fix common script mistake

if (!pythonScript.Contains("import io"))
{
pythonScript = "import io\n\n" + pythonScript;
}
// Path to the Python executable
string pythonPath = "python"; // Use "python3" if on a Unix-like system

// Create a ProcessStartInfo and set the required properties
var startInfo = new ProcessStartInfo
{
FileName = pythonPath,
Arguments = "-c \"" + pythonScript + "\"",
Arguments = "-c \"" + pythonScriptWithInlinedData + "\"",
RedirectStandardOutput = true,
RedirectStandardError = true,
UseShellExecute = false,
Expand All @@ -168,22 +155,26 @@ private async Task<string> CreateAndExcecutePythonScript(string question, string
process.Start();

// Read the Python process output and error
string output = process.StandardOutput.ReadToEnd().Trim();
string warningsAndErrors = process.StandardError.ReadToEnd().Trim();

string output = (await process.StandardOutput.ReadToEndAsync()).Trim();
string warningsAndErrors = (await process.StandardError.ReadToEndAsync()).Trim();

// Wait for the process to finish
process.WaitForExit();

// If there are errors in the script, try to fix them
if (process.ExitCode != 0)
{
Console.WriteLine("Error in script: " + warningsAndErrors);
chatCompletion.Messages.Add(new ChatMessage(ChatRole.Assistant, pythonScript));
chatCompletion.Messages.Add(new ChatMessage(ChatRole.User, FixPythonScriptPrompt.Replace("{{$error}}", warningsAndErrors)));

response = await this._openAIClient.GetChatCompletionsAsync(
deploymentOrModelName: TestConfiguration.AzureOpenAI.ChatDeploymentName, chatCompletion);
chatCompletion.Messages.Add(new ChatMessage(
ChatRole.User,
"The following error/s occured. Can you write the fixed script? " + Environment.NewLine + warningsAndErrors));
}
else if (string.IsNullOrEmpty(output))
{
chatCompletion.Messages.Add(new ChatMessage(ChatRole.Assistant, pythonScript));
chatCompletion.Messages.Add(new ChatMessage(
ChatRole.User,
"The script seems to be lacking any output (via print() function). Can you write the fixed script? " + Environment.NewLine + warningsAndErrors));
}
else
{
Expand All @@ -192,23 +183,94 @@ private async Task<string> CreateAndExcecutePythonScript(string question, string
}

return "Couldn't get an answer";

}

private string GetScriptOnly(string pythonScript)
#endregion

#region Private Methods

private string ExtractPythonScript(string inputText)
{
// Extract Python script using regular expressions
string pattern = @"```(?:\s*)python(.*?)```";
MatchCollection matches = Regex.Matches(inputText, pattern, RegexOptions.Singleline);

if (matches.Count == 0)
{
return inputText;
}

StringBuilder ret = new();
foreach (Match match in matches)
{
string pythonScript = match.Groups[1].Value.Trim();
ret.AppendLine(pythonScript);
ret.AppendLine();
}

return ret.ToString();
}

private string AddMissingDataFrames(string pythonScript, List<PlayFabReport> playFabReports)
{
bool hadMissingDataFrames = false;
foreach (PlayFabReport report in playFabReports)
{
bool isFirstMissing = true;
if (pythonScript.Contains(report.ReportName) && !pythonScript.Contains($"'{report.ReportName}.csv'"))
{
if (isFirstMissing)
{
isFirstMissing = false;
pythonScript = Environment.NewLine + pythonScript;
}

pythonScript = $"{report.ReportName} = pd.read_csv('{report.ReportName}.csv')" + Environment.NewLine + pythonScript;
}
}

if (hadMissingDataFrames)
{
pythonScript += Environment.NewLine;
}

return pythonScript;
}

private string AddMissingImports(string pythonScript)
{
const string startMarker = "```python";
const string endMarker = "```";
string[] requiredImports = new[]
{
"import pandas as pd",
"import io"
};

string ret = pythonScript;
int startIndex = pythonScript.IndexOf(startMarker) + startMarker.Length;
int endIndex = pythonScript.LastIndexOf(endMarker);
// Ensure required imports show up at the top of the script and only once. Move them to the top if needed
pythonScript = Environment.NewLine + pythonScript;
foreach (string requiredImport in requiredImports)
{
if (pythonScript.Contains(requiredImport))
{
pythonScript = pythonScript.Replace(requiredImport, "");
}

pythonScript = requiredImport + Environment.NewLine + pythonScript;
}

return pythonScript;
}

if (startIndex >= 0 && endIndex > startIndex)
private string InlineDataFramesData(string pythonScript, List<PlayFabReport> playFabReports)
{
foreach (var report in playFabReports)
{
ret = pythonScript.Substring(startIndex, endIndex - startIndex);
pythonScript = pythonScript.Replace(
$"'{report.ReportName}.csv'",
$"io.StringIO('''{report.GetCsvHeader()}\n{report.CsvData}''')");
}

return ret;
return pythonScript;
}

#endregion
Expand Down
Loading

0 comments on commit ca066b0

Please sign in to comment.