Skip to content

Commit

Permalink
Fix output for user strings in R2RDump (#36935)
Browse files Browse the repository at this point in the history
Quote user strings and escape control characters, unpaired surrogates, and other unsafe characters.
  • Loading branch information
AntonLapounov authored May 25, 2020
1 parent 8e6bd8c commit cb59fba
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1723,12 +1723,11 @@ private void ParseHelper(StringBuilder builder)
/// <summary>
/// Read a string token from the signature stream and convert it to the actual string.
/// </summary>
/// <param name="builder">
/// Output builder. It is passed to <c>ReadUIntAndEmitInlineSignatureBinary</c> while the token is read,
/// and the resolved user string is then appended to it as a quoted, escaped literal.
/// </param>
private void ParseStringHandle(StringBuilder builder)
{
    // The value read from the signature is used as the row id of a user string handle.
    uint rid = ReadUIntAndEmitInlineSignatureBinary(builder);
    UserStringHandle stringHandle = MetadataTokens.UserStringHandle((int)rid);

    // Emit the string exactly once, and only in escaped form; appending the raw
    // value as well would duplicate it and write unescaped characters to the output.
    builder.AppendEscapedString(_metadataReader.GetUserString(stringHandle));
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Globalization;
using System.Text;

namespace ILCompiler.Reflection.ReadyToRun
{
public static class StringBuilderExtensions
{
    /// <summary>
    /// Appends a C# string literal with the given value to the string builder.
    /// </summary>
    /// <remarks>
    /// This method closely follows the logic in <see cref="Microsoft.CodeAnalysis.CSharp.ObjectDisplay.FormatLiteral(string, ObjectDisplayOptions)"/>
    /// method in Roslyn .NET compiler; see its
    /// <a href="https://github.com/dotnet/roslyn/blob/master/src/Compilers/CSharp/Portable/SymbolDisplay/ObjectDisplay.cs">sources</a> for reference.
    /// The literal is wrapped in double quotes. Double quotes and backslashes are prefixed with a backslash;
    /// characters whose Unicode category is reported by <see cref="NeedsEscaping"/> are written as
    /// <c>\0 \a \b \f \n \r \t \v</c>, <c>\uXXXX</c> or <c>\UXXXXXXXX</c> escape sequences; everything else is copied as is.
    /// </remarks>
    /// <param name="builder">The builder to append to.</param>
    /// <param name="value">The string to render as a literal.</param>
    /// <returns>The <paramref name="builder"/> instance, to allow call chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="builder"/> or <paramref name="value"/> is <c>null</c>. Nothing is appended in that case.
    /// </exception>
    public static StringBuilder AppendEscapedString(this StringBuilder builder, string value)
    {
        // Validate before writing anything so that a failed call never leaves
        // a dangling opening quote in the caller's builder.
        if (builder == null)
        {
            throw new ArgumentNullException(nameof(builder));
        }
        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        builder.Append('"');

        for (int i = 0; i < value.Length; i++)
        {
            char c = value[i];

            // Fast check for printable ASCII characters
            if ((c >= 0x20) && (c <= 0x7e))
            {
                if ((c == '"') || (c == '\\'))
                {
                    builder.Append('\\');
                }
                builder.Append(c);
                continue;
            }

            UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(c);
            if (!NeedsEscaping(category))
            {
                // Printable non-ASCII character; quote and backslash cannot occur here
                builder.Append(c);
                continue;
            }

            if (category != UnicodeCategory.Surrogate)
            {
                string escaped = c switch
                {
                    '\0' => @"\0",
                    '\a' => @"\a",
                    '\b' => @"\b",
                    '\f' => @"\f",
                    '\n' => @"\n",
                    '\r' => @"\r",
                    '\t' => @"\t",
                    '\v' => @"\v",
                    _ => null
                };

                if (escaped != null)
                {
                    builder.Append(escaped);
                }
                else
                {
                    AppendUtf16Escape(builder, c);
                }
                continue;
            }

            // Check for a valid surrogate pair: the (string, index) overload reports the
            // category of the whole code point when a pair starts at index i.
            category = CharUnicodeInfo.GetUnicodeCategory(value, i);
            if (category == UnicodeCategory.Surrogate)
            {
                // Escape an unpaired surrogate
                AppendUtf16Escape(builder, c);
            }
            else if (NeedsEscaping(category))
            {
                // A surrogate pair that needs to be escaped
                int codePoint = char.ConvertToUtf32(value, i);
                builder.Append(@"\U").Append(codePoint.ToString("x8", CultureInfo.InvariantCulture));
                i++; // Skip the already-encoded second surrogate of the pair
            }
            else
            {
                // Copy a printable surrogate pair
                builder.Append(c);
                builder.Append(value[++i]);
            }
        }

        builder.Append('"');
        return builder;
    }

    /// <summary>
    /// Appends the given UTF-16 code unit as a <c>\uXXXX</c> escape sequence with four lowercase hexadecimal digits.
    /// </summary>
    private static void AppendUtf16Escape(StringBuilder builder, char c)
    {
        builder.Append(@"\u").Append(((int)c).ToString("x4", CultureInfo.InvariantCulture));
    }

    /// <summary>
    /// Determines whether characters of the given <see cref="UnicodeCategory"/> will be represented with escape sequences.
    /// </summary>
    /// <param name="category">The Unicode category to classify.</param>
    /// <returns>
    /// <c>true</c> for line separators, paragraph separators, control characters, surrogates,
    /// and unassigned code points; <c>false</c> for every other category.
    /// </returns>
    private static bool NeedsEscaping(UnicodeCategory category)
    {
        switch (category)
        {
            case UnicodeCategory.LineSeparator:
            case UnicodeCategory.ParagraphSeparator:
            case UnicodeCategory.Control:
            case UnicodeCategory.Surrogate:
            case UnicodeCategory.OtherNotAssigned:
                return true;
            default:
                return false;
        }
    }
}

public static class StringExtensions
{
    /// <summary>
    /// Formats the given value as a C# string literal and returns the result.
    /// </summary>
    /// <param name="value">The string to convert.</param>
    /// <returns>The text produced by <c>AppendEscapedString</c> for <paramref name="value"/>.</returns>
    public static string ToEscapedString(this string value)
    {
        // Presize the buffer to the input length plus 16 characters
        // (presumably headroom for the quotes and a few escape sequences).
        StringBuilder literal = new StringBuilder(value.Length + 16);
        literal.AppendEscapedString(value);
        return literal.ToString();
    }
}
}
6 changes: 4 additions & 2 deletions src/coreclr/src/tools/r2rdump/R2RDump.cs
Original file line number Diff line number Diff line change
Expand Up @@ -203,12 +203,14 @@ class R2RDump
{
private readonly DumpOptions _options;
private readonly Dictionary<ReadyToRunSectionType, bool> _selectedSections = new Dictionary<ReadyToRunSectionType, bool>();
private readonly Encoding _encoding;
private readonly TextWriter _writer;
private Dumper _dumper;

private R2RDump(DumpOptions options)
{
_options = options;
_encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false);

if (_options.Verbose)
{
Expand All @@ -220,7 +222,7 @@ private R2RDump(DumpOptions options)

if (_options.Out != null)
{
_writer = new StreamWriter(_options.Out.FullName, append: false, encoding: new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false));
_writer = new StreamWriter(_options.Out.FullName, append: false, _encoding);
}
else
{
Expand Down Expand Up @@ -569,7 +571,7 @@ private int Run()
else
{
string perFileOutput = filename.FullName + ".common-methods.r2r";
_dumper = new TextDumper(r2r, new StreamWriter(perFileOutput), disassembler, _options);
_dumper = new TextDumper(r2r, new StreamWriter(perFileOutput, append: false, _encoding), disassembler, _options);
if (previousDumper != null)
{
new R2RDiff(previousDumper, _dumper, _writer).Run();
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/src/tools/r2rdump/TextDumper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -450,12 +450,12 @@ internal override void DumpSectionContents(ReadyToRunSection section)
break;
case ReadyToRunSectionType.OwnerCompositeExecutable:
int oceOffset = _r2r.GetOffset(section.RelativeVirtualAddress);
Decoder decoder = Encoding.UTF8.GetDecoder();
int charLength = decoder.GetCharCount(_r2r.Image, oceOffset, section.Size - 1); // exclude the zero terminator
char[] charArray = new char[charLength];
decoder.GetChars(_r2r.Image, oceOffset, section.Size, charArray, 0, flush: true);
string ownerCompositeExecutable = new string(charArray);
_writer.WriteLine("Composite executable: {0}", ownerCompositeExecutable);
if (_r2r.Image[oceOffset + section.Size - 1] != 0)
{
R2RDump.WriteWarning("String is not zero-terminated");
}
string ownerCompositeExecutable = Encoding.UTF8.GetString(_r2r.Image, oceOffset, section.Size - 1); // exclude the zero terminator
_writer.WriteLine("Composite executable: {0}", ownerCompositeExecutable.ToEscapedString());
break;
}
}
Expand Down

0 comments on commit cb59fba

Please sign in to comment.