Skip to content

Commit

Permalink
Change SString::GetUTF8NoConvert to GetUTF8 that converts the SString (
Browse files Browse the repository at this point in the history
…#71101)

* Change SString::GetUTF8NoConvert to GetUTF8 that converts the SString

This enables SString to get out of the UTF16 state and helps move us away from "SString's natural encoding is UTF16"

* Remove some stack scratch buffers that are unneeded now (as we can now convert the SString itself and in some cases it was already in UTF8) and implement PR feedback.

* Add SetAndConvertToUTF8 method and remove the GetUTF8 method variants with a scratch buffer.

* Remove unneeded variables.

Co-authored-by: Jan Kotas <jkotas@microsoft.com>

Co-authored-by: Aaron Robinson <arobins@microsoft.com>
Co-authored-by: Jan Kotas <jkotas@microsoft.com>
  • Loading branch information
3 people committed Jun 23, 2022
1 parent 982946a commit d17741d
Show file tree
Hide file tree
Showing 39 changed files with 210 additions and 235 deletions.
2 changes: 1 addition & 1 deletion src/coreclr/debug/daccess/daccess.cpp
Expand Up @@ -2501,7 +2501,7 @@ namespace serialization { namespace bin {
return ErrOverflow;
}

memcpy_s(dest, destSize, s.GetUTF8NoConvert(), cnt);
memcpy_s(dest, destSize, s.GetUTF8(), cnt);

return cnt;
}
Expand Down
14 changes: 7 additions & 7 deletions src/coreclr/inc/sstring.h
Expand Up @@ -168,6 +168,7 @@ class EMPTY_BASES_DECL SString : private SBuffer
void SetASCII(const ASCII *string);
void SetUTF8(const UTF8 *string);
void SetANSI(const ANSI *string);
void SetAndConvertToUTF8(const WCHAR* string);

// Set this string to a copy of the first count chars of the given string
void Set(const WCHAR *string, COUNT_T count);
Expand Down Expand Up @@ -492,17 +493,15 @@ class EMPTY_BASES_DECL SString : private SBuffer
// SString *s = ...;
// {
// StackScratchBuffer buffer;
// const UTF8 *utf8 = s->GetUTF8(buffer);
// CallFoo(utf8);
// const ANSI *ansi = s->GetANSI(buffer);
// CallFoo(ansi);
// }
// // No more pointers to returned buffer allowed.

const UTF8 *GetUTF8(AbstractScratchBuffer &scratch) const;
const UTF8 *GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) const;
const ANSI *GetANSI(AbstractScratchBuffer &scratch) const;

// Used when the representation is known, throws if the representation doesn't match
const UTF8 *GetUTF8NoConvert() const;
// You can always get a UTF8 string. This will force a conversion
// if necessary.
const UTF8 *GetUTF8() const;

// Converts/copies into the given output string
void ConvertToUnicode(SString &dest) const;
Expand Down Expand Up @@ -727,6 +726,7 @@ class EMPTY_BASES_DECL SString : private SBuffer
void ConvertASCIIToUnicode(SString &dest) const;
void ConvertToUnicode() const;
void ConvertToUnicode(const CIterator &i) const;
void ConvertToUTF8() const;

const SString &GetCompatibleString(const SString &s, SString &scratch) const;
const SString &GetCompatibleString(const SString &s, SString &scratch, const CIterator &i) const;
Expand Down
19 changes: 19 additions & 0 deletions src/coreclr/inc/sstring.inl
Expand Up @@ -651,6 +651,25 @@ inline const WCHAR *SString::GetUnicode() const
SS_RETURN GetRawUnicode();
}

// Get a const pointer to the internal buffer as a UTF8 string.
//
// Calls ConvertToUTF8() first, so although this method is const the string's
// own representation is switched to UTF8 when it is not UTF8 already.
// Per the contract below, the call is NOTHROW when the string is already in
// the UTF8 representation and may throw otherwise.
//
// NOTE(review): this contract says SUPPORTS_DAC while ConvertToUTF8() declares
// SUPPORTS_DAC_HOST_ONLY - confirm that the mismatch is intended.
inline const UTF8 *SString::GetUTF8() const
{
    SS_CONTRACT(const UTF8 *)
    {
        GC_NOTRIGGER;
        PRECONDITION(CheckPointer(this));
        SS_POSTCONDITION(CheckPointer(RETVAL));
        if (IsRepresentation(REPRESENTATION_UTF8)) NOTHROW; else THROWS;
        SUPPORTS_DAC;
    }
    SS_CONTRACT_END;

    // Make sure the internal buffer holds UTF8 before exposing it.
    ConvertToUTF8();

    SS_RETURN GetRawUTF8();
}

// Normalize the string to unicode. This will make many operations nonfailing.
inline void SString::Normalize() const
{
Expand Down
8 changes: 2 additions & 6 deletions src/coreclr/utilcode/clrconfig.cpp
Expand Up @@ -200,15 +200,11 @@ namespace
#if defined(DEBUG) && !defined(SELF_NO_HOST)
// Validate the cache and no-cache logic result in the same answer
SString nameToConvert(name);
SString nameAsUTF8;
nameToConvert.ConvertToUTF8(nameAsUTF8);
SString valueAsUTF8;
temp.ConvertToUTF8(valueAsUTF8);

CLRConfigNoCache nonCache = CLRConfigNoCache::Get(nameAsUTF8.GetUTF8NoConvert(), noPrefix);
CLRConfigNoCache nonCache = CLRConfigNoCache::Get(nameToConvert.GetUTF8(), noPrefix);
LPCSTR valueNoCache = nonCache.AsString();

_ASSERTE(SString::_stricmp(valueNoCache, valueAsUTF8.GetUTF8NoConvert()) == 0);
_ASSERTE(SString::_stricmp(valueNoCache, temp.GetUTF8()) == 0);
#endif // defined(DEBUG) && !defined(SELF_NO_HOST)
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/utilcode/debug.cpp
Expand Up @@ -364,7 +364,7 @@ bool _DbgBreakCheck(
" Image: %s\n\n",
GetCurrentProcessId(), GetCurrentProcessId(),
GetCurrentThreadId(), GetCurrentThreadId(),
szExpr, szFile, iLine, modulePath.GetUTF8NoConvert());
szExpr, szFile, iLine, modulePath.GetUTF8());

formattedMessages = TRUE;
}
Expand Down Expand Up @@ -680,11 +680,11 @@ void DECLSPEC_NORETURN __FreeBuildAssertFail(const char *szFile, int iLine, cons
" File: %s, Line: %d Image:\n%s\n",
GetCurrentProcessId(), GetCurrentProcessId(),
GetCurrentThreadId(), GetCurrentThreadId(),
szExpr, szFile, iLine, modulePath.GetUTF8NoConvert());
OutputDebugStringUtf8(buffer.GetUTF8NoConvert());
szExpr, szFile, iLine, modulePath.GetUTF8());
OutputDebugStringUtf8(buffer.GetUTF8());

// Write out the error to the console
printf(buffer.GetUTF8NoConvert());
printf(buffer.GetUTF8());

// Log to the stress log. Note that we can't include the szExpr b/c that
// may not be a string literal (particularly for formattable asserts).
Expand Down
116 changes: 56 additions & 60 deletions src/coreclr/utilcode/sstring.cpp
Expand Up @@ -469,6 +469,29 @@ void SString::SetANSI(const ANSI *string, COUNT_T count)
SS_RETURN;
}

//-----------------------------------------------------------------------------
// Set this string to a copy of the given UTF16 string transcoded to UTF8
//-----------------------------------------------------------------------------
void SString::SetAndConvertToUTF8(const WCHAR *string)
{
SS_CONTRACT_VOID
{
// !!! Check for illegal UTF8 encoding?
INSTANCE_CHECK;
PRECONDITION(CheckPointer(string, NULL_OK));
THROWS;
GC_NOTRIGGER;
SUPPORTS_DAC_HOST_ONLY;
}
SS_CONTRACT_END;

SString utf16Str(Literal, string);

utf16Str.ConvertToUTF8(*this);

SS_RETURN;
}

//-----------------------------------------------------------------------------
// Set this string to the given unicode character
//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -777,6 +800,39 @@ void SString::ConvertToUnicode(const CIterator &i) const
RETURN;
}

//-----------------------------------------------------------------------------
// Convert the internal representation for this String to UTF8.
//-----------------------------------------------------------------------------
void SString::ConvertToUTF8() const
{
CONTRACT_VOID
{
POSTCONDITION(IsRepresentation(REPRESENTATION_UTF8));
if (IsRepresentation(REPRESENTATION_UTF8)) NOTHROW; else THROWS;
GC_NOTRIGGER;
SUPPORTS_DAC_HOST_ONLY;
}
CONTRACT_END;

if (!IsRepresentation(REPRESENTATION_UTF8))
{
if (IsRepresentation(REPRESENTATION_ASCII))
{
// ASCII is a subset of UTF8, so we can just set the representation.
(const_cast<SString*>(this))->SetRepresentation(REPRESENTATION_UTF8);
}
else
{
StackSString s;
ConvertToUTF8(s);
PREFIX_ASSUME(!s.IsImmutable());
(const_cast<SString*>(this))->Set(s);
}
}

RETURN;
}

//-----------------------------------------------------------------------------
// Set s to be a copy of this string's contents, but in the unicode format.
//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -1787,66 +1843,6 @@ const CHAR *SString::GetANSI(AbstractScratchBuffer &scratch) const
SS_RETURN ((SString&)scratch).GetRawANSI();
}

//-----------------------------------------------------------------------------
// Return a UTF8 view of this string.
// When the string is already UTF8 the internal buffer is returned directly;
// otherwise the contents are converted into the caller's scratch buffer and
// the scratch buffer's storage is returned.
//-----------------------------------------------------------------------------
const UTF8 *SString::GetUTF8(AbstractScratchBuffer &scratch) const
{
    CONTRACT(const UTF8 *)
    {
        INSTANCE_CHECK_NULL;
        THROWS;
        GC_NOTRIGGER;
    }
    CONTRACT_END;

    if (!IsRepresentation(REPRESENTATION_UTF8))
    {
        // Conversion needed: write the UTF8 form into the scratch buffer.
        SString &converted = (SString&)scratch;
        ConvertToUTF8(converted);
        RETURN converted.GetRawUTF8();
    }

    RETURN GetRawUTF8();
}

// Same as GetUTF8(scratch), and additionally stores a count through pcbUtf8:
// GetRawCount() + 1 when the string is already UTF8, otherwise the value
// returned by ConvertToUTF8(scratch).
const UTF8 *SString::GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) const
{
    CONTRACT(const UTF8 *)
    {
        INSTANCE_CHECK_NULL;
        THROWS;
        GC_NOTRIGGER;
    }
    CONTRACT_END;

    if (!IsRepresentation(REPRESENTATION_UTF8))
    {
        // Conversion needed: the scratch buffer receives the UTF8 form.
        SString &converted = (SString&)scratch;
        *pcbUtf8 = ConvertToUTF8(converted);
        RETURN converted.GetRawUTF8();
    }

    *pcbUtf8 = GetRawCount() + 1;
    RETURN GetRawUTF8();
}

//-----------------------------------------------------------------------------
// Return the internal buffer as UTF8 without performing any conversion.
// The string must already be in the UTF8 representation; otherwise
// ThrowHR(E_INVALIDARG) is called. No scratch buffer is involved.
//-----------------------------------------------------------------------------
const UTF8 *SString::GetUTF8NoConvert() const
{
    CONTRACT(const UTF8 *)
    {
        INSTANCE_CHECK_NULL;
        THROWS;
        GC_NOTRIGGER;
    }
    CONTRACT_END;

    if (!IsRepresentation(REPRESENTATION_UTF8))
    {
        // Wrong representation: refuse rather than convert.
        ThrowHR(E_INVALIDARG);
    }

    RETURN GetRawUTF8();
}

//-----------------------------------------------------------------------------
// Safe version of sprintf.
// Prints formatted ansi text w/ var args to this buffer.
Expand Down
9 changes: 3 additions & 6 deletions src/coreclr/vm/array.cpp
Expand Up @@ -402,8 +402,7 @@ MethodTable* Module::CreateArrayMethodTable(TypeHandle elemTypeHnd, CorElementTy
StackSString ssElemName;
elemTypeHnd.GetName(ssElemName);

StackScratchBuffer scratch;
elemTypeHnd.GetAssembly()->ThrowTypeLoadException(ssElemName.GetUTF8(scratch), IDS_CLASSLOAD_VALUECLASSTOOLARGE);
elemTypeHnd.GetAssembly()->ThrowTypeLoadException(ssElemName.GetUTF8(), IDS_CLASSLOAD_VALUECLASSTOOLARGE);
}
}

Expand Down Expand Up @@ -510,8 +509,7 @@ MethodTable* Module::CreateArrayMethodTable(TypeHandle elemTypeHnd, CorElementTy
#ifdef _DEBUG
StackSString debugName;
TypeString::AppendType(debugName, TypeHandle(pMT));
StackScratchBuffer buff;
const char* pDebugNameUTF8 = debugName.GetUTF8(buff);
const char* pDebugNameUTF8 = debugName.GetUTF8();
S_SIZE_T safeLen = S_SIZE_T(strlen(pDebugNameUTF8))+S_SIZE_T(1);
if(safeLen.IsOverflow()) COMPlusThrowHR(COR_E_OVERFLOW);
size_t len = safeLen.Value();
Expand Down Expand Up @@ -657,8 +655,7 @@ MethodTable* Module::CreateArrayMethodTable(TypeHandle elemTypeHnd, CorElementTy
StackSString ssElemName;
elemTypeHnd.GetName(ssElemName);

StackScratchBuffer scratch;
elemTypeHnd.GetAssembly()->ThrowTypeLoadException(ssElemName.GetUTF8(scratch),
elemTypeHnd.GetAssembly()->ThrowTypeLoadException(ssElemName.GetUTF8(),
IDS_CLASSLOAD_VALUECLASSTOOLARGE);
}

Expand Down
3 changes: 1 addition & 2 deletions src/coreclr/vm/assembly.cpp
Expand Up @@ -993,8 +993,7 @@ Module *Assembly::FindModuleByName(LPCSTR pszModuleName)
SString moduleName(SString::Utf8, pszModuleName);
moduleName.LowerCase();

StackScratchBuffer buffer;
pszModuleName = moduleName.GetUTF8(buffer);
pszModuleName = moduleName.GetUTF8();

mdFile kFile = GetManifestFileToken(pszModuleName);
if (kFile == mdTokenNil)
Expand Down
21 changes: 10 additions & 11 deletions src/coreclr/vm/assemblynative.cpp
Expand Up @@ -76,7 +76,7 @@ extern "C" void QCALLTYPE AssemblyNative_InternalLoad(NativeAssemblyNameParts* p
COMPlusThrow(kArgumentException, W("Format_StringZeroLength"));

StackSString ssName;
SString(SString::Literal, pAssemblyNameParts->_pName).ConvertToUTF8(ssName);
ssName.SetAndConvertToUTF8(pAssemblyNameParts->_pName);

AssemblyMetaDataInternal asmInfo;

Expand All @@ -87,11 +87,11 @@ extern "C" void QCALLTYPE AssemblyNative_InternalLoad(NativeAssemblyNameParts* p

SmallStackSString ssLocale;
if (pAssemblyNameParts->_pCultureName != NULL)
SString(SString::Literal, pAssemblyNameParts->_pCultureName).ConvertToUTF8(ssLocale);
asmInfo.szLocale = (pAssemblyNameParts->_pCultureName != NULL) ? ssLocale.GetUTF8NoConvert() : NULL;
ssLocale.SetAndConvertToUTF8(pAssemblyNameParts->_pCultureName);
asmInfo.szLocale = (pAssemblyNameParts->_pCultureName != NULL) ? ssLocale.GetUTF8() : NULL;

// Initialize spec
spec.Init(ssName.GetUTF8NoConvert(), &asmInfo,
spec.Init(ssName.GetUTF8(), &asmInfo,
pAssemblyNameParts->_pPublicKeyOrToken, pAssemblyNameParts->_cbPublicKeyOrToken, pAssemblyNameParts->_flags);

if (pParentAssembly != NULL)
Expand Down Expand Up @@ -541,10 +541,10 @@ extern "C" BYTE * QCALLTYPE AssemblyNative_GetResource(QCall::AssemblyHandle pAs
COMPlusThrow(kArgumentNullException, W("ArgumentNull_String"));

// Get the name in UTF8
SString name(SString::Literal, wszName);
StackSString name;
name.SetAndConvertToUTF8(wszName);

StackScratchBuffer scratch;
LPCUTF8 pNameUTF8 = name.GetUTF8(scratch);
LPCUTF8 pNameUTF8 = name.GetUTF8();

if (*pNameUTF8 == '\0')
COMPlusThrow(kArgumentException, W("Format_StringZeroLength"));
Expand All @@ -571,10 +571,9 @@ extern "C" INT32 QCALLTYPE AssemblyNative_GetManifestResourceInfo(QCall::Assembl
COMPlusThrow(kArgumentNullException, W("ArgumentNull_String"));

// Get the name in UTF8
SString name(SString::Literal, wszName);

StackScratchBuffer scratch;
LPCUTF8 pNameUTF8 = name.GetUTF8(scratch);
StackSString name;
name.SetAndConvertToUTF8(wszName);
LPCUTF8 pNameUTF8 = name.GetUTF8();

if (*pNameUTF8 == '\0')
COMPlusThrow(kArgumentException, W("Format_StringZeroLength"));
Expand Down
18 changes: 14 additions & 4 deletions src/coreclr/vm/assemblyspec.cpp
Expand Up @@ -275,13 +275,23 @@ void AssemblySpec::InitializeAssemblyNameRef(_In_ BINDER_SPACE::AssemblyName* as
AssemblySpec spec;
spec.InitializeWithAssemblyIdentity(assemblyName);

StackScratchBuffer nameBuffer;
spec.SetName(assemblyName->GetSimpleName().GetUTF8(nameBuffer));
StackSString nameBuffer;
nameBuffer.SetAndConvertToUTF8(assemblyName->GetSimpleName().GetUnicode());
spec.SetName(nameBuffer.GetUTF8());

StackScratchBuffer cultureBuffer;
StackSString cultureBuffer;
if (assemblyName->Have(BINDER_SPACE::AssemblyIdentity::IDENTITY_FLAG_CULTURE))
{
LPCSTR culture = assemblyName->IsNeutralCulture() ? "" : assemblyName->GetCulture().GetUTF8(cultureBuffer);
LPCSTR culture;
if (assemblyName->IsNeutralCulture())
{
culture = "";
}
else
{
cultureBuffer.SetAndConvertToUTF8(assemblyName->GetCulture().GetUnicode());
culture = cultureBuffer.GetUTF8();
}
spec.SetCulture(culture);
}

Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/vm/bundle.cpp
Expand Up @@ -60,8 +60,9 @@ BundleFileLocation Bundle::Probe(const SString& path, bool pathIsBundleRelative)
// Bundle.Probe("path/to/exe/lib.dll") => m_probe("lib.dll")
// Bundle.Probe("path/to/exe/and/some/more/lib.dll") => m_probe("and/some/more/lib.dll")

StackScratchBuffer scratchBuffer;
LPCSTR utf8Path(path.GetUTF8(scratchBuffer));
StackSString pathBuffer;
pathBuffer.SetAndConvertToUTF8(path.GetUnicode());
LPCSTR utf8Path(pathBuffer.GetUTF8());

if (!pathIsBundleRelative)
{
Expand Down

0 comments on commit d17741d

Please sign in to comment.