From 5a1a1856a4dfa1335d937437fade5c0bbab06560 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 6 Oct 2013 20:25:49 +0000 Subject: Windows: Add support for unicode command lines Summary: The MSVCRT deliberately sends main() code-page specific characters. This isn't too useful to LLVM as we end up converting the arguments to UTF-16 and subsequently attempt to use the result as, for example, a file name. Instead, we need to have the ability to access the Unicode command line and transform it to UTF-8. This has the distinct advantage over using the MSVC-specific wmain() function as our entry point because: - wmain() doesn't work on cygwin. - wmain() only works on MinGW with caveats and only then on certain versions. - We get to keep our entry point as main(). :) N.B. This patch includes fixes to other parts of lib/Support/Windows s.t. we would be able to take advantage of getting the Unicode paths. E.G. clang spawning clang -cc1 would want to give it Unicode arguments. Reviewers: aaron.ballman, Bigcheese, rnk, ruiu Reviewed By: rnk CC: llvm-commits, ygao Differential Revision: http://llvm-reviews.chandlerc.com/D1834 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192069 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Windows/Process.inc | 64 +++++++++++++++++++++++++++++++++-------- 1 file changed, 52 insertions(+), 12 deletions(-) (limited to 'lib/Support/Windows/Process.inc') diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index 5d776504fb..7f7e06c855 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -11,18 +11,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Allocator.h" + #include "Windows.h" #include #include #include #include +#include #ifdef __MINGW32__ #if (HAVE_LIBPSAPI != 1) #error "libpsapi.a should be present" #endif + #if (HAVE_LIBSHELL32 != 1) + #error "libshell32.a should be present" + #endif #else - #pragma 
comment(lib, "psapi.lib") +#pragma comment(lib, "psapi.lib") +#pragma comment(lib, "Shell32.lib") #endif //===----------------------------------------------------------------------===// @@ -151,25 +158,58 @@ Optional Process::GetEnv(StringRef Name) { // Environment variable can be encoded in non-UTF8 encoding, and there's no // way to know what the encoding is. The only reliable way to look up // multibyte environment variable is to use GetEnvironmentVariableW(). - std::vector Buf(16); - size_t Size = 0; - for (;;) { - Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.size()); - if (Size < Buf.size()) - break; + SmallVector Buf; + size_t Size = MAX_PATH; + do { + Buf.reserve(Size); + Size = GetEnvironmentVariableW(&NameUTF16[0], &Buf[0], Buf.capacity()); + if (Size == 0) + return None; + // Try again with larger buffer. - Buf.resize(Size + 1); - } - if (Size == 0) - return None; + } while (Size > Buf.capacity()); + Buf.set_size(Size); // Convert the result from UTF-16 to UTF-8. 
- SmallVector Res; + SmallVector Res; if (error_code ec = windows::UTF16ToUTF8(&Buf[0], Size, Res)) return None; return std::string(&Res[0]); } +error_code +Process::GetArgumentVector(SmallVectorImpl &Args, + ArrayRef, + SpecificBumpPtrAllocator &ArgAllocator) { + int NewArgCount; + error_code ec; + + wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(), + &NewArgCount); + if (!UnicodeCommandLine) + return windows_error(::GetLastError()); + + Args.reserve(NewArgCount); + + for (int i = 0; i < NewArgCount; ++i) { + SmallVector NewArgString; + ec = windows::UTF16ToUTF8(UnicodeCommandLine[i], + wcslen(UnicodeCommandLine[i]), + NewArgString); + if (ec) + break; + + char *Buffer = ArgAllocator.Allocate(NewArgString.size() + 1); + ::memcpy(Buffer, NewArgString.data(), NewArgString.size() + 1); + Args.push_back(Buffer); + } + LocalFree(UnicodeCommandLine); + if (ec) + return ec; + + return error_code::success(); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(0); } -- cgit v1.2.3