From 3f0b779be8218760033ad0ff698ea88c3c7e73c9 Mon Sep 17 00:00:00 2001 From: Pawel Wodnicki Date: Mon, 10 Dec 2012 14:20:28 +0000 Subject: Merging r169719: into 3.2 release branch. Fix PR14548: SROA was crashing on a mixture of i1 and i8 loads and stores. When SROA was evaluating a mixture of i1 and i8 loads and stores, in just a particular case, it would tickle a latent bug where we compared bits to bytes rather than bits to bits. As a consequence of the latent bug, we would allow integers through which were not byte-size multiples, a situation the later rewriting code was never intended to handle. In release builds this could trigger all manner of oddities, but the reported issue in PR14548 was forming invalid bitcast instructions. The only downside of this fix is that it makes it more clear that SROA in its current form is not capable of handling mixed i1 and i8 loads and stores. Sometimes with the previous code this would work by luck, but usually it would crash, so I'm not terribly worried. I'll watch the LNT numbers just to be sure. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_32@169735 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SROA.cpp | 4 ++-- test/Transforms/SROA/basictest.ll | 29 +++++++++++++++++++++++++++++ test/Transforms/SROA/big-endian.ll | 9 ++------- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 062ec05342..2d518f735b 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2201,7 +2201,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; if (IntegerType *ITy = dyn_cast(LI->getType())) { - if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy)) + if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy)) return false; continue; } @@ -2217,7 +2217,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin == 0 && RelEnd == Size) WholeAllocaOp = true; if (IntegerType *ITy = dyn_cast(ValueTy)) { - if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy)) + if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy)) return false; continue; } diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index a291c39b33..9fe926ee2c 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1147,3 +1147,32 @@ define void @PR14465() { ret void ; CHECK: ret } + +define void @PR14548(i1 %x) { +; Handle a mixture of i1 and i8 loads and stores to allocas. This particular +; pattern caused crashes and invalid output in the PR, and its nature will +; trigger a mixture in several permutations as we resolve each alloca +; iteratively. +; Note that we don't do a particularly good *job* of handling these mixtures, +; but the hope is that this is very rare. +; CHECK: @PR14548 + +entry: + %a = alloca <{ i1 }>, align 8 + %b = alloca <{ i1 }>, align 8 +; Nothing of interest is simplified here. 
+; CHECK: alloca +; CHECK: alloca + + %b.i1 = bitcast <{ i1 }>* %b to i1* + store i1 %x, i1* %b.i1, align 8 + %b.i8 = bitcast <{ i1 }>* %b to i8* + %foo = load i8* %b.i8, align 1 + + %a.i8 = bitcast <{ i1 }>* %a to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind + %bar = load i8* %a.i8, align 1 + %a.i1 = getelementptr inbounds <{ i1 }>* %a, i32 0, i32 0 + %baz = load i1* %a.i1, align 1 + ret void +} diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll index ce82d1f30b..1ac6d25d63 100644 --- a/test/Transforms/SROA/big-endian.ll +++ b/test/Transforms/SROA/big-endian.ll @@ -82,14 +82,9 @@ entry: %a0i16ptr = bitcast i8* %a0ptr to i16* store i16 1, i16* %a0i16ptr -; CHECK: %[[mask0:.*]] = and i16 1, -16 - - %a1i4ptr = bitcast i8* %a1ptr to i4* - store i4 1, i4* %a1i4ptr -; CHECK-NEXT: %[[insert0:.*]] = or i16 %[[mask0]], 1 store i8 1, i8* %a2ptr -; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, 4294967295 +; CHECK: %[[mask1:.*]] = and i40 undef, 4294967295 ; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296 %a3i24ptr = bitcast i8* %a3ptr to i24* @@ -110,7 +105,7 @@ entry: %ai = load i56* %aiptr %ret = zext i56 %ai to i64 ret i64 %ret -; CHECK-NEXT: %[[ext4:.*]] = zext i16 %[[insert0]] to i56 +; CHECK-NEXT: %[[ext4:.*]] = zext i16 1 to i56 ; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40 ; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775 ; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]] -- cgit v1.2.3 From a713918d443b76eb00ddac838545b26d3ee8dec4 Mon Sep 17 00:00:00 2001 From: Pawel Wodnicki Date: Thu, 13 Dec 2012 22:03:18 +0000 Subject: Partial update for release notes git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_32@170151 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ReleaseNotes.html | 87 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 22 deletions(-) diff --git a/docs/ReleaseNotes.html 
b/docs/ReleaseNotes.html index a84fabaccf..611d5c8df9 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -29,12 +29,6 @@

Written by the LLVM Team

-

These are in-progress notes for the upcoming LLVM 3.2 -release.
-You may prefer the -LLVM 3.1 -Release Notes.

-

Introduction @@ -98,7 +92,9 @@ Release Notes.

In the LLVM 3.2 time-frame, the Clang team has made many improvements. Highlights include:

    -
  • ...
  • +
  • Improvements to Clang's diagnostics
  • +
  • Support for tls_model attribute
  • +
  • Type safety attributes

For more details about the changes to Clang since the 3.1 release, see the @@ -142,7 +138,8 @@ Release Notes.

-

The new LLVM compiler-rt project + +

The LLVM compiler-rt project is a simple library that provides an implementation of the low-level target-specific hooks required by code generation and other runtime components. For example, when compiling for a 32-bit target, converting a @@ -154,7 +151,12 @@ Release Notes.

The 3.2 release has the following notable changes:

    -
  • ...
  • +
  • ThreadSanitizer (TSan) - data race detector run-time library for C/C++ has been added.
  • +
  • Improvemens to AddressSanitizer including: increasing stack size limit to 256M, + better portability (iOS6,Windows,Android NDK), support for cmake based builds, enhanced error reporting.
  • + +
  • Added support for A6 'Swift' CPU.
  • +
  • divsi3 function has been enhanced to take advantage of a hardware unsigned divide when it is available.
@@ -235,7 +237,12 @@ Release Notes.

Within the LLVM 3.2 time-frame there were the following highlights:

    -
  • ...
  • +
  • isl, the integer set library used by Polly, was relicensed to the MIT license
  • +
  • isl based code generation
  • +
  • MIT licensed replacement for CLooG (LGPLv2)
  • +
  • Fine grained option handling (separation of core and border computations, control overhead vs. code size)
  • +
  • Support for FORTRAN and dragonegg
  • +
  • OpenMP code generation fixes
@@ -433,9 +440,9 @@ Release Notes.

LLVM 3.2 includes several major changes and big features:

    -
  • ...
  • -
  • New NVPTX back-end (replacing existing PTX back-end) based on NVIDIA - sources
  • +
  • Loop Vectorizer.
  • +
  • New implementation of SROA.
  • +
  • New NVPTX back-end (replacing existing PTX back-end) based on NVIDIA sources.
@@ -454,7 +461,10 @@ Release Notes.
  • Thread local variables may have a specified TLS model. See the Language Reference Manual.
  • -
  • ...
  • +
  • 'TYPE_CODE_FUNCTION_OLD' type code and autoupgrade code for old function attributes format have been removed.
  • +
  • Internal representation of the Attributes class has been converted into a pointer to an + opaque object that's uniqued by and stored in the LLVMContext object. + The Attributes class then becomes a thin wrapper around this opaque object.
@@ -492,7 +502,7 @@ Release Notes.
  • The inner most loops must have a single basic block.
  • The number of iterations are known before the loop starts to execute.
  • -
  • The loop counter needs to be incrimented by one.
  • +
  • The loop counter needs to be incremented by one.
  • The loop trip count can be a variable.
  • Loops do not need to start at zero.
  • The induction variable can be used inside the loop.
  • @@ -527,8 +537,19 @@ Release Notes. Intro to the LLVM MC Project Blog Post.

    -
      -
    • ...
    • +
        +
      • Added support for following assembler directives: .ifb, .ifnb, .ifc, + .ifnc, .purgem, .rept and .version (ELF) as well as Darwin specific + .pushsection, .popsection and .previous .
      • +
      • Enhanced handling of .lcomm directive.
      • +
      • MS style inline assembler: added implementation of the offset and TYPE operators.
      • +
      • Targets can specify minimum supported NOP size for NOP padding.
      • +
      • ELF improvements: added support for generating ELF objects on Windows.
      • +
      • MachO improvements: symbol-difference variables are marked as N_ABS, added direct-to-object attribute for data-in-code markers.
      • +
      • Added support for annotated disassembly output for x86 and arm targets.
      • +
      • Arm support has been improved by adding support for ARM TARGET2 relocation + and fixing handling of ARM-style "$d.*" labels.
      • +
      • Implemented local-exec TLS on PowerPC.
      @@ -591,7 +612,7 @@ Release Notes.

      New features and major changes in the X86 target include:

        -
      • ...
      • +
      • Small codegen optimizations, especially for AVX2.
      @@ -606,7 +627,7 @@ Release Notes.

      New features of the ARM target include:

        -
      • ...
      • +
      • Support and performance tuning for the A6 'Swift' CPU.
      @@ -643,7 +664,31 @@ Release Notes.

      New features and major changes in the MIPS target include:

        -
      • ...
      • +
      • Integrated assembler support: + MIPS32 works for both PIC and static, known limitation is the PR14456 where + R_MIPS_GPREL16 relocation is generated with the wrong addend. + MIPS64 support is incomplete, for example exception handling is not working.
      • +
      • Support for fast calling convention has been added.
      • +
      • Support for Android MIPS toolchain has been added to clang driver.
      • +
      • Added clang driver support for MIPS N32 ABI through "-mabi=n32" option.
      • +
      • MIPS32 and MIPS64 disassembler has been implemented.
      • +
      • Support for compiling programs with large GOTs (exceeding 64kB in size) has be added + through llc option "-mxgot".
      • +
      • Experimental support for MIPS32 DSP intrinsics has been added
      • +
      • Experimental support for MIPS16 with following limitations: only soft float is supported, + C++ exceptions are not supported, large stack frames (> 32000 bytes) are not supported, + direct object code emission is not supported yet (only .s).
      • +
      • Standalone assembler (llvm-mc): implementation is in progress and assembler should be + considered experimental
      • +
      • All classic JIT and MCJIT tests pass on Little and Big Endian MIPS32 platforms.
      • +
      • Inline asm support: all common constraints and operand modifiers + have been implemented.
      • +
      • Tail call optimization support has been added, use llc option "-enable-mips-tail-calls" + or clang options "-mllvm -enable-mips-tail-calls"to enable it.
      • +
      • Improved register allocation by removing registers 'FP', 'GP', 'RA' and 'AT' from the list of reserved registers.
      • +
      • Long branch expansion pass has been implemented, which expands branch + instructions with offsets that do not fit in the 16-bit field.
      • +
      • Cavium Octeon II board is used for testing builds (llvm-mips-linux builder).
      @@ -655,7 +700,6 @@ Release Notes.
      -

        Many fixes and changes across LLVM (and Clang) for better compliance with the 64-bit PowerPC ELF Application Binary Interface, interoperability with GCC, and overall 64-bit PowerPC support. Some highlights include:

        @@ -684,7 +728,6 @@ Release Notes.

        There have also been code generation improvements for both 32- and 64-bit code. Instruction scheduling support for the Freescale e500mc and e5500 cores has been added.

        -
      -- cgit v1.2.3 From 2e0b25a6327f14cbf0fffb9df0251ca32408a52b Mon Sep 17 00:00:00 2001 From: Pawel Wodnicki Date: Fri, 14 Dec 2012 05:43:03 +0000 Subject: More updates to relase notes git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_32@170178 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ReleaseNotes.html | 88 +++++++++++++++++++++----------------------------- 1 file changed, 37 insertions(+), 51 deletions(-) diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 611d5c8df9..99bdf613a0 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -40,7 +40,7 @@

      This document contains the release notes for the LLVM Compiler Infrastructure, release 3.2. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various - subprojects of LLVM, and some of the current users of the code. All LLVM + sub-projects of LLVM, and some of the current users of the code. All LLVM releases may be downloaded from the LLVM releases web site.

      @@ -71,7 +71,7 @@ code generators and supporting tools, as well as Clang, DragonEgg and compiler-rt sub-project repositories. In addition to this code, the LLVM Project includes other sub-projects that are in development. Here we - include updates on these subprojects.

      + include updates on these sub-projects.

      @@ -85,9 +85,7 @@ experience through expressive diagnostics, a high level of conformance to language standards, fast compilation, and low memory use. Like LLVM, Clang provides a modular, library-based architecture that makes it suitable for - creating or integrating with other development tools. Clang is considered a - production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86 - (32- and 64-bit), and for Darwin/ARM targets.

      + creating or integrating with other development tools.

      In the LLVM 3.2 time-frame, the Clang team has made many improvements. Highlights include:

      @@ -98,7 +96,7 @@

    For more details about the changes to Clang since the 3.1 release, see the - Clang release + Clang 3.2 release notes.

    If Clang rejects your code but another compiler accepts it, please take a @@ -152,8 +150,8 @@

    • ThreadSanitizer (TSan) - data race detector run-time library for C/C++ has been added.
    • -
    • Improvemens to AddressSanitizer including: increasing stack size limit to 256M, - better portability (iOS6,Windows,Android NDK), support for cmake based builds, enhanced error reporting.
    • +
    • Improvements to AddressSanitizer including: increasing stack size limit to 256MB, + better portability (Android NDK), support for cmake based builds, enhanced error reporting.
    • Added support for A6 'Swift' CPU.
    • divsi3 function has been enhanced to take advantage of a hardware unsigned divide when it is available.
    • @@ -230,19 +228,19 @@

      Polly is an experimental optimizer for data locality and parallelism. It currently provides high-level - loop optimizations and automatic parallelisation (using the OpenMP run time). + loop optimizations and automatic parallelization (using the OpenMP run time). Work in the area of automatic SIMD and accelerator code generation was started.

      Within the LLVM 3.2 time-frame there were the following highlights:

        -
      • isl, the integer set library used by Polly, was relicensed to the MIT license
      • -
      • isl based code generation
      • -
      • MIT licensed replacement for CLooG (LGPLv2)
      • -
      • Fine grained option handling (separation of core and border computations, control overhead vs. code size)
      • -
      • Support for FORTRAN and dragonegg
      • -
      • OpenMP code generation fixes
      • +
      • isl, the integer set library used by Polly, was relicensed under the MIT license.
      • +
      • isl based code generation.
      • +
      • MIT licensed replacement for CLooG (LGPLv2).
      • +
      • Fine grained option handling (separation of core and border computations, control overhead vs. code size).
      • +
      • Support for FORTRAN and DragonEgg.
      • +
      • OpenMP code generation fixes.
      @@ -282,7 +280,7 @@ AUdio STream. Its programming model combines two approaches: functional programming and block diagram composition. In addition with the C, C++, Java, JavaScript output formats, the Faust compiler can generate LLVM bitcode, and - works with LLVM 2.7-3.1.

      + works with LLVM 2.7-3.2.

      @@ -672,20 +670,18 @@
    • Support for Android MIPS toolchain has been added to clang driver.
    • Added clang driver support for MIPS N32 ABI through "-mabi=n32" option.
    • MIPS32 and MIPS64 disassembler has been implemented.
    • -
    • Support for compiling programs with large GOTs (exceeding 64kB in size) has be added - through llc option "-mxgot".
    • -
    • Experimental support for MIPS32 DSP intrinsics has been added
    • +
    • Support for compiling programs with large GOTs (exceeding 64kB in size) has been added + through llc option "-mxgot".
    • +
    • Added experimental support for MIPS32 DSP intrinsics.
    • Experimental support for MIPS16 with following limitations: only soft float is supported, C++ exceptions are not supported, large stack frames (> 32000 bytes) are not supported, - direct object code emission is not supported yet (only .s).
    • -
    • Standalone assembler (llvm-mc): implementation is in progress and assembler should be - considered experimental
    • + direct object code emission is not supported only .s . +
    • Standalone assembler (llvm-mc): implementation is in progress and considered experimental.
    • All classic JIT and MCJIT tests pass on Little and Big Endian MIPS32 platforms.
    • -
    • Inline asm support: all common constraints and operand modifiers - have been implemented.
    • -
    • Tail call optimization support has been added, use llc option "-enable-mips-tail-calls" +
    • Inline asm support: all common constraints and operand modifiers have been implemented.
    • +
    • Added tail call optimization support, use llc option "-enable-mips-tail-calls" or clang options "-mllvm -enable-mips-tail-calls" to enable it.
    • -
    • Improved register allocation by removing registers 'FP', 'GP', 'RA' and 'AT' from the list of reserved registers.
    • +
    • Improved register allocation by removing registers $fp, $gp, $ra and $at from the list of reserved registers.
    • Long branch expansion pass has been implemented, which expands branch instructions with offsets that do not fit in the 16-bit field.
    • Cavium Octeon II board is used for testing builds (llvm-mips-linux builder).
    • @@ -777,9 +773,11 @@ from the previous release.

        -
      • ...
      • -
      - +
    • llvm-ld and llvm-stub have been removed, llvm-ld functionality can be partially replaced by + llvm-link | opt | {llc | as, llc -filetype=obj} | ld, or fully replaced by Clang.
    • +
    • MCJIT: added support for inline assembly (requires asm parser), added faux remote target execution to lli option '-remote-mcjit'.
    • +
    + @@ -813,34 +811,22 @@ to remove a dependency on Target.

    -

    In addition, some tools have changed in this release. Some of the changes - are:

    +

    In addition, some tools have changed in this release. Some of the changes are:

      -
    • ...
    • +
    • opt: added support for '-mtriple' option.
    • +
    • llvm-mc: added '-disassemble' support for '-show-inst' and '-show-encoding' options, added '-edis' option to produce annotated + disassembly output for X86 and ARM targets.
    • +
    • libprofile: allows the profile data file name to be specified by the LLVMPROF_OUTPUT environment variable.
    • +
    • llvm-objdump: has been changed to display available targets, '-arch' option accepts x86 and x86-64 as valid arch names.
    • +
    • llc and opt: added FMA formation from pairs of FADD + FMUL or FSUB + FMUL enabled by option '-enable-excess-fp-precision' or option '-enable-unsafe-fp-math', + option '-fp-contract' controls the creation by optimizations of fused FP by selecting Fast, Standard, or Strict mode.
    • +
    • llc: object file output from llc is no longer considered experimental.
    - -

    -Python Bindings -

    - -
    - -

    Officially supported Python bindings have been added! Feature support is far - from complete. The current bindings support interfaces to:

    - -
      -
    • ...
    • -
    - -
    - - -

    Known Problems @@ -861,7 +847,7 @@ to remove a dependency on Target.

    Known problem areas include:

      -
    • The CellSPU, MSP430, and XCore backends are experimental.
    • +
    • The CellSPU, MSP430, and XCore backends are experimental, and the CellSPU backend will be removed in LLVM 3.3.
    • The integrated assembler, disassembler, and JIT is not supported by several targets. If an integrated assembler is not supported, then a -- cgit v1.2.3