From 4cd18f98f724bef7e018ba64aaece5dab949f648 Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Thu, 13 Jan 2011 22:08:15 +0000
Subject: memcpy + metadata = bliss :)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123405 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/README.txt | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index c3a9330ba6..16be2e9004 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2259,3 +2259,51 @@ Since we know that x+2.0 doesn't care about the sign of any zeros in X, we can
 transform the fmul to 0.0, and then the fadd to 2.0.
 
 //===---------------------------------------------------------------------===//
+
+We should enhance memcpy/memmove/memset to allow a metadata node on them
+indicating that some bytes of the transfer are undefined. This is useful for
+frontends like clang when lowering struct copies where some elements of the
+struct are undefined. Consider something like this:
+
+struct x {
+  char a;
+  int b[4];
+};
+void foo(struct x *P);
+struct x testfunc() {
+  struct x V1, V2;
+  foo(&V1);
+  V2 = V1;
+
+  return V2;
+}
+
+We currently compile this to:
+$ clang t.c -S -o - -O0 -emit-llvm | opt -scalarrepl -S
+
+
+%struct.x = type { i8, [4 x i32] }
+
+define void @testfunc(%struct.x* sret %agg.result) nounwind ssp {
+entry:
+  %V1 = alloca %struct.x, align 4
+  call void @foo(%struct.x* %V1)
+  %tmp1 = bitcast %struct.x* %V1 to i8*
+  %0 = bitcast %struct.x* %V1 to i160*
+  %srcval1 = load i160* %0, align 4
+  %tmp2 = bitcast %struct.x* %agg.result to i8*
+  %1 = bitcast %struct.x* %agg.result to i160*
+  store i160 %srcval1, i160* %1, align 4
+  ret void
+}
+
+This happens because SRoA sees that the temp alloca is being memcpy'd into and
+out of, and because it has holes, it has to be conservative. If we knew about
+the holes, then this could be much, much better.
+
+Having information about these holes would also improve memcpy (etc.) lowering
+at llc time when it gets inlined, because we could use smaller transfers. This
+would also avoid partial register stalls in some important cases.
+
+//===---------------------------------------------------------------------===//
+
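
The note above leaves the encoding of that metadata open. As a rough sketch
(the !undef.bytes kind name, its [start, end) byte-range encoding, and the IR
below are invented for illustration, not an existing feature), the struct copy
that clang emits before SRoA could be annotated to say that bytes 1-3 of the
20-byte %struct.x (the padding after 'char a') are undefined:

%struct.x = type { i8, [4 x i32] }

define void @testfunc(%struct.x* sret %agg.result) nounwind ssp {
entry:
  %V1 = alloca %struct.x, align 4
  call void @foo(%struct.x* %V1)
  %src = bitcast %struct.x* %V1 to i8*
  %dst = bitcast %struct.x* %agg.result to i8*
  ; hypothetical annotation: byte range [1, 4) of this transfer is undefined
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 20, i32 4,
                                       i1 false), !undef.bytes !0
  ret void
}

declare void @foo(%struct.x*)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind

!0 = metadata !{i64 1, i64 4}    ; half-open [start, end) byte range

With something along these lines attached to the copy, SRoA would know which
bytes it can treat as don't-care instead of conservatively keeping the whole
i160 value live.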
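
For the same example, hole information would let the copy be split around the
padding. A hand-written sketch of the kind of output that would then become
possible (no current pass produces this; it only illustrates the smaller
transfers the note asks for), copying 'a' as an i8 and 'b' as an i128 instead
of dragging all 20 bytes through a single i160 value:

define void @testfunc(%struct.x* sret %agg.result) nounwind ssp {
entry:
  %V1 = alloca %struct.x, align 4
  call void @foo(%struct.x* %V1)
  ; copy byte 0 ('a') and bytes 4-19 ('b'); skip the 3 padding bytes entirely
  %a.src = getelementptr inbounds %struct.x* %V1, i32 0, i32 0
  %a = load i8* %a.src, align 4
  %a.dst = getelementptr inbounds %struct.x* %agg.result, i32 0, i32 0
  store i8 %a, i8* %a.dst, align 4
  %b.src = getelementptr inbounds %struct.x* %V1, i32 0, i32 1
  %b.src.cast = bitcast [4 x i32]* %b.src to i128*
  %b = load i128* %b.src.cast, align 4
  %b.dst = getelementptr inbounds %struct.x* %agg.result, i32 0, i32 1
  %b.dst.cast = bitcast [4 x i32]* %b.dst to i128*
  store i128 %b, i128* %b.dst.cast, align 4
  ret void
}

At llc time the same information would give the inlined memcpy more freedom:
the padding bytes neither have to be preserved nor copied, so the expansion can
pick whatever transfer sizes are cheapest for the 17 bytes that actually
matter.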