Skip to content

16 bit multiplication always produces zero result #129

Closed
@carlos4242

Description

@carlos4242

Another one from the swift team. I'm surprised no one has found this yet.

@llvm.umul.with.overflow.i16(i16, i16) seems to be lowered to incorrect assembly. Specifically, it ends up moving the two 16-bit values into the top two bytes of 32-bit numbers, then uses __mulsi3 to multiply them together into a 32-bit result, from which it takes the top two bytes. This will always produce zero: (a * 2^16) * (b * 2^16) = a * b * 2^32, so the low 32 bits of the product, which are all a 32-bit multiply returns, are zero.

In my opinion, if it's using __mulsi3, it should put the two 16-bit numbers into the bottom two bytes of each input and take the bottom two bytes of the output; any non-zero value in the top two bytes after the multiplication should then be interpreted as an overflow, and the flag set accordingly.

Here's the llvm ir in a test case as a patch to llvm...

diff --git a/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll b/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
new file mode 100644
index 00000000000..12c4030f943
--- /dev/null
+++ b/test/CodeGen/AVR/umul.with.overflow.i16-bug.ll
@@ -0,0 +1,39 @@
+; RUN: llc -O1 < %s -march=avr | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9"
+
+%Vs6UInt16 = type <{ i16 }>
+%Sb = type <{ i1 }>
+
+define hidden void @_TF4main13setServoAngleFT5angleVs6UInt16_T_(i16) #0 { ; repro: multiplies the i16 argument by 11 via umul.with.overflow and prints the product
+entry:
+  %adjustedAngle = alloca %Vs6UInt16, align 2 ; stack slot for the local copy of the argument
+  %1 = bitcast %Vs6UInt16* %adjustedAngle to i8* ; %1 is not used below
+  %adjustedAngle._value = getelementptr inbounds %Vs6UInt16, %Vs6UInt16* %adjustedAngle, i32 0, i32 0
+  store i16 %0, i16* %adjustedAngle._value, align 2 ; spill the incoming argument %0 into the slot
+
+;print(unsignedInt: adjustedAngle &* UInt16(11))
+; breaks here
+  %adjustedAngle._value2 = getelementptr inbounds %Vs6UInt16, %Vs6UInt16* %adjustedAngle, i32 0, i32 0
+  %2 = load i16, i16* %adjustedAngle._value2, align 2 ; reload the value just stored
+
+  %3 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %2, i16 11) ; returns { product, overflow flag }
+  %4 = extractvalue { i16, i1 } %3, 0 ; the 16-bit product; this is the value passed to print below
+  %5 = extractvalue { i16, i1 } %3, 1 ; the overflow flag; not used below
+
+  ; above code looks fine, how is it lowered?
+  %6 = call i1 @_TIF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_A0_() ; i1 passed as print's second argument
+  call void @_TF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_(i16 %4, i1 %6)
+
+  ret void
+}
+
+declare void @_TF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_(i16, i1) #0
+declare i1 @_TIF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_A0_() #0
+
+; Function Attrs: nounwind readnone speculatable
+declare { i16, i1 } @llvm.umul.with.overflow.i16(i16, i16) #2
+
+attributes #0 = { "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "target-cpu"="core2" "target-features"="+ssse3,+cx16,+fxsr,+mmx,+x87,+sse,+sse2,+sse3" }
+attributes #2 = { nounwind readnone speculatable }

(Note that I haven't yet put in the FileCheck directives because I haven't decided/worked out what the compiler should be doing here yet!)

FYI, the original source, with debug statements added, was something like:

/// Minimal reproduction of the bug: copies `angle` into a local, multiplies
/// it by 11 with the `&*` operator, and prints the product.
func setServoAngle(angle: UInt16) {
  var adjustedAngle: UInt16 = angle
  // `&*` is Swift's overflow (wrapping) multiplication operator.
  // For example, an input of 90 is expected to print 990.
  print(unsignedInt: adjustedAngle &* UInt16(11))
}

This produces the following assembly...

	.text
	.file	"<stdin>"
	.hidden	_TF4main13setServoAngleFT5angleVs6UInt16_T_ ; -- Begin function _TF4main13setServoAngleFT5angleVs6UInt16_T_
	.globl	_TF4main13setServoAngleFT5angleVs6UInt16_T_
	.p2align	1
	.type	_TF4main13setServoAngleFT5angleVs6UInt16_T_,@function
;-----------------------------------------------------------------------
; AVR code emitted for the function that multiplies its 16-bit argument
; by 11 and prints the product.
; In:    r24:r25 = the 16-bit argument (it is stored to the stack slot
;        at Y+1/Y+2 below without being loaded from anywhere first).
; Saves: r28, r29 (the Y pointer), r16, r17 -- pushed on entry, popped
;        before ret.
; Calls: __mulsi3, then the two print helpers.
;-----------------------------------------------------------------------
_TF4main13setServoAngleFT5angleVs6UInt16_T_: ; @_TF4main13setServoAngleFT5angleVs6UInt16_T_
; %bb.0:                                ; %entry
	; Prologue: save the registers this function overwrites.
	push	r28
	push	r29
	push	r16
	push	r17
	; Y (r29:r28) = stack pointer read from I/O 61/62, minus 2 bytes for the local slot.
	in	r28, 61
	in	r29, 62
	sbiw	r28, 2
	; Write the new stack pointer back with interrupts disabled:
	; I/O 63 (status register) is saved in r0 and restored between the two
	; stack-pointer byte writes.
	in	r0, 63
	cli
	out	62, r29
	out	63, r0
	out	61, r28
	; Spill the incoming argument into the local slot. r24/r25 are not
	; modified again before the call to __mulsi3.
	std	Y+1, r24
	std	Y+2, r25
	; Set up the operands for __mulsi3:
	;   r18..r21 = 0, 0, 11, 0
	;   r22..r25 = 0, 0, <argument low>, <argument high>
	; NOTE(review): assuming r18 and r22 are the least-significant bytes of
	; the two 32-bit operands (avr-gcc convention -- confirm), these values
	; are 11 << 16 and argument << 16. The low 32 bits of their product are
	; then always zero, which matches the "always prints 0" symptom.
	ldi	r20, 11
	ldi	r21, 0
	ldi	r18, 0
	ldi	r19, 0
	mov	r22, r18
	mov	r23, r19
	call	__mulsi3
	; Keep r24:r25 of the result in r16:r17 so it survives the next call.
	mov	r16, r24
	mov	r17, r25
	call	_TIF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_A0_
	; r24 returned by the call above goes to r22; the saved product goes
	; back into r24:r25 for the print call.
	mov	r22, r24
	mov	r24, r16
	mov	r25, r17
	call	_TF3AVR5printFT11unsignedIntVs6UInt1610addNewlineSb_T_
	; Epilogue: release the 2-byte slot and write the stack pointer back,
	; using the same interrupt-safe sequence as the prologue.
	adiw	r28, 2
	in	r0, 63
	cli
	out	62, r29
	out	63, r0
	out	61, r28
	; Restore the saved registers in reverse order of the pushes.
	pop	r17
	pop	r16
	pop	r29
	pop	r28
	ret
.Lfunc_end0:
	.size	_TF4main13setServoAngleFT5angleVs6UInt16_T_, .Lfunc_end0-_TF4main13setServoAngleFT5angleVs6UInt16_T_
                                        ; -- End function

From what I know, __mulsi3 takes the 32-bit value in r18-r21, multiplies it by the 32-bit value in r22-r25, and stores the 32-bit result in r22-r25. (I'm not sure how it detects overflow.)

It should be multiplying the input value, e.g. 90 by 11 then printing out the result. Instead it always prints 0.

As I read this assembly, it's moving both input values into the top two bytes of 32 bit numbers, which looks broken. Either it should move them to the bottom two bytes or use a different function.

I'd love to investigate this, but I'll need some pointers from people on where this all happens. I couldn't even find __mulsi3 by grepping through the LLVM source code. No idea how this is made!

Metadata

Metadata

Assignees

No one assigned

    Labels

    has-local-patch: A patch exists but has not been applied to upstream LLVM

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions