Index: src/cmd/compile/internal/ssa/rewrite386.go
--- src/cmd/compile/internal/ssa/rewrite386.go.orig
+++ src/cmd/compile/internal/ssa/rewrite386.go
@@ -8924,6 +8924,7 @@ func rewriteValue386_OpMove(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
+	config := b.Func.Config
 	typ := &b.Func.Config.Types
 	// match: (Move [0] _ _ mem)
 	// result: mem
@@ -9112,14 +9113,14 @@ func rewriteValue386_OpMove(v *Value) bool {
 		return true
 	}
 	// match: (Move [s] dst src mem)
-	// cond: s > 8 && s <= 4*128 && s%4 == 0 && logLargeCopy(v, s)
+	// cond: s > 8 && s <= 4*128 && s%4 == 0 && !config.noDuffDevice && logLargeCopy(v, s)
 	// result: (DUFFCOPY [10*(128-s/4)] dst src mem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
 		dst := v_0
 		src := v_1
 		mem := v_2
-		if !(s > 8 && s <= 4*128 && s%4 == 0 && logLargeCopy(v, s)) {
+		if !(s > 8 && s <= 4*128 && s%4 == 0 && !config.noDuffDevice && logLargeCopy(v, s)) {
 			break
 		}
 		v.reset(Op386DUFFCOPY)
@@ -9128,14 +9129,14 @@ func rewriteValue386_OpMove(v *Value) bool {
 		return true
 	}
 	// match: (Move [s] dst src mem)
-	// cond: s > 4*128 && s%4 == 0 && logLargeCopy(v, s)
+	// cond: (s > 4*128 || config.noDuffDevice) && s%4 == 0 && logLargeCopy(v, s)
 	// result: (REPMOVSL dst src (MOVLconst [int32(s/4)]) mem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
 		dst := v_0
 		src := v_1
 		mem := v_2
-		if !(s > 4*128 && s%4 == 0 && logLargeCopy(v, s)) {
+		if !((s > 4*128 || config.noDuffDevice) && s%4 == 0 && logLargeCopy(v, s)) {
 			break
 		}
 		v.reset(Op386REPMOVSL)
@@ -10574,6 +10575,7 @@ func rewriteValue386_OpZero(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
+	config := b.Func.Config
 	typ := &b.Func.Config.Types
 	// match: (Zero [0] _ mem)
 	// result: mem
@@ -10767,13 +10769,13 @@ func rewriteValue386_OpZero(v *Value) bool {
 		return true
 	}
 	// match: (Zero [s] destptr mem)
-	// cond: s > 16 && s <= 4*128 && s%4 == 0
+	// cond: s > 16 && s <= 4*128 && s%4 == 0 && !config.noDuffDevice
 	// result: (DUFFZERO [1*(128-s/4)] destptr (MOVLconst [0]) mem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
 		destptr := v_0
 		mem := v_1
-		if !(s > 16 && s <= 4*128 && s%4 == 0) {
+		if !(s > 16 && s <= 4*128 && s%4 == 0 && !config.noDuffDevice) {
 			break
 		}
 		v.reset(Op386DUFFZERO)
@@ -10784,13 +10786,13 @@ func rewriteValue386_OpZero(v *Value) bool {
 		return true
 	}
 	// match: (Zero [s] destptr mem)
-	// cond: s > 4*128 && s%4 == 0
+	// cond: (s > 4*128 || (config.noDuffDevice && s > 16)) && s%4 == 0
 	// result: (REPSTOSL destptr (MOVLconst [int32(s/4)]) (MOVLconst [0]) mem)
 	for {
 		s := auxIntToInt64(v.AuxInt)
 		destptr := v_0
 		mem := v_1
-		if !(s > 4*128 && s%4 == 0) {
+		if !((s > 4*128 || (config.noDuffDevice && s > 16)) && s%4 == 0) {
 			break
 		}
 		v.reset(Op386REPSTOSL)
