1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
|
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/compile/internal/logopt"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/s390x"
"cmd/internal/objabi"
"cmd/internal/src"
"encoding/binary"
"fmt"
"io"
"math"
"math/bits"
"os"
"path/filepath"
)
type deadValueChoice bool
const (
leaveDeadValues deadValueChoice = false
removeDeadValues = true
)
// deadcode indicates that rewrite should try to remove any values that become dead.
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
// repeat rewrites until we find no more rewrites
pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
pendingLines.clear()
debug := f.pass.debug
if debug > 1 {
fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
}
for {
change := false
for _, b := range f.Blocks {
var b0 *Block
if debug > 1 {
b0 = new(Block)
*b0 = *b
b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
}
for i, c := range b.ControlValues() {
for c.Op == OpCopy {
c = c.Args[0]
b.ReplaceControl(i, c)
}
}
if rb(b) {
change = true
if debug > 1 {
fmt.Printf("rewriting %s -> %s\n", b0.LongString(), b.LongString())
}
}
for j, v := range b.Values {
var v0 *Value
if debug > 1 {
v0 = new(Value)
*v0 = *v
v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
}
if v.Uses == 0 && v.removeable() {
if v.Op != OpInvalid && deadcode == removeDeadValues {
// Reset any values that are now unused, so that we decrement
// the use count of all of its arguments.
// Not quite a deadcode pass, because it does not handle cycles.
// But it should help Uses==1 rules to fire.
v.reset(OpInvalid)
change = true
}
// No point rewriting values which aren't used.
continue
}
vchange := phielimValue(v)
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
}
// Eliminate copy inputs.
// If any copy input becomes unused, mark it
// as invalid and discard its argument. Repeat
// recursively on the discarded argument.
// This phase helps remove phantom "dead copy" uses
// of a value so that a x.Uses==1 rule condition
// fires reliably.
for i, a := range v.Args {
if a.Op != OpCopy {
continue
}
aa := copySource(a)
v.SetArg(i, aa)
// If a, a copy, has a line boundary indicator, attempt to find a new value
// to hold it. The first candidate is the value that will replace a (aa),
// if it shares the same block and line and is eligible.
// The second option is v, which has a as an input. Because aa is earlier in
// the data flow, it is the better choice.
if a.Pos.IsStmt() == src.PosIsStmt {
if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
aa.Pos = aa.Pos.WithIsStmt()
} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
v.Pos = v.Pos.WithIsStmt()
} else {
// Record the lost line and look for a new home after all rewrites are complete.
// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
// line to appear in more than one block, but only one block is stored, so if both end
// up here, then one will be lost.
pendingLines.set(a.Pos, int32(a.Block.ID))
}
a.Pos = a.Pos.WithNotStmt()
}
vchange = true
for a.Uses == 0 {
b := a.Args[0]
a.reset(OpInvalid)
a = b
}
}
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
}
// apply rewrite function
if rv(v) {
vchange = true
// If value changed to a poor choice for a statement boundary, move the boundary
if v.Pos.IsStmt() == src.PosIsStmt {
if k := nextGoodStatementIndex(v, j, b); k != j {
v.Pos = v.Pos.WithNotStmt()
b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
}
}
}
change = change || vchange
if vchange && debug > 1 {
fmt.Printf("rewriting %s -> %s\n", v0.LongString(), v.LongString())
}
}
}
if !change {
break
}
}
// remove clobbered values
for _, b := range f.Blocks {
j := 0
for i, v := range b.Values {
vl := v.Pos
if v.Op == OpInvalid {
if v.Pos.IsStmt() == src.PosIsStmt {
pendingLines.set(vl, int32(b.ID))
}
f.freeValue(v)
continue
}
if v.Pos.IsStmt() != src.PosNotStmt && pendingLines.get(vl) == int32(b.ID) {
pendingLines.remove(vl)
v.Pos = v.Pos.WithIsStmt()
}
if i != j {
b.Values[j] = v
}
j++
}
if pendingLines.get(b.Pos) == int32(b.ID) {
b.Pos = b.Pos.WithIsStmt()
pendingLines.remove(b.Pos)
}
b.truncateValues(j)
}
}
// Common functions called from rewriting rules
func is64BitFloat(t *types.Type) bool {
return t.Size() == 8 && t.IsFloat()
}
func is32BitFloat(t *types.Type) bool {
return t.Size() == 4 && t.IsFloat()
}
func is64BitInt(t *types.Type) bool {
return t.Size() == 8 && t.IsInteger()
}
func is32BitInt(t *types.Type) bool {
return t.Size() == 4 && t.IsInteger()
}
func is16BitInt(t *types.Type) bool {
return t.Size() == 2 && t.IsInteger()
}
func is8BitInt(t *types.Type) bool {
return t.Size() == 1 && t.IsInteger()
}
func isPtr(t *types.Type) bool {
return t.IsPtrShaped()
}
func isSigned(t *types.Type) bool {
return t.IsSigned()
}
// mergeSym merges two symbolic offsets. There is no real merging of
// offsets, we just pick the non-nil one.
func mergeSym(x, y Sym) Sym {
if x == nil {
return y
}
if y == nil {
return x
}
panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
}
func canMergeSym(x, y Sym) bool {
return x == nil || y == nil
}
// canMergeLoadClobber reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering.
func canMergeLoadClobber(target, load, x *Value) bool {
// The register containing x is going to get clobbered.
// Don't merge if we still need the value of x.
// We don't have liveness information here, but we can
// approximate x dying with:
// 1) target is x's only use.
// 2) target is not in a deeper loop than x.
if x.Uses != 1 {
return false
}
loopnest := x.Block.Func.loopnest()
loopnest.calculateDepths()
if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
return false
}
return canMergeLoad(target, load)
}
// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
func canMergeLoad(target, load *Value) bool {
if target.Block.ID != load.Block.ID {
// If the load is in a different block do not merge it.
return false
}
// We can't merge the load into the target if the load
// has more than one use.
if load.Uses != 1 {
return false
}
mem := load.MemoryArg()
// We need the load's memory arg to still be alive at target. That
// can't be the case if one of target's args depends on a memory
// state that is a successor of load's memory arg.
//
// For example, it would be invalid to merge load into target in
// the following situation because newmem has killed oldmem
// before target is reached:
// load = read ... oldmem
// newmem = write ... oldmem
// arg0 = read ... newmem
// target = add arg0 load
//
// If the argument comes from a different block then we can exclude
// it immediately because it must dominate load (which is in the
// same block as target).
var args []*Value
for _, a := range target.Args {
if a != load && a.Block.ID == target.Block.ID {
args = append(args, a)
}
}
// memPreds contains memory states known to be predecessors of load's
// memory state. It is lazily initialized.
var memPreds map[*Value]bool
for i := 0; len(args) > 0; i++ {
const limit = 100
if i >= limit {
// Give up if we have done a lot of iterations.
return false
}
v := args[len(args)-1]
args = args[:len(args)-1]
if target.Block.ID != v.Block.ID {
// Since target and load are in the same block
// we can stop searching when we leave the block.
continue
}
if v.Op == OpPhi {
// A Phi implies we have reached the top of the block.
// The memory phi, if it exists, is always
// the first logical store in the block.
continue
}
if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
// We could handle this situation however it is likely
// to be very rare.
return false
}
if v.Op.SymEffect()&SymAddr != 0 {
// This case prevents an operation that calculates the
// address of a local variable from being forced to schedule
// before its corresponding VarDef.
// See issue 28445.
// v1 = LOAD ...
// v2 = VARDEF
// v3 = LEAQ
// v4 = CMPQ v1 v3
// We don't want to combine the CMPQ with the load, because
// that would force the CMPQ to schedule before the VARDEF, which
// in turn requires the LEAQ to schedule before the VARDEF.
return false
}
if v.Type.IsMemory() {
if memPreds == nil {
// Initialise a map containing memory states
// known to be predecessors of load's memory
// state.
memPreds = make(map[*Value]bool)
m := mem
const limit = 50
for i := 0; i < limit; i++ {
if m.Op == OpPhi {
// The memory phi, if it exists, is always
// the first logical store in the block.
break
}
if m.Block.ID != target.Block.ID {
break
}
if !m.Type.IsMemory() {
break
}
memPreds[m] = true
if len(m.Args) == 0 {
break
}
m = m.MemoryArg()
}
}
// We can merge if v is a predecessor of mem.
//
// For example, we can merge load into target in the
// following scenario:
// x = read ... v
// mem = write ... v
// load = read ... mem
// target = add x load
if memPreds[v] {
continue
}
return false
}
if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
// If v takes mem as an input then we know mem
// is valid at this point.
continue
}
for _, a := range v.Args {
if target.Block.ID == a.Block.ID {
args = append(args, a)
}
}
}
return true
}
// isSameCall reports whether sym is the same as the given named symbol
func isSameCall(sym interface{}, name string) bool {
fn := sym.(*AuxCall).Fn
return fn != nil && fn.String() == name
}
// nlz returns the number of leading zeros.
func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
func nlz8(x int8) int { return bits.LeadingZeros8(uint8(x)) }
// ntzX returns the number of trailing zeros.
func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
func ntz8(x int8) int { return bits.TrailingZeros8(uint8(x)) }
func oneBit(x int64) bool { return x&(x-1) == 0 && x != 0 }
func oneBit8(x int8) bool { return x&(x-1) == 0 && x != 0 }
func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
// nto returns the number of trailing ones.
func nto(x int64) int64 {
return int64(ntz64(^x))
}
// logX returns logarithm of n base 2.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
return int64(bits.Len64(uint64(n))) - 1
}
// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
return int64(bits.Len32(uint32(n))) - 1
}
// isPowerOfTwo functions report whether n is a power of 2.
func isPowerOfTwo8(n int8) bool {
return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo16(n int16) bool {
return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo32(n int32) bool {
return n > 0 && n&(n-1) == 0
}
func isPowerOfTwo64(n int64) bool {
return n > 0 && n&(n-1) == 0
}
// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
n := uint64(in)
return n > 0 && n&(n-1) == 0
}
// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
n := uint64(uint32(in))
return n > 0 && n&(n-1) == 0
}
// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
return n == int64(int32(n))
}
// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
return n == int64(int16(n))
}
// is8Bit reports whether n can be represented as a signed 8 bit integer.
func is8Bit(n int64) bool {
return n == int64(int8(n))
}
// isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
func isU8Bit(n int64) bool {
return n == int64(uint8(n))
}
// isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
func isU12Bit(n int64) bool {
return 0 <= n && n < (1<<12)
}
// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
return n == int64(uint16(n))
}
// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
return n == int64(uint32(n))
}
// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
return -(1<<19) <= n && n < (1<<19)
}
// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
if b {
return 1
}
return 0
}
// b2i32 translates a boolean value to 0 or 1.
func b2i32(b bool) int32 {
if b {
return 1
}
return 0
}
// shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
// A shift is bounded if it is shifting by less than the width of the shifted value.
func shiftIsBounded(v *Value) bool {
return v.AuxInt != 0
}
// truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
// of the mantissa. It will panic if the truncation results in lost information.
func truncate64Fto32F(f float64) float32 {
if !isExactFloat32(f) {
panic("truncate64Fto32F: truncation is not exact")
}
if !math.IsNaN(f) {
return float32(f)
}
// NaN bit patterns aren't necessarily preserved across conversion
// instructions so we need to do the conversion manually.
b := math.Float64bits(f)
m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
// | sign | exponent | mantissa |
r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
return math.Float32frombits(r)
}
// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
if !math.IsNaN(float64(f)) {
return float64(f)
}
// NaN bit patterns aren't necessarily preserved across conversion
// instructions so we need to do the conversion manually.
b := uint64(math.Float32bits(f))
// | sign | exponent | mantissa |
r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
return math.Float64frombits(r)
}
// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
return v.AuxInt == 0
}
// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
if f != f {
panic("can't encode a NaN in AuxInt field")
}
return int64(math.Float64bits(f))
}
// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
if f != f {
panic("can't encode a NaN in AuxInt field")
}
return int64(math.Float64bits(extend32Fto64F(f)))
}
// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
return truncate64Fto32F(math.Float64frombits(uint64(i)))
}
// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
return math.Float64frombits(uint64(i))
}
func auxIntToBool(i int64) bool {
if i == 0 {
return false
}
return true
}
func auxIntToInt8(i int64) int8 {
return int8(i)
}
func auxIntToInt16(i int64) int16 {
return int16(i)
}
func auxIntToInt32(i int64) int32 {
return int32(i)
}
func auxIntToInt64(i int64) int64 {
return i
}
func auxIntToUint8(i int64) uint8 {
return uint8(i)
}
func auxIntToFloat32(i int64) float32 {
return float32(math.Float64frombits(uint64(i)))
}
func auxIntToFloat64(i int64) float64 {
return math.Float64frombits(uint64(i))
}
func auxIntToValAndOff(i int64) ValAndOff {
return ValAndOff(i)
}
func auxIntToArm64BitField(i int64) arm64BitField {
return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
if x != 0 {
panic("nonzero int128 not allowed")
}
return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
return flagConstant(x)
}
func auxIntToOp(cc int64) Op {
return Op(cc)
}
func boolToAuxInt(b bool) int64 {
if b {
return 1
}
return 0
}
func int8ToAuxInt(i int8) int64 {
return int64(i)
}
func int16ToAuxInt(i int16) int64 {
return int64(i)
}
func int32ToAuxInt(i int32) int64 {
return int64(i)
}
func int64ToAuxInt(i int64) int64 {
return int64(i)
}
func uint8ToAuxInt(i uint8) int64 {
return int64(int8(i))
}
func float32ToAuxInt(f float32) int64 {
return int64(math.Float64bits(float64(f)))
}
func float64ToAuxInt(f float64) int64 {
return int64(math.Float64bits(f))
}
func valAndOffToAuxInt(v ValAndOff) int64 {
return int64(v)
}
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
return int64(v)
}
func int128ToAuxInt(x int128) int64 {
if x != 0 {
panic("nonzero int128 not allowed")
}
return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
return int64(x)
}
func opToAuxInt(o Op) int64 {
return int64(o)
}
func auxToString(i interface{}) string {
return i.(string)
}
func auxToSym(i interface{}) Sym {
// TODO: kind of a hack - allows nil interface through
s, _ := i.(Sym)
return s
}
func auxToType(i interface{}) *types.Type {
return i.(*types.Type)
}
func auxToCall(i interface{}) *AuxCall {
return i.(*AuxCall)
}
func auxToS390xCCMask(i interface{}) s390x.CCMask {
return i.(s390x.CCMask)
}
func auxToS390xRotateParams(i interface{}) s390x.RotateParams {
return i.(s390x.RotateParams)
}
func stringToAux(s string) interface{} {
return s
}
func symToAux(s Sym) interface{} {
return s
}
func callToAux(s *AuxCall) interface{} {
return s
}
func typeToAux(t *types.Type) interface{} {
return t
}
func s390xCCMaskToAux(c s390x.CCMask) interface{} {
return c
}
func s390xRotateParamsToAux(r s390x.RotateParams) interface{} {
return r
}
// uaddOvf reports whether unsigned a+b would overflow.
func uaddOvf(a, b int64) bool {
return uint64(a)+uint64(b) < uint64(a)
}
// de-virtualize an InterCall
// 'sym' is the symbol for the itab
func devirt(v *Value, aux interface{}, sym Sym, offset int64) *AuxCall {
f := v.Block.Func
n, ok := sym.(*obj.LSym)
if !ok {
return nil
}
lsym := f.fe.DerefItab(n, offset)
if f.pass.debug > 0 {
if lsym != nil {
f.Warnl(v.Pos, "de-virtualizing call")
} else {
f.Warnl(v.Pos, "couldn't de-virtualize call")
}
}
if lsym == nil {
return nil
}
va := aux.(*AuxCall)
return StaticAuxCall(lsym, va.args, va.results)
}
// de-virtualize an InterLECall
// 'sym' is the symbol for the itab
func devirtLESym(v *Value, aux interface{}, sym Sym, offset int64) *obj.LSym {
n, ok := sym.(*obj.LSym)
if !ok {
return nil
}
f := v.Block.Func
lsym := f.fe.DerefItab(n, offset)
if f.pass.debug > 0 {
if lsym != nil {
f.Warnl(v.Pos, "de-virtualizing call")
} else {
f.Warnl(v.Pos, "couldn't de-virtualize call")
}
}
if lsym == nil {
return nil
}
return lsym
}
func devirtLECall(v *Value, sym *obj.LSym) *Value {
v.Op = OpStaticLECall
v.Aux.(*AuxCall).Fn = sym
v.RemoveArg(0)
return v
}
// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
if p1 == p2 {
return true
}
if p1.Op != p2.Op {
return false
}
switch p1.Op {
case OpOffPtr:
return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
case OpAddr, OpLocalAddr:
// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
// Checking for value equality only works after [z]cse has run.
return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
case OpAddPtr:
return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
}
return false
}
func isStackPtr(v *Value) bool {
for v.Op == OpOffPtr || v.Op == OpAddPtr {
v = v.Args[0]
}
return v.Op == OpSP || v.Op == OpLocalAddr
}
// disjoint reports whether the memory region specified by [p1:p1+n1)
// does not overlap with [p2:p2+n2).
// A return value of false does not imply the regions overlap.
func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
if n1 == 0 || n2 == 0 {
return true
}
if p1 == p2 {
return false
}
baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
base, offset = ptr, 0
for base.Op == OpOffPtr {
offset += base.AuxInt
base = base.Args[0]
}
return base, offset
}
p1, off1 := baseAndOffset(p1)
p2, off2 := baseAndOffset(p2)
if isSamePtr(p1, p2) {
return !overlap(off1, n1, off2, n2)
}
// p1 and p2 are not the same, so if they are both OpAddrs then
// they point to different variables.
// If one pointer is on the stack and the other is an argument
// then they can't overlap.
switch p1.Op {
case OpAddr, OpLocalAddr:
if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
return true
}
return p2.Op == OpArg && p1.Args[0].Op == OpSP
case OpArg:
if p2.Op == OpSP || p2.Op == OpLocalAddr {
return true
}
case OpSP:
return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpSP
}
return false
}
// moveSize returns the number of bytes an aligned MOV instruction moves
func moveSize(align int64, c *Config) int64 {
switch {
case align%8 == 0 && c.PtrSize == 8:
return 8
case align%4 == 0:
return 4
case align%2 == 0:
return 2
}
return 1
}
// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
// Walk backward from b looking for one of the a's blocks.
// Max distance
d := 100
for d > 0 {
for _, x := range a {
if b == x.Block {
goto found
}
}
if len(b.Preds) > 1 {
// Don't know which way to go back. Abort.
return nil
}
b = b.Preds[0].b
d--
}
return nil // too far away
found:
// At this point, r is the first value in a that we find by walking backwards.
// if we return anything, r will be it.
r := b
// Keep going, counting the other a's that we find. They must all dominate r.
na := 0
for d > 0 {
for _, x := range a {
if b == x.Block {
na++
}
}
if na == len(a) {
// Found all of a in a backwards walk. We can return r.
return r
}
if len(b.Preds) > 1 {
return nil
}
b = b.Preds[0].b
d--
}
return nil // too far away
}
// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
// A) make sure the values are really dead and never used again.
// B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
for _, v := range vv {
v.reset(OpInvalid)
// Note: leave v.Block intact. The Block field is used after clobber.
}
return true
}
// clobberIfDead resets v when use count is 1. Returns true.
// clobberIfDead is used by rewrite rules to decrement
// use counts of v's args when v is dead and never used.
func clobberIfDead(v *Value) bool {
if v.Uses == 1 {
v.reset(OpInvalid)
}
// Note: leave v.Block intact. The Block field is used after clobberIfDead.
return true
}
// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
// noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
fmt.Println(s)
return true
}
// countRule increments Func.ruleMatches[key].
// If Func.ruleMatches is non-nil at the end
// of compilation, it will be printed to stdout.
// This is intended to make it easier to find which functions
// which contain lots of rules matches when developing new rules.
func countRule(v *Value, key string) bool {
f := v.Block.Func
if f.ruleMatches == nil {
f.ruleMatches = make(map[string]int)
}
f.ruleMatches[key]++
return true
}
// warnRule generates compiler debug output with string s when
// v is not in autogenerated code, cond is true and the rule has fired.
func warnRule(cond bool, v *Value, s string) bool {
if pos := v.Pos; pos.Line() > 1 && cond {
v.Block.Func.Warnl(pos, s)
}
return true
}
// for a pseudo-op like (LessThan x), extract x
func flagArg(v *Value) *Value {
if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
return nil
}
return v.Args[0]
}
// arm64Negate finds the complement to an ARM64 condition code,
// for example !Equal -> NotEqual or !LessThan -> GreaterEqual
//
// For floating point, it's more subtle because NaN is unordered. We do
// !LessThanF -> NotLessThanF, the latter takes care of NaNs.
func arm64Negate(op Op) Op {
switch op {
case OpARM64LessThan:
return OpARM64GreaterEqual
case OpARM64LessThanU:
return OpARM64GreaterEqualU
case OpARM64GreaterThan:
return OpARM64LessEqual
case OpARM64GreaterThanU:
return OpARM64LessEqualU
case OpARM64LessEqual:
return OpARM64GreaterThan
case OpARM64LessEqualU:
return OpARM64GreaterThanU
case OpARM64GreaterEqual:
return OpARM64LessThan
case OpARM64GreaterEqualU:
return OpARM64LessThanU
case OpARM64Equal:
return OpARM64NotEqual
case OpARM64NotEqual:
return OpARM64Equal
case OpARM64LessThanF:
return OpARM64NotLessThanF
case OpARM64NotLessThanF:
return OpARM64LessThanF
case OpARM64LessEqualF:
return OpARM64NotLessEqualF
case OpARM64NotLessEqualF:
return OpARM64LessEqualF
case OpARM64GreaterThanF:
return OpARM64NotGreaterThanF
case OpARM64NotGreaterThanF:
return OpARM64GreaterThanF
case OpARM64GreaterEqualF:
return OpARM64NotGreaterEqualF
case OpARM64NotGreaterEqualF:
return OpARM64GreaterEqualF
default:
panic("unreachable")
}
}
// arm64Invert evaluates (InvertFlags op), which
// is the same as altering the condition codes such
// that the same result would be produced if the arguments
// to the flag-generating instruction were reversed, e.g.
// (InvertFlags (CMP x y)) -> (CMP y x)
func arm64Invert(op Op) Op {
switch op {
case OpARM64LessThan:
return OpARM64GreaterThan
case OpARM64LessThanU:
return OpARM64GreaterThanU
case OpARM64GreaterThan:
return OpARM64LessThan
case OpARM64GreaterThanU:
return OpARM64LessThanU
case OpARM64LessEqual:
return OpARM64GreaterEqual
case OpARM64LessEqualU:
return OpARM64GreaterEqualU
case OpARM64GreaterEqual:
return OpARM64LessEqual
case OpARM64GreaterEqualU:
return OpARM64LessEqualU
case OpARM64Equal, OpARM64NotEqual:
return op
case OpARM64LessThanF:
return OpARM64GreaterThanF
case OpARM64GreaterThanF:
return OpARM64LessThanF
case OpARM64LessEqualF:
return OpARM64GreaterEqualF
case OpARM64GreaterEqualF:
return OpARM64LessEqualF
case OpARM64NotLessThanF:
return OpARM64NotGreaterThanF
case OpARM64NotGreaterThanF:
return OpARM64NotLessThanF
case OpARM64NotLessEqualF:
return OpARM64NotGreaterEqualF
case OpARM64NotGreaterEqualF:
return OpARM64NotLessEqualF
default:
panic("unreachable")
}
}
// evaluate an ARM64 op against a flags value
// that is potentially constant; return 1 for true,
// -1 for false, and 0 for not constant.
func ccARM64Eval(op Op, flags *Value) int {
fop := flags.Op
if fop == OpARM64InvertFlags {
return -ccARM64Eval(op, flags.Args[0])
}
if fop != OpARM64FlagConstant {
return 0
}
fc := flagConstant(flags.AuxInt)
b2i := func(b bool) int {
if b {
return 1
}
return -1
}
switch op {
case OpARM64Equal:
return b2i(fc.eq())
case OpARM64NotEqual:
return b2i(fc.ne())
case OpARM64LessThan:
return b2i(fc.lt())
case OpARM64LessThanU:
return b2i(fc.ult())
case OpARM64GreaterThan:
return b2i(fc.gt())
case OpARM64GreaterThanU:
return b2i(fc.ugt())
case OpARM64LessEqual:
return b2i(fc.le())
case OpARM64LessEqualU:
return b2i(fc.ule())
case OpARM64GreaterEqual:
return b2i(fc.ge())
case OpARM64GreaterEqualU:
return b2i(fc.uge())
}
return 0
}
// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see gen/rulegen.go.
func logRule(s string) {
if ruleFile == nil {
// Open a log file to write log to. We open in append
// mode because all.bash runs the compiler lots of times,
// and we want the concatenation of all of those logs.
// This means, of course, that users need to rm the old log
// to get fresh data.
// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
if err != nil {
panic(err)
}
ruleFile = w
}
_, err := fmt.Fprintln(ruleFile, s)
if err != nil {
panic(err)
}
}
var ruleFile io.Writer
func min(x, y int64) int64 {
if x < y {
return x
}
return y
}
func isConstZero(v *Value) bool {
switch v.Op {
case OpConstNil:
return true
case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
return v.AuxInt == 0
}
return false
}
// reciprocalExact64 reports whether 1/c is exactly representable.
func reciprocalExact64(c float64) bool {
b := math.Float64bits(c)
man := b & (1<<52 - 1)
if man != 0 {
return false // not a power of 2, denormal, or NaN
}
exp := b >> 52 & (1<<11 - 1)
// exponent bias is 0x3ff. So taking the reciprocal of a number
// changes the exponent to 0x7fe-exp.
switch exp {
case 0:
return false // ±0
case 0x7ff:
return false // ±inf
case 0x7fe:
return false // exponent is not representable
default:
return true
}
}
// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
b := math.Float32bits(c)
man := b & (1<<23 - 1)
if man != 0 {
return false // not a power of 2, denormal, or NaN
}
exp := b >> 23 & (1<<8 - 1)
// exponent bias is 0x7f. So taking the reciprocal of a number
// changes the exponent to 0xfe-exp.
switch exp {
case 0:
return false // ±0
case 0xff:
return false // ±inf
case 0xfe:
return false // exponent is not representable
default:
return true
}
}
// check if an immediate can be directly encoded into an ARM's instruction
func isARMImmRot(v uint32) bool {
for i := 0; i < 16; i++ {
if v&^0xff == 0 {
return true
}
v = v<<2 | v>>30
}
return false
}
// overlap reports whether the ranges given by the given offset and
// size pairs overlap.
func overlap(offset1, size1, offset2, size2 int64) bool {
if offset1 >= offset2 && offset2+size2 > offset1 {
return true
}
if offset2 >= offset1 && offset1+size1 > offset2 {
return true
}
return false
}
func areAdjacentOffsets(off1, off2, size int64) bool {
return off1+size == off2 || off1 == off2+size
}
// check if value zeroes out upper 32-bit of 64-bit register.
// depth limits recursion depth. In AMD64.rules 3 is used as limit,
// because it catches same amount of cases as 4.
func zeroUpper32Bits(x *Value, depth int) bool {
switch x.Op {
case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
OpAMD64SHLL, OpAMD64SHLLconst:
return true
case OpArg:
return x.Type.Width == 4
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each-other as an arguments, instead of tracking visited values,
// just limit recursion depth.
if depth <= 0 {
return false
}
for i := range x.Args {
if !zeroUpper32Bits(x.Args[i], depth-1) {
return false
}
}
return true
}
return false
}
// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits
func zeroUpper48Bits(x *Value, depth int) bool {
switch x.Op {
case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
return true
case OpArg:
return x.Type.Width == 2
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each-other as an arguments, instead of tracking visited values,
// just limit recursion depth.
if depth <= 0 {
return false
}
for i := range x.Args {
if !zeroUpper48Bits(x.Args[i], depth-1) {
return false
}
}
return true
}
return false
}
// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits
func zeroUpper56Bits(x *Value, depth int) bool {
switch x.Op {
case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
return true
case OpArg:
return x.Type.Width == 1
case OpPhi, OpSelect0, OpSelect1:
// Phis can use each-other as an arguments, instead of tracking visited values,
// just limit recursion depth.
if depth <= 0 {
return false
}
for i := range x.Args {
if !zeroUpper56Bits(x.Args[i], depth-1) {
return false
}
}
return true
}
return false
}
// isInlinableMemmove reports whether the given arch performs a Move of the given size
// faster than memmove. It will only return true if replacing the memmove with a Move is
// safe, either because Move is small or because the arguments are disjoint.
// This is used as a check for replacing memmove with Move ops.
func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
// It is always safe to convert memmove into Move when its arguments are disjoint.
// Move ops may or may not be faster for large sizes depending on how the platform
// lowers them, so we only perform this optimization on platforms that we know to
// have fast Move ops.
switch c.arch {
case "amd64":
return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
case "386", "arm64":
return sz <= 8
case "s390x", "ppc64", "ppc64le":
return sz <= 8 || disjoint(dst, sz, src, sz)
case "arm", "mips", "mips64", "mipsle", "mips64le":
return sz <= 4
}
return false
}
// logLargeCopy logs the occurrence of a large copy.
// The best place to do this is in the rewrite rules where the size of the move is easy to find.
// "Large" is arbitrarily chosen to be 128 bytes; this may change.
func logLargeCopy(v *Value, s int64) bool {
if s < 128 {
return true
}
if logopt.Enabled() {
logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
}
return true
}
// hasSmallRotate reports whether the architecture has rotate instructions
// for sizes < 32-bit. This is used to decide whether to promote some rotations.
func hasSmallRotate(c *Config) bool {
switch c.arch {
case "amd64", "386":
return true
default:
return false
}
}
func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
if sh < 0 || sh >= sz {
panic("PPC64 shift arg sh out of range")
}
if mb < 0 || mb >= sz {
panic("PPC64 shift arg mb out of range")
}
if me < 0 || me >= sz {
panic("PPC64 shift arg me out of range")
}
return int32(sh<<16 | mb<<8 | me)
}
func GetPPC64Shiftsh(auxint int64) int64 {
return int64(int8(auxint >> 16))
}
func GetPPC64Shiftmb(auxint int64) int64 {
return int64(int8(auxint >> 8))
}
func GetPPC64Shiftme(auxint int64) int64 {
return int64(int8(auxint))
}
// Test if this value can encoded as a mask for a rlwinm like
// operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
// Isolate rightmost 1 (if none 0) and add.
v := uint32(v64)
vp := (v & -v) + v
// Likewise, for the wrapping case.
vn := ^v
vpn := (vn & -vn) + vn
return (v&vp == 0 || vn&vpn == 0) && v != 0
}
// Compress mask and and shift into single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
var mb, me, mbn, men int
// Determine boundaries and then decode them
if mask == 0 || ^mask == 0 || rotate >= nbits {
panic("Invalid PPC64 rotate mask")
} else if nbits == 32 {
mb = bits.LeadingZeros32(uint32(mask))
me = 32 - bits.TrailingZeros32(uint32(mask))
mbn = bits.LeadingZeros32(^uint32(mask))
men = 32 - bits.TrailingZeros32(^uint32(mask))
} else {
mb = bits.LeadingZeros64(uint64(mask))
me = 64 - bits.TrailingZeros64(uint64(mask))
mbn = bits.LeadingZeros64(^uint64(mask))
men = 64 - bits.TrailingZeros64(^uint64(mask))
}
// Check for a wrapping mask (e.g bits at 0 and 63)
if mb == 0 && me == int(nbits) {
// swap the inverted values
mb, me = men, mbn
}
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}
// The inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
auxint := uint64(sauxint)
rotate = int64((auxint >> 16) & 0xFF)
mb = int64((auxint >> 8) & 0xFF)
me = int64((auxint >> 0) & 0xFF)
nbits := int64((auxint >> 24) & 0xFF)
mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
if mb > me {
mask = ^mask
}
if nbits == 32 {
mask = uint64(uint32(mask))
}
// Fixup ME to match ISA definition. The second argument to MASK(..,me)
// is inclusive.
me = (me - 1) & (nbits - 1)
return
}
// This verifies that the mask is a set of
// consecutive bits including the least
// significant bit.
func isPPC64ValidShiftMask(v int64) bool {
if (v != 0) && ((v+1)&v) == 0 {
return true
}
return false
}
func getPPC64ShiftMaskLength(v int64) int64 {
return int64(bits.Len64(uint64(v)))
}
// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
smask := uint64((1<<uint(nbits))-1) >> uint(s)
return m & int64(smask)
}
// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
func mergePPC64AndSrwi(m, s int64) int64 {
mask := mergePPC64RShiftMask(m, s, 32)
if !isPPC64WordRotateMask(mask) {
return 0
}
return encodePPC64RotateMask((32-s)&31, mask, 32)
}
// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
mask_1 := uint64(0xFFFFFFFF >> uint(srw))
// for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// Rewrite mask to apply after the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
r_1 := 32 - srw
r_2 := GetPPC64Shiftsh(sld)
r_3 := (r_1 + r_2) & 31 // This can wrap.
if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
return 0
}
return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}
// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
// for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// combine the masks, and adjust for the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
r_2 := GetPPC64Shiftsh(int64(sld))
r_3 := (r_1 + r_2) & 31 // This can wrap.
// Verify the result is still a valid bitmask of <= 32 bits.
if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
return 0
}
return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}
// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
if sld > srw || srw >= 32 {
return 0
}
mask_r := uint32(0xFFFFFFFF) >> uint(srw)
mask_l := uint32(0xFFFFFFFF) >> uint(sld)
mask := (mask_r & mask_l) << uint(sld)
return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}
// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
if lsb < 0 || lsb > 63 {
panic("ARM(64) bit field lsb constant out of range")
}
if width < 1 || width > 64 {
panic("ARM(64) bit field width constant out of range")
}
return arm64BitField(width | lsb<<8)
}
// returns the lsb part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFlsb() int64 {
return int64(uint64(bfc) >> 8)
}
// returns the width part of the auxInt field of arm64 bitfield ops.
func (bfc arm64BitField) getARM64BFwidth() int64 {
return int64(bfc) & 0xff
}
// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
shiftedMask := int64(uint64(mask) >> uint64(rshift))
return shiftedMask != 0 && isPowerOfTwo64(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}
// returns the bitfield width of mask >> rshift for arm64 bitfield ops
func arm64BFWidth(mask, rshift int64) int64 {
shiftedMask := int64(uint64(mask) >> uint64(rshift))
if shiftedMask == 0 {
panic("ARM64 BF mask is zero")
}
return nto(shiftedMask)
}
// sizeof returns the size of t in bytes.
// It will panic if t is not a *types.Type.
func sizeof(t interface{}) int64 {
return t.(*types.Type).Size()
}
// registerizable reports whether t is a primitive type that fits in
// a register. It assumes float64 values will always fit into registers
// even if that isn't strictly true.
func registerizable(b *Block, typ *types.Type) bool {
if typ.IsPtrShaped() || typ.IsFloat() {
return true
}
if typ.IsInteger() {
return typ.Size() <= b.Func.Config.RegSize
}
return false
}
// needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
func needRaceCleanup(sym *AuxCall, v *Value) bool {
f := v.Block.Func
if !f.Config.Race {
return false
}
if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncenterfp") && !isSameCall(sym, "runtime.racefuncexit") {
return false
}
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
case OpStaticCall:
// Check for racefuncenter/racefuncenterfp will encounter racefuncexit and vice versa.
// Allow calls to panic*
s := v.Aux.(*AuxCall).Fn.String()
switch s {
case "runtime.racefuncenter", "runtime.racefuncenterfp", "runtime.racefuncexit",
"runtime.panicdivide", "runtime.panicwrap",
"runtime.panicshift":
continue
}
// If we encountered any call, we need to keep racefunc*,
// for accurate stacktraces.
return false
case OpPanicBounds, OpPanicExtend:
// Note: these are panic generators that are ok (like the static calls above).
case OpClosureCall, OpInterCall:
// We must keep the race functions if there are any other call types.
return false
}
}
}
if isSameCall(sym, "runtime.racefuncenter") {
// If we're removing racefuncenter, remove its argument as well.
if v.Args[0].Op != OpStore {
return false
}
mem := v.Args[0].Args[2]
v.Args[0].reset(OpCopy)
v.Args[0].AddArg(mem)
}
return true
}
// symIsRO reports whether sym is a read-only global.
func symIsRO(sym interface{}) bool {
lsym := sym.(*obj.LSym)
return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
}
// symIsROZero reports whether sym is a read-only global whose data contains all zeros.
func symIsROZero(sym Sym) bool {
lsym := sym.(*obj.LSym)
if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
return false
}
for _, b := range lsym.P {
if b != 0 {
return false
}
}
return true
}
// read8 reads one byte from the read-only global sym at offset off.
func read8(sym interface{}, off int64) uint8 {
lsym := sym.(*obj.LSym)
if off >= int64(len(lsym.P)) || off < 0 {
// Invalid index into the global sym.
// This can happen in dead code, so we don't want to panic.
// Just return any value, it will eventually get ignored.
// See issue 29215.
return 0
}
return lsym.P[off]
}
// read16 reads two bytes from the read-only global sym at offset off.
func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
lsym := sym.(*obj.LSym)
// lsym.P is written lazily.
// Bytes requested after the end of lsym.P are 0.
var src []byte
if 0 <= off && off < int64(len(lsym.P)) {
src = lsym.P[off:]
}
buf := make([]byte, 2)
copy(buf, src)
return byteorder.Uint16(buf)
}
// read32 reads four bytes from the read-only global sym at offset off.
func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
lsym := sym.(*obj.LSym)
var src []byte
if 0 <= off && off < int64(len(lsym.P)) {
src = lsym.P[off:]
}
buf := make([]byte, 4)
copy(buf, src)
return byteorder.Uint32(buf)
}
// read64 reads eight bytes from the read-only global sym at offset off.
func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
lsym := sym.(*obj.LSym)
var src []byte
if 0 <= off && off < int64(len(lsym.P)) {
src = lsym.P[off:]
}
buf := make([]byte, 8)
copy(buf, src)
return byteorder.Uint64(buf)
}
// sequentialAddresses reports true if it can prove that x + n == y
func sequentialAddresses(x, y *Value, n int64) bool {
if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
return true
}
if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
return true
}
if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
return true
}
if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
return true
}
return false
}
// flagConstant represents the result of a compile-time comparison.
// The sense of these flags does not necessarily represent the hardware's notion
// of a flags register - these are just a compile-time construct.
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
// (because it does x + ^y + C).
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8
// N reports whether the result of an operation is negative (high bit set).
func (fc flagConstant) N() bool {
return fc&1 != 0
}
// Z reports whether the result of an operation is 0.
func (fc flagConstant) Z() bool {
return fc&2 != 0
}
// C reports whether an unsigned add overflowed (carry), or an
// unsigned subtract did not underflow (borrow).
func (fc flagConstant) C() bool {
return fc&4 != 0
}
// V reports whether a signed operation overflowed or underflowed.
func (fc flagConstant) V() bool {
return fc&8 != 0
}
func (fc flagConstant) eq() bool {
return fc.Z()
}
func (fc flagConstant) ne() bool {
return !fc.Z()
}
func (fc flagConstant) lt() bool {
return fc.N() != fc.V()
}
func (fc flagConstant) le() bool {
return fc.Z() || fc.lt()
}
func (fc flagConstant) gt() bool {
return !fc.Z() && fc.ge()
}
func (fc flagConstant) ge() bool {
return fc.N() == fc.V()
}
func (fc flagConstant) ult() bool {
return !fc.C()
}
func (fc flagConstant) ule() bool {
return fc.Z() || fc.ult()
}
func (fc flagConstant) ugt() bool {
return !fc.Z() && fc.uge()
}
func (fc flagConstant) uge() bool {
return fc.C()
}
func (fc flagConstant) ltNoov() bool {
return fc.lt() && !fc.V()
}
func (fc flagConstant) leNoov() bool {
return fc.le() && !fc.V()
}
func (fc flagConstant) gtNoov() bool {
return fc.gt() && !fc.V()
}
func (fc flagConstant) geNoov() bool {
return fc.ge() && !fc.V()
}
func (fc flagConstant) String() string {
return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
}
type flagConstantBuilder struct {
N bool
Z bool
C bool
V bool
}
func (fcs flagConstantBuilder) encode() flagConstant {
var fc flagConstant
if fcs.N {
fc |= 1
}
if fcs.Z {
fc |= 2
}
if fcs.C {
fc |= 4
}
if fcs.V {
fc |= 8
}
return fc
}
// Note: addFlags(x,y) != subFlags(x,-y) in some situations:
// - the results of the C flag are different
// - the results of the V flag when y==minint are different
// addFlags64 returns the flags that would be set from computing x+y.
func addFlags64(x, y int64) flagConstant {
var fcb flagConstantBuilder
fcb.Z = x+y == 0
fcb.N = x+y < 0
fcb.C = uint64(x+y) < uint64(x)
fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
return fcb.encode()
}
// subFlags64 returns the flags that would be set from computing x-y.
func subFlags64(x, y int64) flagConstant {
var fcb flagConstantBuilder
fcb.Z = x-y == 0
fcb.N = x-y < 0
fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
return fcb.encode()
}
// addFlags32 returns the flags that would be set from computing x+y.
func addFlags32(x, y int32) flagConstant {
var fcb flagConstantBuilder
fcb.Z = x+y == 0
fcb.N = x+y < 0
fcb.C = uint32(x+y) < uint32(x)
fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
return fcb.encode()
}
// subFlags32 returns the flags that would be set from computing x-y.
func subFlags32(x, y int32) flagConstant {
var fcb flagConstantBuilder
fcb.Z = x-y == 0
fcb.N = x-y < 0
fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
return fcb.encode()
}
// logicFlags64 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags64(x int64) flagConstant {
var fcb flagConstantBuilder
fcb.Z = x == 0
fcb.N = x < 0
return fcb.encode()
}
// logicFlags32 returns flags set to the sign/zeroness of x.
// C and V are set to false.
func logicFlags32(x int32) flagConstant {
var fcb flagConstantBuilder
fcb.Z = x == 0
fcb.N = x < 0
return fcb.encode()
}
|