< prev index next >

src/hotspot/cpu/aarch64/aarch64-asmtest.py

Print this page
rev 61975 : [vector] Address review comments for AArch64 backend changes
1. Separate newly added NEON instructions into a new ad file
   aarch64_neon.ad
2. Add assembler tests for NEON instructions. Trailing spaces
   in the python script are also removed.

@@ -1,11 +1,17 @@
+import os
 import random
+import subprocess
+import sys
 
 AARCH64_AS = "<PATH-TO-AS>"
 AARCH64_OBJDUMP = "<PATH-TO-OBJDUMP>"
 AARCH64_OBJCOPY = "<PATH-TO-OBJCOPY>"
 
+# Use a fixed seed so the generated assembler test code is reproducible
+# and regenerating it produces minimal changes.
+random.seed(0)
+
 class Operand(object):
 
      def generate(self):
         return self
 

@@ -78,10 +84,12 @@
 
 class OperandFactory:
 
     _modes = {'x' : GeneralRegister,
               'w' : GeneralRegister,
+              'b' : FloatRegister,
+              'h' : FloatRegister,
               's' : FloatRegister,
               'd' : FloatRegister,
               'z' : FloatZero}
 
     @classmethod

@@ -335,12 +343,11 @@
 
      def generate(self):
           AddSubImmOp.generate(self)
           self.immed = \
               self.immediates32[random.randint(0, len(self.immediates32)-1)] \
-                if self.isWord \
-              else \
+              if self.isWord else \
                 self.immediates[random.randint(0, len(self.immediates)-1)]
               
           return self
                   
      def astr(self):

@@ -826,11 +833,11 @@
         formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
         return (formatStr
                 % tuple([Instruction.astr(self)] +
                         [(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)]))
 
-class LdStSIMDOp(Instruction):
+class LdStNEONOp(Instruction):
     def __init__(self, args):
         self._name, self.regnum, self.arrangement, self.addresskind = args
 
     def generate(self):
         self.address = Address().generate(self.addresskind, 0)

@@ -845,11 +852,11 @@
                 else:
                     self.address.offset = self.regnum * 16
         return self
 
     def cstr(self):
-        buf = super(LdStSIMDOp, self).cstr() + str(self._firstSIMDreg)
+        buf = super(LdStNEONOp, self).cstr() + str(self._firstSIMDreg)
         current = self._firstSIMDreg
         for cnt in range(1, self.regnum):
             buf = '%s, %s' % (buf, current.nextReg())
             current = current.nextReg()
         return '%s, __ T%s, %s);' % (buf, self.arrangement, str(self.address))

@@ -863,10 +870,61 @@
         return  '%s}, %s' % (buf, self.address.astr("x"))
 
     def aname(self):
          return self._name
 
+# Test-case generator for the NEON reduce instructions listed at the
+# generate(NEONReduceInstruction, ...) call site (addv, smaxv, fmaxv, ...).
+# Each instance uses two registers: a destination and the register that
+# follows it as the source.
+class NEONReduceInstruction(Instruction):
+    # args is a 3-element sequence: (name, insname, arrangement).
+    #   name        -- value returned by aname(); presumably the assembler
+    #                  method name used by Instruction.cstr() (confirm there).
+    #   insname     -- mnemonic emitted at the start of astr().
+    #   arrangement -- vector arrangement string such as "8B" or "4S".
+    def __init__(self, args):
+        self._name, self.insname, self.arrangement = args
+
+    # Pick a FloatRegister for the destination and take the register
+    # returned by its nextReg() as the source. Returns self.
+    def generate(self):
+        current = FloatRegister().generate()
+        self.dstSIMDreg = current
+        self.srcSIMDreg = current.nextReg()
+        return self
+
+    # Build the text from Instruction.cstr() followed by
+    # "<dst>, __ T<arrangement>, <src>);".
+    def cstr(self):
+        buf = Instruction.cstr(self) + str(self.dstSIMDreg)
+        buf = '%s, __ T%s, %s);' % (buf, self.arrangement, self.srcSIMDreg)
+        return buf
+
+    # Build the assembly text "<insname>\t<dst>, <src>.<arrangement>".
+    # The destination is rendered via astr() with the lower-cased last
+    # character of the arrangement as its mode (e.g. "8B" -> "b").
+    def astr(self):
+        buf = '%s\t%s' % (self.insname, self.dstSIMDreg.astr(self.arrangement[-1].lower()))
+        buf = '%s, %s.%s' % (buf, self.srcSIMDreg, self.arrangement)
+        return buf
+
+    # Return the name given as the first element of args.
+    def aname(self):
+        return self._name
+
+# Shared base for NEON test cases whose operands are consecutive vector
+# registers that all use the same arrangement.
+# NOTE: cstr() and astr() read self.numRegs, which this class does not
+# define; subclasses must provide it (see TwoRegNEONOp / ThreeRegNEONOp).
+class CommonNEONInstruction(Instruction):
+    # args is a 3-element sequence: (name, insname, arrangement).
+    #   name        -- value returned by aname(); presumably the assembler
+    #                  method name used by Instruction.cstr() (confirm there).
+    #   insname     -- mnemonic emitted at the start of astr().
+    #   arrangement -- vector arrangement string such as "8B" or "2D".
+    def __init__(self, args):
+        self._name, self.insname, self.arrangement = args
+
+    # Pick the first register; the remaining operands are derived from it
+    # with nextReg() when the strings are built. Returns self.
+    def generate(self):
+        self._firstSIMDreg = FloatRegister().generate()
+        return self
+
+    # Build the text from Instruction.cstr() followed by
+    # "<reg0>, __ T<arrangement>, <reg1>[, <reg2> ...]);".
+    # The arrangement is emitted once, right after the first register.
+    def cstr(self):
+        buf = Instruction.cstr(self) + str(self._firstSIMDreg)
+        buf = '%s, __ T%s' % (buf, self.arrangement)
+        current = self._firstSIMDreg
+        for cnt in range(1, self.numRegs):
+            buf = '%s, %s' % (buf, current.nextReg())
+            current = current.nextReg()
+        return '%s);' % (buf)
+
+    # Build the assembly text
+    # "<insname>\t<reg0>.<arr>, <reg1>.<arr>[, <reg2>.<arr> ...]".
+    # Unlike cstr(), every register carries the arrangement suffix.
+    def astr(self):
+        buf = '%s\t%s.%s' % (self.insname, self._firstSIMDreg, self.arrangement)
+        current = self._firstSIMDreg
+        for cnt in range(1, self.numRegs):
+            buf = '%s, %s.%s' % (buf, current.nextReg(), self.arrangement)
+            current = current.nextReg()
+        return buf
+
+    # Return the name given as the first element of args.
+    def aname(self):
+        return self._name
+
 class LSEOp(Instruction):
     def __init__(self, args):
         self._name, self.asmname, self.size, self.suffix = args
 
     def generate(self):

@@ -907,10 +965,16 @@
         return self._aname
 
     def cname(self):
         return self._cname
 
+# NEON test case with two register operands; numRegs drives the operand
+# loops in CommonNEONInstruction.cstr() and astr().
+class TwoRegNEONOp(CommonNEONInstruction):
+    numRegs = 2
+
+# NEON test case with three register operands; only overrides numRegs,
+# everything else is inherited through TwoRegNEONOp.
+class ThreeRegNEONOp(TwoRegNEONOp):
+    numRegs = 3
+
 class SpecialCases(Instruction):
     def __init__(self, data):
         self._name = data[0]
         self._cstr = data[1]
         self._astr = data[2]

@@ -938,10 +1002,11 @@
              else:
                   print "    %-50s //\t%s" % (op.cstr(), op.astr())
                   outfile.write("\t" + op.astr() + "\n")
 
 outfile = open("aarch64ops.s", "w")
+sys.stdout = open("aarch64ops.asm", "w")
 
 print "// BEGIN  Generated code -- do not edit"
 print "// Generated by aarch64-asmtest.py"
 
 print "    Label back, forth;"

@@ -1074,11 +1139,11 @@
                                 ["stp", "stp", kind, "x"], ["ldp", "ldp", kind, "x"]
                                 ])
 generate(LoadStorePairOp, [["stnp", "stnp", 0, "w"], ["ldnp", "ldnp", 0, "w"],
                            ["stnp", "stnp", 0, "x"], ["ldnp", "ldnp", 0, "x"]])
 
-generate(LdStSIMDOp, [["ld1",  1, "8B",  Address.base_only],
+generate(LdStNEONOp, [["ld1",  1, "8B",  Address.base_only],
                       ["ld1",  2, "16B", Address.post],
                       ["ld1",  3, "1D",  Address.post_reg],
                       ["ld1",  4, "8H",  Address.post],
                       ["ld1r", 1, "8B",  Address.base_only],
                       ["ld1r", 1, "4S",  Address.post],

@@ -1098,10 +1163,97 @@
                       ["ld4r", 4, "8B",  Address.base_only],
                       ["ld4r", 4, "4H",  Address.post],
                       ["ld4r", 4, "2S",  Address.post_reg],
 ])
 
+generate(NEONReduceInstruction,
+         [["addv", "addv", "8B"], ["addv", "addv", "16B"],
+          ["addv", "addv", "4H"], ["addv", "addv", "8H"],
+          ["addv", "addv", "4S"],
+          ["smaxv", "smaxv", "8B"], ["smaxv", "smaxv", "16B"],
+          ["smaxv", "smaxv", "4H"], ["smaxv", "smaxv", "8H"],
+          ["smaxv", "smaxv", "4S"], ["fmaxv", "fmaxv", "4S"],
+          ["sminv", "sminv", "8B"], ["sminv", "sminv", "16B"],
+          ["sminv", "sminv", "4H"], ["sminv", "sminv", "8H"],
+          ["sminv", "sminv", "4S"], ["fminv", "fminv", "4S"],
+          ])
+
+generate(TwoRegNEONOp,
+         [["absr", "abs", "8B"], ["absr", "abs", "16B"],
+          ["absr", "abs", "4H"], ["absr", "abs", "8H"],
+          ["absr", "abs", "2S"], ["absr", "abs", "4S"],
+          ["absr", "abs", "2D"],
+          ["fabs", "fabs", "2S"], ["fabs", "fabs", "4S"],
+          ["fabs", "fabs", "2D"],
+          ["fneg", "fneg", "2S"], ["fneg", "fneg", "4S"],
+          ["fneg", "fneg", "2D"],
+          ["fsqrt", "fsqrt", "2S"], ["fsqrt", "fsqrt", "4S"],
+          ["fsqrt", "fsqrt", "2D"],
+          ["notr", "not", "8B"], ["notr", "not", "16B"],
+          ])
+
+generate(ThreeRegNEONOp,
+         [["andr", "and", "8B"], ["andr", "and", "16B"],
+          ["orr", "orr", "8B"], ["orr", "orr", "16B"],
+          ["eor", "eor", "8B"], ["eor", "eor", "16B"],
+          ["addv", "add", "8B"], ["addv", "add", "16B"],
+          ["addv", "add", "4H"], ["addv", "add", "8H"],
+          ["addv", "add", "2S"], ["addv", "add", "4S"],
+          ["addv", "add", "2D"],
+          ["fadd", "fadd", "2S"], ["fadd", "fadd", "4S"],
+          ["fadd", "fadd", "2D"],
+          ["subv", "sub", "8B"], ["subv", "sub", "16B"],
+          ["subv", "sub", "4H"], ["subv", "sub", "8H"],
+          ["subv", "sub", "2S"], ["subv", "sub", "4S"],
+          ["subv", "sub", "2D"],
+          ["fsub", "fsub", "2S"], ["fsub", "fsub", "4S"],
+          ["fsub", "fsub", "2D"],
+          ["mulv", "mul", "8B"], ["mulv", "mul", "16B"],
+          ["mulv", "mul", "4H"], ["mulv", "mul", "8H"],
+          ["mulv", "mul", "2S"], ["mulv", "mul", "4S"],
+          ["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"],
+          ["fmul", "fmul", "2D"],
+          ["mlav", "mla", "4H"], ["mlav", "mla", "8H"],
+          ["mlav", "mla", "2S"], ["mlav", "mla", "4S"],
+          ["fmla", "fmla", "2S"], ["fmla", "fmla", "4S"],
+          ["fmla", "fmla", "2D"],
+          ["mlsv", "mls", "4H"], ["mlsv", "mls", "8H"],
+          ["mlsv", "mls", "2S"], ["mlsv", "mls", "4S"],
+          ["fmls", "fmls", "2S"], ["fmls", "fmls", "4S"],
+          ["fmls", "fmls", "2D"],
+          ["fdiv", "fdiv", "2S"], ["fdiv", "fdiv", "4S"],
+          ["fdiv", "fdiv", "2D"],
+          ["maxv", "smax", "8B"], ["maxv", "smax", "16B"],
+          ["maxv", "smax", "4H"], ["maxv", "smax", "8H"],
+          ["maxv", "smax", "2S"], ["maxv", "smax", "4S"],
+          ["fmax", "fmax", "2S"], ["fmax", "fmax", "4S"],
+          ["fmax", "fmax", "2D"],
+          ["minv", "smin", "8B"], ["minv", "smin", "16B"],
+          ["minv", "smin", "4H"], ["minv", "smin", "8H"],
+          ["minv", "smin", "2S"], ["minv", "smin", "4S"],
+          ["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"],
+          ["fmin", "fmin", "2D"],
+          ["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"],
+          ["cmeq", "cmeq", "4H"], ["cmeq", "cmeq", "8H"],
+          ["cmeq", "cmeq", "2S"], ["cmeq", "cmeq", "4S"],
+          ["cmeq", "cmeq", "2D"],
+          ["fcmeq", "fcmeq", "2S"], ["fcmeq", "fcmeq", "4S"],
+          ["fcmeq", "fcmeq", "2D"],
+          ["cmgt", "cmgt", "8B"], ["cmgt", "cmgt", "16B"],
+          ["cmgt", "cmgt", "4H"], ["cmgt", "cmgt", "8H"],
+          ["cmgt", "cmgt", "2S"], ["cmgt", "cmgt", "4S"],
+          ["cmgt", "cmgt", "2D"],
+          ["fcmgt", "fcmgt", "2S"], ["fcmgt", "fcmgt", "4S"],
+          ["fcmgt", "fcmgt", "2D"],
+          ["cmge", "cmge", "8B"], ["cmge", "cmge", "16B"],
+          ["cmge", "cmge", "4H"], ["cmge", "cmge", "8H"],
+          ["cmge", "cmge", "2S"], ["cmge", "cmge", "4S"],
+          ["cmge", "cmge", "2D"],
+          ["fcmge", "fcmge", "2S"], ["fcmge", "fcmge", "4S"],
+          ["fcmge", "fcmge", "2D"],
+          ])
+
 generate(SpecialCases, [["ccmn",   "__ ccmn(zr, zr, 3u, Assembler::LE);",                "ccmn\txzr, xzr, #3, LE"],
                         ["ccmnw",  "__ ccmnw(zr, zr, 5u, Assembler::EQ);",               "ccmn\twzr, wzr, #5, EQ"],
                         ["ccmp",   "__ ccmp(zr, 1, 4u, Assembler::NE);",                 "ccmp\txzr, 1, #4, NE"],
                         ["ccmpw",  "__ ccmpw(zr, 2, 2, Assembler::GT);",                 "ccmp\twzr, 2, #2, GT"],
                         ["extr",   "__ extr(zr, zr, zr, 0);",                            "extr\txzr, xzr, xzr, 0"],

@@ -1142,20 +1294,17 @@
 print "\n    __ bind(forth);"
 outfile.write("forth:\n")
 
 outfile.close()
 
-import subprocess
-import sys
-
 # compile for 8.1 because of lse atomics
 subprocess.check_call([AARCH64_AS, "-march=armv8.1-a", "aarch64ops.s", "-o", "aarch64ops.o"])
+output = subprocess.check_output([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
 
 print
-print "/*",
-sys.stdout.flush()
-subprocess.check_call([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
+print "/*"
+print output
 print "*/"
 
 subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
 
 infile = open("aarch64ops.bin", "r")

@@ -1172,6 +1321,18 @@
      if i%16 == 0:
           print
 print "\n  };"
 print "// END  Generated code -- do not edit"
 
+infile.close()
+# sys.stdout was redirected to aarch64ops.asm earlier in the script; close
+# it so everything printed so far is written out before reading it back.
+sys.stdout.close()
+
+# Remove trailing spaces and replace tab with 4 spaces
+# Restore the interpreter's original stdout first, so the cleaned-up text
+# is printed there rather than into the already-closed file.
+sys.stdout = sys.__stdout__
+with open("aarch64ops.asm", "r") as infile:
+    for line in infile:
+        # rstrip() also drops the trailing newline; print adds one back.
+        line = line.rstrip()
+        line = line.replace("\t", "    ")
+        print line
 
+# Delete the intermediate files produced during this run (assembler source,
+# object file, raw binary and the captured output).
+for f in ["aarch64ops.s", "aarch64ops.o", "aarch64ops.bin", "aarch64ops.asm"]:
+    os.remove(f)
< prev index next >