    if (multi_block) {
      __ add(ofs, ofs, 128);
      __ cmp(ofs, limit);
      __ br(Assembler::LE, sha512_loop);
      __ mov(c_rarg0, ofs); // return ofs
    }

    __ st1(v8, v9, v10, v11, __ T2D, state);

    __ ldpd(v14, v15, Address(sp, 48));
    __ ldpd(v12, v13, Address(sp, 32));
    __ ldpd(v10, v11, Address(sp, 16));
    __ ldpd(v8, v9, __ post(sp, 64));

    __ ret(lr);

    return start;
  }

  // Arguments:
  //
  // Inputs:
  //   c_rarg0   - byte[]  source+offset
  //   c_rarg1   - byte[]  SHA.state
  //   c_rarg2   - int     digest_length
  //   c_rarg3   - int     offset
  //   c_rarg4   - int     limit
  //
  address generate_sha3_implCompress(bool multi_block, const char *name) {
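    // Keccak-f[1600] round constants; one constant is XORed into lane
    // A[0][0] in the iota step of each of the 24 rounds.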
    static const uint64_t round_consts[24] = {
      0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
      0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
      0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
      0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
      0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
      0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
      0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
      0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
    };

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    Register buf           = c_rarg0;
    Register state         = c_rarg1;
    Register digest_length = c_rarg2;
    Register ofs           = c_rarg3;
    Register limit         = c_rarg4;

    Label sha3_loop, rounds24_loop;
    Label sha3_512, sha3_384_or_224, sha3_256;

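    // Only the low 64 bits of v8..v15 are callee-saved under the AAPCS64,
    // so spill those halves with stpd and restore them before returning.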
    __ stpd(v8, v9, __ pre(sp, -64));
    __ stpd(v10, v11, Address(sp, 16));
    __ stpd(v12, v13, Address(sp, 32));
    __ stpd(v14, v15, Address(sp, 48));

    // load state
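    // The 25 64-bit Keccak lanes are kept one per vector register, v0..v24
    // (T1D arrangement = a single doubleword per register).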
    __ add(rscratch1, state, 32);
    __ ld1(v0, v1, v2, v3, __ T1D, state);
    __ ld1(v4, v5, v6, v7, __ T1D, __ post(rscratch1, 32));
    __ ld1(v8, v9, v10, v11, __ T1D, __ post(rscratch1, 32));
    __ ld1(v12, v13, v14, v15, __ T1D, __ post(rscratch1, 32));
    __ ld1(v16, v17, v18, v19, __ T1D, __ post(rscratch1, 32));
    __ ld1(v20, v21, v22, v23, __ T1D, __ post(rscratch1, 32));
    __ ld1(v24, __ T1D, rscratch1);

    __ BIND(sha3_loop);

    // 24 keccak rounds
    __ movw(rscratch2, 24);

    // load round_constants base
    __ lea(rscratch1, ExternalAddress((address) round_consts));

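    // Absorb: XOR the next input block into the leading lanes of the state.
    // The block size (sponge rate) is 200 - 2 * digest_length bytes, so the
    // number of lanes XORed below depends on the digest size.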
    // load input
    __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
    __ ld1(v29, v30, v31, __ T8B, __ post(buf, 24));
    __ eor(v0, __ T8B, v0, v25);
    __ eor(v1, __ T8B, v1, v26);
    __ eor(v2, __ T8B, v2, v27);
    __ eor(v3, __ T8B, v3, v28);
    __ eor(v4, __ T8B, v4, v29);
    __ eor(v5, __ T8B, v5, v30);
    __ eor(v6, __ T8B, v6, v31);

    // digest_length == 64, SHA3-512
    __ tbnz(digest_length, 6, sha3_512);

    __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
    __ ld1(v29, v30, __ T8B, __ post(buf, 16));
    __ eor(v7, __ T8B, v7, v25);
    __ eor(v8, __ T8B, v8, v26);
    __ eor(v9, __ T8B, v9, v27);
    __ eor(v10, __ T8B, v10, v28);
    __ eor(v11, __ T8B, v11, v29);
    __ eor(v12, __ T8B, v12, v30);

    // digest_length == 28, SHA3-224; digest_length == 48, SHA3-384
    __ tbnz(digest_length, 4, sha3_384_or_224);

    // SHA3-256
    __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
    __ eor(v13, __ T8B, v13, v25);
    __ eor(v14, __ T8B, v14, v26);
    __ eor(v15, __ T8B, v15, v27);
    __ eor(v16, __ T8B, v16, v28);
    __ b(rounds24_loop);

    __ BIND(sha3_384_or_224);
    __ tbz(digest_length, 2, rounds24_loop); // bit 2 cleared? SHA3-384

    // SHA3-224
    __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
    __ ld1(v29, __ T8B, __ post(buf, 8));
    __ eor(v13, __ T8B, v13, v25);
    __ eor(v14, __ T8B, v14, v26);
    __ eor(v15, __ T8B, v15, v27);
    __ eor(v16, __ T8B, v16, v28);
    __ eor(v17, __ T8B, v17, v29);
    __ b(rounds24_loop);

    __ BIND(sha3_512);
    __ ld1(v25, v26, __ T8B, __ post(buf, 16));
    __ eor(v7, __ T8B, v7, v25);
    __ eor(v8, __ T8B, v8, v26);

    __ BIND(rounds24_loop);
    __ subw(rscratch2, rscratch2, 1);

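    // theta, part 1: the five column parities, computed with the
    // three-way XOR instruction (eor3).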
    __ eor3(v29, __ T16B, v4, v9, v14);
    __ eor3(v26, __ T16B, v1, v6, v11);
    __ eor3(v28, __ T16B, v3, v8, v13);
    __ eor3(v25, __ T16B, v0, v5, v10);
    __ eor3(v27, __ T16B, v2, v7, v12);
    __ eor3(v29, __ T16B, v29, v19, v24);
    __ eor3(v26, __ T16B, v26, v16, v21);
    __ eor3(v28, __ T16B, v28, v18, v23);
    __ eor3(v25, __ T16B, v25, v15, v20);
    __ eor3(v27, __ T16B, v27, v17, v22);

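    // theta, part 2: rax1(d, n, m) computes n ^ rol(m, 1), giving the
    // per-column D values that get XORed into every lane.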
    __ rax1(v30, __ T2D, v29, v26);
    __ rax1(v26, __ T2D, v26, v28);
    __ rax1(v28, __ T2D, v28, v25);
    __ rax1(v25, __ T2D, v25, v27);
    __ rax1(v27, __ T2D, v27, v29);

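    // Apply theta and fold in the rho rotations and pi lane permutation in
    // one pass: xar(d, n, m, r) is ror(n ^ m, r), and the immediates are
    // written as (64 - rho) so the net effect is a left-rotate by rho.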
    __ eor(v0, __ T16B, v0, v30);
    __ xar(v29, __ T2D, v1, v25, (64 - 1));
    __ xar(v1, __ T2D, v6, v25, (64 - 44));
    __ xar(v6, __ T2D, v9, v28, (64 - 20));
    __ xar(v9, __ T2D, v22, v26, (64 - 61));
    __ xar(v22, __ T2D, v14, v28, (64 - 39));
    __ xar(v14, __ T2D, v20, v30, (64 - 18));
    __ xar(v31, __ T2D, v2, v26, (64 - 62));
    __ xar(v2, __ T2D, v12, v26, (64 - 43));
    __ xar(v12, __ T2D, v13, v27, (64 - 25));
    __ xar(v13, __ T2D, v19, v28, (64 - 8));
    __ xar(v19, __ T2D, v23, v27, (64 - 56));
    __ xar(v23, __ T2D, v15, v30, (64 - 41));
    __ xar(v15, __ T2D, v4, v28, (64 - 27));
    __ xar(v28, __ T2D, v24, v28, (64 - 14));
    __ xar(v24, __ T2D, v21, v25, (64 - 2));
    __ xar(v8, __ T2D, v8, v27, (64 - 55));
    __ xar(v4, __ T2D, v16, v25, (64 - 45));
    __ xar(v16, __ T2D, v5, v30, (64 - 36));
    __ xar(v5, __ T2D, v3, v27, (64 - 28));
    __ xar(v27, __ T2D, v18, v27, (64 - 21));
    __ xar(v3, __ T2D, v17, v26, (64 - 15));
    __ xar(v25, __ T2D, v11, v25, (64 - 10));
    __ xar(v26, __ T2D, v7, v26, (64 - 6));
    __ xar(v30, __ T2D, v10, v30, (64 - 3));

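    // chi, row by row: bcax(d, n, m, a) computes n ^ (m & ~a).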
    __ bcax(v20, __ T16B, v31, v22, v8);
    __ bcax(v21, __ T16B, v8, v23, v22);
    __ bcax(v22, __ T16B, v22, v24, v23);
    __ bcax(v23, __ T16B, v23, v31, v24);
    __ bcax(v24, __ T16B, v24, v8, v31);

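    // Fetch this round's iota constant and replicate it into both lanes.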
    __ ld1r(v31, __ T2D, __ post(rscratch1, 8));

    __ bcax(v17, __ T16B, v25, v19, v3);
    __ bcax(v18, __ T16B, v3, v15, v19);
    __ bcax(v19, __ T16B, v19, v16, v15);
    __ bcax(v15, __ T16B, v15, v25, v16);
    __ bcax(v16, __ T16B, v16, v3, v25);

    __ bcax(v10, __ T16B, v29, v12, v26);
    __ bcax(v11, __ T16B, v26, v13, v12);
    __ bcax(v12, __ T16B, v12, v14, v13);
    __ bcax(v13, __ T16B, v13, v29, v14);
    __ bcax(v14, __ T16B, v14, v26, v29);

    __ bcax(v7, __ T16B, v30, v9, v4);
    __ bcax(v8, __ T16B, v4, v5, v9);
    __ bcax(v9, __ T16B, v9, v6, v5);
    __ bcax(v5, __ T16B, v5, v30, v6);
    __ bcax(v6, __ T16B, v6, v4, v30);

    __ bcax(v3, __ T16B, v27, v0, v28);
    __ bcax(v4, __ T16B, v28, v1, v0);
    __ bcax(v0, __ T16B, v0, v2, v1);
    __ bcax(v1, __ T16B, v1, v27, v2);
    __ bcax(v2, __ T16B, v2, v28, v27);

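    // iota: XOR the round constant into lane A[0][0].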
    __ eor(v0, __ T16B, v0, v31);

    __ cbnzw(rscratch2, rounds24_loop);

    if (multi_block) {
      // block_size = 200 - 2 * digest_length, ofs += block_size
      __ add(ofs, ofs, 200);
      __ sub(ofs, ofs, digest_length, Assembler::LSL, 1);
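      // e.g. SHA3-256: digest_length == 32, so block_size == 200 - 64 == 136
      // bytes per absorbed block (the sponge rate)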

      __ cmp(ofs, limit);
      __ br(Assembler::LE, sha3_loop);
      __ mov(c_rarg0, ofs); // return ofs
    }

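    // write the 25 lanes of the updated state back to SHA3.state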
    __ st1(v0, v1, v2, v3, __ T1D, __ post(state, 32));
    __ st1(v4, v5, v6, v7, __ T1D, __ post(state, 32));
    __ st1(v8, v9, v10, v11, __ T1D, __ post(state, 32));
    __ st1(v12, v13, v14, v15, __ T1D, __ post(state, 32));
    __ st1(v16, v17, v18, v19, __ T1D, __ post(state, 32));
    __ st1(v20, v21, v22, v23, __ T1D, __ post(state, 32));
    __ st1(v24, __ T1D, state);

    __ ldpd(v14, v15, Address(sp, 48));
    __ ldpd(v12, v13, Address(sp, 32));
    __ ldpd(v10, v11, Address(sp, 16));
    __ ldpd(v8, v9, __ post(sp, 64));

    __ ret(lr);

    return start;
  }

  // Safefetch stubs.
  void generate_safefetch(const char* name, int size, address* entry,
                          address* fault_pc, address* continuation_pc) {
    // safefetch signatures:
    //   int      SafeFetch32(int*      adr, int      errValue);
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
    //
    // arguments:
    //   c_rarg0 = adr
    //   c_rarg1 = errValue
    //
    // result:
    //   r0 = *adr or errValue

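    // Illustrative use (not part of this stub): a caller probes a possibly
    // unmapped address without risking a crash, e.g.
    //   int v = SafeFetch32(addr, 0xdeadbeef);
    //   bool unreadable = (v == 0xdeadbeef); // or addr really held that value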
    StubCodeMark mark(this, "StubRoutines", name);

    // Entry point, pc or function descriptor.
    *entry = __ pc();

    // Load *adr into c_rarg1, may fault.
    StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();

    if (UseAESIntrinsics) {
      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
    }

    if (UseSHA1Intrinsics) {
      StubRoutines::_sha1_implCompress   = generate_sha1_implCompress(false, "sha1_implCompress");
      StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true,  "sha1_implCompressMB");
    }
    if (UseSHA256Intrinsics) {
      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
    }
    if (UseSHA512Intrinsics) {
      StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true,  "sha512_implCompressMB");
    }
    if (UseSHA3Intrinsics) {
      StubRoutines::_sha3_implCompress   = generate_sha3_implCompress(false, "sha3_implCompress");
      StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(true,  "sha3_implCompressMB");
    }

    // generate Adler32 intrinsics code
    if (UseAdler32Intrinsics) {
      StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
    }

    StubRoutines::aarch64::set_completed();
  }

 public:
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
    if (all) {
      generate_all();
    } else {
      generate_initial();
    }
  }
}; // end class declaration